During recent years, data lakes emerged as a way to manage large amounts of heterogeneous data for modern data analytics. Although various work on individual aspects of data lakes exists, there is no comprehensive data lake architecture yet. Concepts that describe themselves as a “data lake architecture” are only partial. In this work, we introduce the data lake architecture framework. It supports the definition of data lake architectures by defining nine architectural aspects, i.e., perspectives on a data lake, such as data storage or data modeling, and by exploring the interdependencies between these aspects. The included methodology helps to choose appropriate concepts to instantiate each aspect. To evaluate the framework, we use it to configure an exemplary data lake architecture for a real-world data lake implementation. This final assessment shows that our framework provides comprehensive guidance in the configuration of a data lake architecture.
%0 Conference Paper
%1 Giebler2021
%A Giebler, Corinna
%A Gröger, Christoph
%A Hoos, Eva
%A Eichler, Rebecca
%A Schwarz, Holger
%A Mitschang, Bernhard
%B Proceedings der 19. Fachtagung für Datenbanksysteme für Business, Technologie und Web (BTW 2021)
%D 2021
%K big_data data_lake data_lake_architecture myown
%T The Data Lake Architecture Framework: A Foundation for Building a Comprehensive Data Lake Architecture
%U https://opencms.uni-stuttgart.de/fak5/ipvs/departments/as/publications/giebleca/20_dlaf_BTW_preprint.pdf
%X During recent years, data lakes emerged as a way to manage large amounts of heterogeneous data for modern data analytics. Although various work on individual aspects of data lakes exists, there is no comprehensive data lake architecture yet. Concepts that describe themselves as a “data lake architecture” are only partial. In this work, we introduce the data lake architecture framework. It supports the definition of data lake architectures by defining nine architectural aspects, i.e., perspectives on a data lake, such as data storage or data modeling, and by exploring the interdependencies between these aspects. The included methodology helps to choose appropriate concepts to instantiate each aspect. To evaluate the framework, we use it to configure an exemplary data lake architecture for a real-world data lake implementation. This final assessment shows that our framework provides comprehensive guidance in the configuration of a data lake architecture.
@inproceedings{Giebler2021,
  abstract  = {During recent years, data lakes emerged as a way to manage large amounts of heterogeneous data for modern data analytics. Although various work on individual aspects of data lakes exists, there is no comprehensive data lake architecture yet. Concepts that describe themselves as a “data lake architecture” are only partial. In this work, we introduce the data lake architecture framework. It supports the definition of data lake architectures by defining nine architectural aspects, i.e., perspectives on a data lake, such as data storage or data modeling, and by exploring the interdependencies between these aspects. The included methodology helps to choose appropriate concepts to instantiate each aspect. To evaluate the framework, we use it to configure an exemplary data lake architecture for a real-world data lake implementation. This final assessment shows that our framework provides comprehensive guidance in the configuration of a data lake architecture.},
  added-at  = {2021-02-02T11:54:37.000+0100},
  author    = {Giebler, Corinna and Gröger, Christoph and Hoos, Eva and Eichler, Rebecca and Schwarz, Holger and Mitschang, Bernhard},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/21ca8ffe1608f71e0e15c021a9a2f9fed/corinnagiebler},
  booktitle = {Proceedings der 19. Fachtagung für Datenbanksysteme für Business, Technologie und Web (BTW 2021)},
  interhash = {fde26d2e8b24de78bb55a7a2583928fd},
  intrahash = {1ca8ffe1608f71e0e15c021a9a2f9fed},
  keywords  = {big_data data_lake data_lake_architecture myown},
  timestamp = {2021-02-02T10:54:37.000+0100},
  title     = {The Data Lake Architecture Framework: A Foundation for Building a Comprehensive Data Lake Architecture},
  url       = {https://opencms.uni-stuttgart.de/fak5/ipvs/departments/as/publications/giebleca/20_dlaf_BTW_preprint.pdf},
  year      = {2021}
}