Challenges of Research Data Management for High Performance Computing
B. Schembera and T. Bönisch. International Conference on Theory and Practice of Digital Libraries, pages 140--151. Springer (2017)
Abstract
This paper targets the challenges of research data management with a focus on High Performance Computing (HPC) and simulation data. Main challenges are discussed: The Big Data qualities of HPC research data, technical data management, organizational and administrative challenges. Emerging from these challenges, requirements for a feasible HPC research data management are derived and an alternative data life cycle is proposed. The requirement analysis includes recommendations which are based on a modified OAIS architecture: To meet the HPC requirements of a scalable system, metadata and data must not be stored together. Metadata keys are defined and organizational actions are recommended. Moreover, this paper contributes by introducing the role of a Scientific Data Manager, who is responsible for the institution’s data management and taking stewardship of the data.
%0 Conference Paper
%1 schembera2017challenges
%A Schembera, Björn
%A Bönisch, Thomas
%B International Conference on Theory and Practice of Digital Libraries
%D 2017
%I Springer
%K data hpc management myown research simulation
%P 140--151
%T Challenges of Research Data Management for High Performance Computing
%U https://link.springer.com/chapter/10.1007/978-3-319-67008-9_12
%X This paper targets the challenges of research data management with a focus on High Performance Computing (HPC) and simulation data. Main challenges are discussed: The Big Data qualities of HPC research data, technical data management, organizational and administrative challenges. Emerging from these challenges, requirements for a feasible HPC research data management are derived and an alternative data life cycle is proposed. The requirement analysis includes recommendations which are based on a modified OAIS architecture: To meet the HPC requirements of a scalable system, metadata and data must not be stored together. Metadata keys are defined and organizational actions are recommended. Moreover, this paper contributes by introducing the role of a Scientific Data Manager, who is responsible for the institution’s data management and taking stewardship of the data.
% Conference paper entry. Springer is recorded as the publisher (the
% `organization` field is reserved for a sponsoring body), and `doi` holds the
% bare identifier that is embedded in the `url` value.
@inproceedings{schembera2017challenges,
  author    = {Schembera, Bj{\"o}rn and B{\"o}nisch, Thomas},
  title     = {Challenges of Research Data Management for High Performance Computing},
  booktitle = {International Conference on Theory and Practice of Digital Libraries},
  pages     = {140--151},
  publisher = {Springer},
  year      = {2017},
  doi       = {10.1007/978-3-319-67008-9_12},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-67008-9_12},
  abstract  = {This paper targets the challenges of research data management with a focus on High Performance Computing (HPC) and simulation data. Main challenges are discussed: The Big Data qualities of HPC research data, technical data management, organizational and administrative challenges. Emerging from these challenges, requirements for a feasible HPC research data management are derived and an alternative data life cycle is proposed. The requirement analysis includes recommendations which are based on a modified OAIS architecture: To meet the HPC requirements of a scalable system, metadata and data must not be stored together. Metadata keys are defined and organizational actions are recommended. Moreover, this paper contributes by introducing the role of a Scientific Data Manager, who is responsible for the institution's data management and taking stewardship of the data.},
  keywords  = {data hpc management myown research simulation},
  added-at  = {2017-09-22T13:12:36.000+0200},
  timestamp = {2017-09-22T11:12:36.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/224312a954079a07a4488cc88392ef995/bjoernschembera},
  interhash = {da85dbed0522ad7ae1cad483bacb645e},
  intrahash = {24312a954079a07a4488cc88392ef995},
}