High-dimensional grid-based simulations serve as both a tool and a challenge in researching various domains. The main challenge of these approaches is the well-known curse of dimensionality, amplified by the need for fine resolutions in high-fidelity applications. The combination technique (CT) provides a straightforward way of performing such simulations while alleviating the curse of dimensionality. Recent work demonstrated the potential of the CT to join multiple systems simultaneously to perform a single high-dimensional simulation. This paper shows how to extend this to three or more systems and addresses some remaining challenges: load balancing on heterogeneous hardware; utilizing compression to maximize the communication bandwidth; efficient I/O management through hardware mapping; and improving memory utilization through algorithmic optimizations. Combining these contributions, we demonstrate the feasibility of the CT for extreme-scale Superfacility scenarios of 46 trillion DOF on two systems and 35 trillion DOF on three systems. Scenarios at these resolutions would be intractable with full-grid solvers (> 1,000 nonillion DOF each).
%0 Conference Paper
%1 pollinger2024realizing
%A Pollinger, Theresa
%A Van Craen, Alexander
%A Offenhäuser, Philipp
%A Pflüger, Dirk
%B 2024 SC24: International Conference for High Performance Computing, Networking, Storage and Analysis SC
%C Los Alamitos, CA, USA
%D 2024
%I IEEE Computer Society
%K myown
%P 1568-1584
%R 10.1109/SC41406.2024.00104
%T Realizing Joint Extreme-Scale Simulations on Multiple Supercomputers—Two Superfacility Case Studies
%U https://doi.ieeecomputersociety.org/10.1109/SC41406.2024.00104
%X High-dimensional grid-based simulations serve as both a tool and a challenge in researching various domains. The main challenge of these approaches is the well-known curse of dimensionality, amplified by the need for fine resolutions in high-fidelity applications. The combination technique (CT) provides a straightforward way of performing such simulations while alleviating the curse of dimensionality. Recent work demonstrated the potential of the CT to join multiple systems simultaneously to perform a single high-dimensional simulation. This paper shows how to extend this to three or more systems and addresses some remaining challenges: load balancing on heterogeneous hardware; utilizing compression to maximize the communication bandwidth; efficient I/O management through hardware mapping; and improving memory utilization through algorithmic optimizations. Combining these contributions, we demonstrate the feasibility of the CT for extreme-scale Superfacility scenarios of 46 trillion DOF on two systems and 35 trillion DOF on three systems. Scenarios at these resolutions would be intractable with full-grid solvers (> 1,000 nonillion DOF each).
@comment{
  Conference paper in the SC24 proceedings (Pollinger et al., 2024).
  Bibliographic fields come first, then identifiers, then the abstract,
  then bookkeeping fields (added-at, biburl, interhash, intrahash,
  keywords, timestamp) that standard styles ignore.
  NOTE(review): the bookkeeping fields presumably originate from the
  PUMA record referenced in biburl; confirm before editing them by hand.
  Only the term Superfacility is brace-protected in the title so that
  styles remain free to apply their own casing to the rest.
}
@inproceedings{pollinger2024realizing,
  author    = {Pollinger, Theresa and Van Craen, Alexander and Offenhäuser, Philipp and Pflüger, Dirk},
  title     = {Realizing Joint Extreme-Scale Simulations on Multiple Supercomputers—Two {Superfacility} Case Studies},
  booktitle = {2024 SC24: International Conference for High Performance Computing, Networking, Storage and Analysis SC},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  year      = 2024,
  month     = nov,
  pages     = {1568--1584},
  doi       = {10.1109/SC41406.2024.00104},
  url       = {https://doi.ieeecomputersociety.org/10.1109/SC41406.2024.00104},
  abstract  = {High-dimensional grid-based simulations serve as both a tool and a challenge in researching various domains. The main challenge of these approaches is the well-known curse of dimensionality, amplified by the need for fine resolutions in high-fidelity applications. The combination technique (CT) provides a straightforward way of performing such simulations while alleviating the curse of dimensionality. Recent work demonstrated the potential of the CT to join multiple systems simultaneously to perform a single high-dimensional simulation. This paper shows how to extend this to three or more systems and addresses some remaining challenges: load balancing on heterogeneous hardware; utilizing compression to maximize the communication bandwidth; efficient I/O management through hardware mapping; and improving memory utilization through algorithmic optimizations. Combining these contributions, we demonstrate the feasibility of the CT for extreme-scale Superfacility scenarios of 46 trillion DOF on two systems and 35 trillion DOF on three systems. Scenarios at these resolutions would be intractable with full-grid solvers (> 1,000 nonillion DOF each).},
  added-at  = {2024-11-18T19:33:32.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/200d073adecd8ac05be27acc7c2f6964d/ipvs-sc},
  interhash = {19314082840bfc3a4757a43fe7738459},
  intrahash = {00d073adecd8ac05be27acc7c2f6964d},
  keywords  = {myown},
  timestamp = {2024-11-18T19:33:32.000+0100},
}