We provide details on the shared-memory parallelization for manycore architectures of the molecular dynamics framework ls1-mardyn, including an optimization of the SIMD vectorization for multi-centered molecules. The novel shared-memory parallelization scheme allows to retain Newton's third law optimization and exhibits very good scaling on many-core devices such as a full Xeon Phi card running 240 threads. The Xeon Phi can thus be exploited and delivers comparable performance as IvyBridge nodes in our experiments.
%0 Generic
%1 wafai15
%A Tchipev, Nikola
%A Wafai, Amer
%A Glass, Colin W.
%A Eckhardt, Wolfgang
%A Heinecke, Alexander
%A Bungartz, Hans-Joachim
%A Neumann, Philipp
%B Euro-Par 2015: Parallel Processing
%C Vienna, Austria
%D 2015
%E Träff, Jesper Larsson
%E Hunold, Sascha
%E Versaci, Francesco
%K HLRS SCOPE absolute and calculation cell colored core distributed dynamic force gather intel interaction law linked memory molecular myown newton operation optimization parallel parallelization performance phi processing range scatter shared short site third xeon yellow
%T Optimized Force Calculation of Molecular Dynamics Simulations for the Intel Xeon Phi
%V 9233
%X We provide details on the shared-memory parallelization for manycore architectures of the molecular dynamics framework ls1-mardyn, including an optimization of the SIMD vectorization for multi-centered molecules. The novel shared-memory parallelization scheme allows to retain Newton's third law optimization and exhibits very good scaling on many-core devices such as a full Xeon Phi card running 240 threads. The Xeon Phi can thus be exploited and delivers comparable performance as IvyBridge nodes in our experiments.
@inproceedings{wafai15,
  author        = {Tchipev, Nikola and Wafai, Amer and Glass, Colin W. and Eckhardt, Wolfgang and Heinecke, Alexander and Bungartz, Hans-Joachim and Neumann, Philipp},
  title         = {Optimized Force Calculation of Molecular Dynamics Simulations for the {Intel} {Xeon} {Phi}},
  booktitle     = {Euro-Par 2015: Parallel Processing},
  editor        = {Tr{\"a}ff, Jesper Larsson and Hunold, Sascha and Versaci, Francesco},
  series        = {Lecture Notes in Computer Science},
  volume        = 9233,
  address       = {Vienna, Austria},
  month         = aug,
  year          = 2015,
  abstract      = {We provide details on the shared-memory parallelization for manycore architectures of the molecular dynamics framework ls1-mardyn, including an optimization of the SIMD vectorization for multi-centered molecules. The novel shared-memory parallelization scheme allows to retain Newton's third law optimization and exhibits very good scaling on many-core devices such as a full Xeon Phi card running 240 threads. The Xeon Phi can thus be exploited and delivers comparable performance as IvyBridge nodes in our experiments.},
  keywords      = {HLRS SCOPE absolute and calculation cell colored core distributed dynamic force gather intel interaction law linked memory molecular myown newton operation optimization parallel parallelization performance phi processing range scatter shared short site third xeon yellow},
  added-at      = {2016-01-29T09:34:55.000+0100},
  timestamp     = {2016-01-29T08:41:13.000+0100},
  date-added    = {2015-08-19 09:05:42 +0000},
  date-modified = {2015-08-19 09:10:27 +0000},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/2dccb0302d8b765b1450e86f4b2798951/amerwafai},
  interhash     = {902fd5f88d25cf9f9a271091a6bbd41a},
  intrahash     = {dccb0302d8b765b1450e86f4b2798951}
}