This paper presents a new programming methodology for introducing and tuning parallelism for heterogeneous shared-memory systems (comprising a mixture of CPUs and GPUs), using a combination of algorithmic skeletons (such as farms and pipelines), Monte-Carlo tree search for deriving mappings of tasks to available hardware resources, and refactoring tool support for applying the patterns and mappings in an easy and effective way. Using our approach, we demonstrate easily obtainable, significant and scalable speedups on a number of case studies showing speedups of up to 41 over the sequential code on a 24-core machine with one GPU. We also demonstrate that the mappings the MCTS algorithm suggest are comparable to the best possible speedups that can be obtained.
%0 Conference Paper
%1 2016-01
%A Janjic, V.
%A Hammond, K.
%A Goli, M.
%A McCall, J.
%A Idrees, K.
%A Glass, C.
%A Wafai, M. A.
%B High-Level Programming for Heterogeneous and Hierarchical Parallel Systems
%D 2016
%K HLRS Heterogeneous Parallel SCOPE myown
%T Bridging the Divide: A New Methodology for Semi-Automatic Programming of Heterogeneous Parallel Machines
%X This paper presents a new programming methodology for introducing and tuning parallelism for heterogeneous shared-memory systems (comprising a mixture of CPUs and GPUs), using a combination of algorithmic skeletons (such as farms and pipelines), Monte-Carlo tree search for deriving mappings of tasks to available hardware resources, and refactoring tool support for applying the patterns and mappings in an easy and effective way. Using our approach, we demonstrate easily obtainable, significant and scalable speedups on a number of case studies showing speedups of up to 41 over the sequential code on a 24-core machine with one GPU. We also demonstrate that the mappings the MCTS algorithm suggest are comparable to the best possible speedups that can be obtained.
@comment{
  Conference paper record (PUMA export; see biburl for the source record).
  Uses the canonical inproceedings type rather than the legacy conference
  alias, and the standard three-letter month macro so styles can localise it.
  The abstract has had PDF line-break hyphenation removed.
}
@inproceedings{2016-01,
  author        = {Janjic, V. and Hammond, K. and Goli, M. and McCall, J. and Idrees, K. and Glass, C. and Wafai, M. A.},
  title         = {Bridging the Divide: A New Methodology for Semi-Automatic Programming of Heterogeneous Parallel Machines},
  booktitle     = {High-Level Programming for Heterogeneous and Hierarchical Parallel Systems},
  year          = {2016},
  month         = jan,
  abstract      = {This paper presents a new programming methodology for introducing and tuning parallelism for heterogeneous shared-memory systems (comprising a mixture of CPUs and GPUs), using a combination of algorithmic skeletons (such as farms and pipelines), Monte-Carlo tree search for deriving mappings of tasks to available hardware resources, and refactoring tool support for applying the patterns and mappings in an easy and effective way. Using our approach, we demonstrate easily obtainable, significant and scalable speedups on a number of case studies showing speedups of up to 41 over the sequential code on a 24-core machine with one GPU. We also demonstrate that the mappings the MCTS algorithm suggest are comparable to the best possible speedups that can be obtained.},
  keywords      = {HLRS Heterogeneous Parallel SCOPE myown},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/21f9f7c2cbd18f9a7f47082575e6198ee/amerwafai},
  interhash     = {710ae981f52849337a343d9e07607114},
  intrahash     = {1f9f7c2cbd18f9a7f47082575e6198ee},
  added-at      = {2016-01-29T09:34:55.000+0100},
  timestamp     = {2016-01-29T08:43:50.000+0100},
  date-added    = {2016-01-29 08:09:45 +0000},
  date-modified = {2016-01-29 08:17:22 +0000},
}