
{  
   "types" : {
      "Bookmark" : {
         "pluralLabel" : "Bookmarks"
      },
      "Publication" : {
         "pluralLabel" : "Publications"
      },
      "GoldStandardPublication" : {
         "pluralLabel" : "GoldStandardPublications"
      },
      "GoldStandardBookmark" : {
         "pluralLabel" : "GoldStandardBookmarks"
      },
      "Tag" : {
         "pluralLabel" : "Tags"
      },
      "User" : {
         "pluralLabel" : "Users"
      },
      "Group" : {
         "pluralLabel" : "Groups"
      },
      "Sphere" : {
         "pluralLabel" : "Spheres"
      }
   },
   
   "properties" : {
      "count" : {
         "valueType" : "number"
      },
      "date" : {
         "valueType" : "date"
      },
      "changeDate" : {
         "valueType" : "date"
      },
      "url" : {
         "valueType" : "url"
      },
      "id" : {
         "valueType" : "url"
      },
      "tags" : {
         "valueType" : "item"
      },
      "user" : {
         "valueType" : "item"
      }      
   },
   
   "items" : [
   	  
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2865b04cce31a607aa787e4051fded357/tpollinger",         
         "tags" : [
            "combination_technique","hpc","myown","parallelism","processing","simulation","sparse_grids"
         ],
         
         "intraHash" : "865b04cce31a607aa787e4051fded357",
         "interHash" : "2716385e3707408ae1db87d8be44ecc7",
         "label" : "Distributing Higher-Dimensional Simulations Across Compute Systems: A Widely Distributed Combination Technique",
         "user" : "tpollinger",
         "description" : "",
         "date" : "2022-03-24 15:53:08",
         "changeDate" : "2022-03-31 13:46:17",
         "count" : 3,
         "pub-type": "inproceedings",
         "booktitle": "2021 IEEE/ACM International Workshop on Hierarchical Parallelism for Exascale Computing (HiPar)",
         "year": "2021", 
         "url": "https://ieeexplore.ieee.org/abstract/document/9654243", 
         
         "author": [ 
            "Theresa Pollinger","Marcel Hurler","Michael Obersteiner","Dirk Pflüger"
         ],
         "authors": [
         	
            	{"first" : "Theresa",	"last" : "Pollinger"},
            	{"first" : "Marcel",	"last" : "Hurler"},
            	{"first" : "Michael",	"last" : "Obersteiner"},
            	{"first" : "Dirk",	"last" : "Pflüger"}
         ],
         "pages": "1--9","abstract": "The numerical solution of high-dimensional PDE problems is essential for many research questions, such as understanding relativistic astrophysics, quantum physics, or hot fusion plasmas. At the same time, it is haunted by the curse of dimensionality, rendering finely resolved simulations infeasible even on modern architectures. The Sparse Grid Combination Technique helps to break the curse of dimensionality for high-dimensional PDE problems to some extent. But even then, simulations are restricted by the size of HPC systems. A new implementation based on the open-source code DisCoTec allows to distribute existing solvers even across compute systems: The widely distributed combination technique enables simulations at scales that would otherwise be intractable.This paper introduces the extended algorithm and showcases a proof of concept for the remote communication set-up. The scaling properties for the single-system and two-system cases are presented, and the numerical correctness of the implementation is validated.The widely distributed combination technique is useful in cases where the memory and/or compute resources are not sufficient for a particular problem to fit on one single available system, but multiple systems are able to accommodate it.",
         "eventtitle" : "2021 IEEE/ACM International Workshop on Hierarchical Parallelism for Exascale Computing (HiPar)",
         
         "venue" : "SC21",
         
         "shorttitle" : "Distributing Higher-Dimensional Simulations Across Compute Systems",
         
         "doi" : "10.1109/HiPar54615.2021.00006",
         
         "bibtexKey": "pollingerdistributing"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2916ce1b33327b6553d694d5e1e52e876/markusfrank",         
         "tags" : [
            "auto-scaler","data","elastic","myown","processing","streams"
         ],
         
         "intraHash" : "916ce1b33327b6553d694d5e1e52e876",
         "interHash" : "e449a680c7e10ebf3395116d87d6c913",
         "label" : "The Elastic Processing of Data Streams in Cloud Environments: A Systematic Mapping Study",
         "user" : "markusfrank",
         "description" : "",
         "date" : "2019-12-17 15:40:34",
         "changeDate" : "2019-12-17 14:40:34",
         "count" : 3,
         "pub-type": "inproceedings",
         "booktitle": "Proceedings of the 9th International Conference on Cloud Computing and Services Science, CLOSER 2019, Heraklion, Crete, Greece, May 2-4, 2019",
         "year": "2019", 
         "url": "https://doi.org/10.5220/0007708503160323", 
         
         "author": [ 
            "Floriment Klinaku","Michael Zigldrum","Markus Frank","Steffen Becker"
         ],
         "authors": [
         	
            	{"first" : "Floriment",	"last" : "Klinaku"},
            	{"first" : "Michael",	"last" : "Zigldrum"},
            	{"first" : "Markus",	"last" : "Frank"},
            	{"first" : "Steffen",	"last" : "Becker"}
         ],
         "pages": "316--323",
         "bibsource" : "dblp computer science bibliography, https://dblp.org",
         
         "doi" : "10.5220/0007708503160323",
         
         "bibtexKey": "DBLP:conf/closer/KlinakuZF019"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2faa2d6258b978f9b95243ae1a1f0cd8f/lhuillae",         
         "tags" : [
            "Computer","Fourier","Generation;I.3.6","Graphics\u2014Methodology","Graphics\u2014Picture/Image","Methodologies]:","Techniques","[Computing","analysis;data","and","bundling;fast","data","detection;Kernel;Scalability;I.3.3","drawings;information","edge","myown","process;edge","processing","simplification;Clutter;Convolution;Fourier","streaming","theory;FFTEB;data","transform;graph","transforms;Graphics","transforms;graph","units;Image","visualisation;fast","visualization;visual"
         ],
         
         "intraHash" : "faa2d6258b978f9b95243ae1a1f0cd8f",
         "interHash" : "f0d62e0e7749f8ebce5b636ece6af154",
         "label" : "FFTEB: Edge bundling of huge graphs by the Fast Fourier Transform",
         "user" : "lhuillae",
         "description" : "",
         "date" : "2018-06-14 18:09:07",
         "changeDate" : "2018-06-14 16:09:07",
         "count" : 1,
         "pub-type": "inproceedings",
         "booktitle": "2017 IEEE Pacific Visualization Symposium (PacificVis)",
         "year": "2017", 
         "url": "", 
         
         "author": [ 
            "Antoine Lhuillier","Christophe Hurter","Alexandru Telea"
         ],
         "authors": [
         	
            	{"first" : "Antoine",	"last" : "Lhuillier"},
            	{"first" : "Christophe",	"last" : "Hurter"},
            	{"first" : "Alexandru",	"last" : "Telea"}
         ],
         "pages": "190-199",
         "doi" : "10.1109/PACIFICVIS.2017.8031594",
         
         "bibtexKey": "8031594"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/22e472a2e1a03b74130da7b54b78eba21/djfoerster",         
         "tags" : [
            "@ifsw","Laser","materials","myown","peer","processing"
         ],
         
         "intraHash" : "2e472a2e1a03b74130da7b54b78eba21",
         "interHash" : "ae46bf8271cdb93fc668f1fe3e0c3c92",
         "label" : "Estimation of the depth limit for percussion drilling with picosecond laser pulses",
         "user" : "djfoerster",
         "description" : "",
         "date" : "2018-04-20 07:46:23",
         "changeDate" : "2018-04-20 05:46:23",
         "count" : 5,
         "pub-type": "article",
         "journal": "Opt. Express","publisher":"OSA",
         "year": "2018", 
         "url": "http://www.opticsexpress.org/abstract.cfm?URI=oe-26-9-11546", 
         
         "author": [ 
            "Daniel J. Förster","Rudolf Weber","Daniel Holder","Thomas Graf"
         ],
         "authors": [
         	
            	{"first" : "Daniel J.",	"last" : "Förster"},
            	{"first" : "Rudolf",	"last" : "Weber"},
            	{"first" : "Daniel",	"last" : "Holder"},
            	{"first" : "Thomas",	"last" : "Graf"}
         ],
         "volume": "26","number": "9","pages": "11546--11552","abstract": "We present a model to predict the final depth of percussion-drilled holes that are produced with picosecond laser pulses in metals. It is based on the assumption that boreholes always have conical geometries when the drilling process terminates. We show that the model is valid for various process parameters when drilling in stainless steel. This was even confirmed by drilling with 3 mJ pulses, which resulted in a 10 mm deep borehole without thermal damage.",
         "doi" : "10.1364/OE.26.011546",
         
         "bibtexKey": "Forster:18"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2dccb0302d8b765b1450e86f4b2798951/amerwafai",         
         "tags" : [
            "HLRS","SCOPE","absolute","and","calculation","cell","colored","core","distributed","dynamic","force","gather","intel","interaction","law","linked","memory","molecular","myown","newton","operation","optimization","parallel","parallelization","performance","phi","processing","range","scatter","shared","short","site","third","xeon","yellow"
         ],
         
         "intraHash" : "dccb0302d8b765b1450e86f4b2798951",
         "interHash" : "902fd5f88d25cf9f9a271091a6bbd41a",
         "label" : "Optimized Force Calculation of Molecular Dynamics Simulations for the Intel Xeon Phi",
         "user" : "amerwafai",
         "description" : "",
         "date" : "2016-01-29 09:34:55",
         "changeDate" : "2016-01-29 08:41:13",
         "count" : 1,
         "pub-type": "conference",
         "booktitle": "Euro-Par 2015: Parallel Processing","series": "LNCS","address":"Vienna, Austria",
         "year": "2015", 
         "url": "", 
         
         "author": [ 
            "Nikola Tchipev","Amer Wafai","Colin W. Glass","Wolfgang Eckhardt","Alexander Heinecke","Hans-Joachim Bungartz","Philipp Neumann"
         ],
         "authors": [
         	
            	{"first" : "Nikola",	"last" : "Tchipev"},
            	{"first" : "Amer",	"last" : "Wafai"},
            	{"first" : "Colin W.",	"last" : "Glass"},
            	{"first" : "Wolfgang",	"last" : "Eckhardt"},
            	{"first" : "Alexander",	"last" : "Heinecke"},
            	{"first" : "Hans-Joachim",	"last" : "Bungartz"},
            	{"first" : "Philipp",	"last" : "Neumann"}
         ],
         
         "editor": [ 
            "Jesper Larsson Träff","Sascha Hunold","Francesco Versaci"
         ],
         "editors": [
         	
            	{"first" : "Jesper Larsson",	"last" : "Träff"},
            	{"first" : "Sascha",	"last" : "Hunold"},
            	{"first" : "Francesco",	"last" : "Versaci"}
         ],
         "volume": "9233","abstract": "We provide details on the shared-memory parallelization for manycore architectures of the molecular dynamics framework ls1-mardyn, including an optimization of the SIMD vectorization for multi-centered molecules. The novel shared-memory parallelization scheme allows to re- tain Newton's third law optimization and exhibits very good scaling on many-core devices such as a full Xeon Phi card running 240 threads. The Xeon Phi can thus be exploited and delivers comparable performance as IvyBridge nodes in our experiments.",
         "date-added" : "2015-08-19 09:05:42 +0000",
         
         "date-modified" : "2015-08-19 09:10:27 +0000",
         
         "bibtexKey": "wafai15"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2ce0df7291d15f3b0f1467222f3958c4d/amerwafai",         
         "tags" : [
            "Architecture","CUDA","Compute","Device","GPU","Graphics","HLRS","Processing","SCOPE","Unified","Unit","myown"
         ],
         
         "intraHash" : "ce0df7291d15f3b0f1467222f3958c4d",
         "interHash" : "782fe775e19c904f41d1b5e185f207f1",
         "label" : "Sparse matrix vector multiplications on graphic processors",
         "user" : "amerwafai",
         "description" : "",
         "date" : "2016-01-29 09:34:55",
         "changeDate" : "2016-01-29 08:39:18",
         "count" : 1,
         "pub-type": "mastersthesis",
         "address":"Nobelstr. 19, 70569, Stuttgart",
         "year": "2009", 
         "url": "", 
         
         "author": [ 
            "Mhd. Amer Wafai"
         ],
         "authors": [
         	
            	{"first" : "Mhd. Amer",	"last" : "Wafai"}
         ],
         "abstract": "The modern computer architecture is moving towards multi-core systems. Intel processors are now coming with double or even quad cores like Xeon processor. Graphics Processing Units (GPUs) are considered to be highly parallel multi-core processors with tremendous performance. They are specially designed to deal with 3D and realtime graphics. And after the introduction of the new API, Compute Unified Device Architecture (CUDA), from NVIDA, the GPU became an attractive choice for general purpose parallel computing to solve many complex numerical problems. \r\nSparse Matrix-Vector (SpMV) multiplication is one of the most important kernels in scientific computing. Its sparsity, irregularity and indirect addressing properties present new challenges to map it to multi-core systems. \r\nThe objective of this work is to analyze the speed of execution of SpMV multiplication on NVIDIA GPUs (Tesla C1060). An algorithm based on a tailored version of ELLPACK, called Aligned-ELLPACK-R, as well as different algorithms have been developed using different storage formats. These implementations are done using the programming language CUDA. Finally the comparison of that performance has been done with respect to different implementations of SpMV on Intel Xeon E5560 processor using Jagged Diagonal Formats (JAD), ELLPACK and ELLPACK-R storage formats. \r\nThe results show the superiority of JAD storage format over the matrices used to test SpMV on conventional super scaler processors. SpMV on Tesla C1060 based on Aligned-ELLPACK-R outperforms the fastest implementation on CPU with speedup factor 13 times. It also outperforms the CUDA library based on ELLPACK with 2.3 speedup factor.",
         "date-added" : "2015-08-18 13:53:17 +0000",
         
         "date-modified" : "2015-08-18 14:02:15 +0000",
         
         "bibtexKey": "wafai09"

      }
	  
   ]
}
