
{  
   "types" : {
      "Bookmark" : {
         "pluralLabel" : "Bookmarks"
      },
      "Publication" : {
         "pluralLabel" : "Publications"
      },
      "GoldStandardPublication" : {
         "pluralLabel" : "GoldStandardPublications"
      },
      "GoldStandardBookmark" : {
         "pluralLabel" : "GoldStandardBookmarks"
      },
      "Tag" : {
         "pluralLabel" : "Tags"
      },
      "User" : {
         "pluralLabel" : "Users"
      },
      "Group" : {
         "pluralLabel" : "Groups"
      },
      "Sphere" : {
         "pluralLabel" : "Spheres"
      }
   },
   
   "properties" : {
      "count" : {
         "valueType" : "number"
      },
      "date" : {
         "valueType" : "date"
      },
      "changeDate" : {
         "valueType" : "date"
      },
      "url" : {
         "valueType" : "url"
      },
      "id" : {
         "valueType" : "url"
      },
      "tags" : {
         "valueType" : "item"
      },
      "user" : {
         "valueType" : "item"
      }      
   },
   
   "items" : [
   	  
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2e8ebd2f42b453167d2065725486828cb/aisa",         
         "tags" : [
            "myown","Performance","CPU","Evaluation","SVM","SYCL","GPU","AISA","exc2075"
         ],
         
         "intraHash" : "e8ebd2f42b453167d2065725486828cb",
         "interHash" : "bfbc52cd98d241445f5051b284bf6ded",
         "label" : "Evaluation of SYCL\u2019s Different Data Parallel Kernels",
         "user" : "aisa",
         "description" : "",
         "date" : "2025-06-23 09:45:25",
         "changeDate" : "2025-06-23 09:45:25",
         "count" : 10,
         "pub-type": "inproceedings",
         "booktitle": "Proceedings of the 12th International Workshop on OpenCL and SYCL","series": "IWOCL '24","publisher":"Association for Computing Machinery","address":"New York, NY, USA",
         "year": "2024", 
         "url": "https://doi.org/10.1145/3648115.3648130", 
         
         "author": [ 
            "Marcel Breyer","Alexander Van Craen","Dirk Pflüger"
         ],
         "authors": [
         	
            	{"first" : "Marcel",	"last" : "Breyer"},
            	{"first" : "Alexander",	"last" : "Van Craen"},
            	{"first" : "Dirk",	"last" : "Pflüger"}
         ],
         "pages": "1-4","abstract": "SYCL provides programmers with four, and in the case of AdaptiveCpp even five, ways for calling and writing a device kernel. This paper analyzes the performance of these diverse kernel invocation types for DPC++ and AdaptiveCpp as SYCL implementations on an NVIDIA A100 GPU, an AMD Instinct MI210 GPU, and a dual-socket AMD EPYC 9274F CPU. Using the example of a kernel matrix assembly, we show why the performance can differ by a factor of 100 in the worst case on the same hardware for the same problem using different SYCL implementations and kernel invocation types.",
         "isbn" : "9798400717901",
         
         "language" : "english",
         
         "numpages" : "4",
         
         "articleno" : "10",
         
         "location" : "Chicago, IL, USA",
         
         "doi" : "10.1145/3648115.3648130",
         
         "bibtexKey": "breyer2024evaluation"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2e8ebd2f42b453167d2065725486828cb/vancraen",         
         "tags" : [
            "AISA","CPU","Evaluation","GPU","Performance","SVM","SYCL","exc2075","myown"
         ],
         
         "intraHash" : "e8ebd2f42b453167d2065725486828cb",
         "interHash" : "bfbc52cd98d241445f5051b284bf6ded",
         "label" : "Evaluation of SYCL\u2019s Different Data Parallel Kernels",
         "user" : "vancraen",
         "description" : "",
         "date" : "2024-09-30 13:17:58",
         "changeDate" : "2025-06-23 09:45:25",
         "count" : 10,
         "pub-type": "inproceedings",
         "booktitle": "Proceedings of the 12th International Workshop on OpenCL and SYCL","series": "IWOCL '24","publisher":"Association for Computing Machinery","address":"New York, NY, USA",
         "year": "2024", 
         "url": "https://doi.org/10.1145/3648115.3648130", 
         
         "author": [ 
            "Marcel Breyer","Alexander Van Craen","Dirk Pflüger"
         ],
         "authors": [
         	
            	{"first" : "Marcel",	"last" : "Breyer"},
            	{"first" : "Alexander",	"last" : "Van Craen"},
            	{"first" : "Dirk",	"last" : "Pflüger"}
         ],
         "pages": "1-4","abstract": "SYCL provides programmers with four, and in the case of AdaptiveCpp even five, ways for calling and writing a device kernel. This paper analyzes the performance of these diverse kernel invocation types for DPC++ and AdaptiveCpp as SYCL implementations on an NVIDIA A100 GPU, an AMD Instinct MI210 GPU, and a dual-socket AMD EPYC 9274F CPU. Using the example of a kernel matrix assembly, we show why the performance can differ by a factor of 100 in the worst case on the same hardware for the same problem using different SYCL implementations and kernel invocation types.",
         "isbn" : "9798400717901",
         
         "language" : "english",
         
         "numpages" : "4",
         
         "articleno" : "10",
         
         "location" : "Chicago, IL, USA",
         
         "doi" : "10.1145/3648115.3648130",
         
         "bibtexKey": "breyer2024evaluation"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2e8ebd2f42b453167d2065725486828cb/ipvs-sgs",         
         "tags" : [
            "myown","Performance","CPU","Evaluation","SVM","SYCL","GPU","AISA","exc2075","aisa"
         ],
         
         "intraHash" : "e8ebd2f42b453167d2065725486828cb",
         "interHash" : "bfbc52cd98d241445f5051b284bf6ded",
         "label" : "Evaluation of SYCL\u2019s Different Data Parallel Kernels",
         "user" : "ipvs-sgs",
         "description" : "",
         "date" : "2024-09-30 13:17:58",
         "changeDate" : "2025-06-23 09:45:25",
         "count" : 10,
         "pub-type": "inproceedings",
         "booktitle": "Proceedings of the 12th International Workshop on OpenCL and SYCL","series": "IWOCL '24","publisher":"Association for Computing Machinery","address":"New York, NY, USA",
         "year": "2024", 
         "url": "https://doi.org/10.1145/3648115.3648130", 
         
         "author": [ 
            "Marcel Breyer","Alexander Van Craen","Dirk Pflüger"
         ],
         "authors": [
         	
            	{"first" : "Marcel",	"last" : "Breyer"},
            	{"first" : "Alexander",	"last" : "Van Craen"},
            	{"first" : "Dirk",	"last" : "Pflüger"}
         ],
         "pages": "1-4","abstract": "SYCL provides programmers with four, and in the case of AdaptiveCpp even five, ways for calling and writing a device kernel. This paper analyzes the performance of these diverse kernel invocation types for DPC++ and AdaptiveCpp as SYCL implementations on an NVIDIA A100 GPU, an AMD Instinct MI210 GPU, and a dual-socket AMD EPYC 9274F CPU. Using the example of a kernel matrix assembly, we show why the performance can differ by a factor of 100 in the worst case on the same hardware for the same problem using different SYCL implementations and kernel invocation types.",
         "isbn" : "9798400717901",
         
         "language" : "english",
         
         "numpages" : "4",
         
         "articleno" : "10",
         
         "location" : "Chicago, IL, USA",
         
         "doi" : "10.1145/3648115.3648130",
         
         "bibtexKey": "breyer2024evaluation"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/2e8ebd2f42b453167d2065725486828cb/ipvs-sc",         
         "tags" : [
            "myown","Performance","CPU","Evaluation","SVM","SYCL","GPU","AISA","exc2075","aisa"
         ],
         
         "intraHash" : "e8ebd2f42b453167d2065725486828cb",
         "interHash" : "bfbc52cd98d241445f5051b284bf6ded",
         "label" : "Evaluation of SYCL\u2019s Different Data Parallel Kernels",
         "user" : "ipvs-sc",
         "description" : "",
         "date" : "2024-09-30 13:17:58",
         "changeDate" : "2025-06-23 09:45:25",
         "count" : 10,
         "pub-type": "inproceedings",
         "booktitle": "Proceedings of the 12th International Workshop on OpenCL and SYCL","series": "IWOCL '24","publisher":"Association for Computing Machinery","address":"New York, NY, USA",
         "year": "2024", 
         "url": "https://doi.org/10.1145/3648115.3648130", 
         
         "author": [ 
            "Marcel Breyer","Alexander Van Craen","Dirk Pflüger"
         ],
         "authors": [
         	
            	{"first" : "Marcel",	"last" : "Breyer"},
            	{"first" : "Alexander",	"last" : "Van Craen"},
            	{"first" : "Dirk",	"last" : "Pflüger"}
         ],
         "pages": "1-4","abstract": "SYCL provides programmers with four, and in the case of AdaptiveCpp even five, ways for calling and writing a device kernel. This paper analyzes the performance of these diverse kernel invocation types for DPC++ and AdaptiveCpp as SYCL implementations on an NVIDIA A100 GPU, an AMD Instinct MI210 GPU, and a dual-socket AMD EPYC 9274F CPU. Using the example of a kernel matrix assembly, we show why the performance can differ by a factor of 100 in the worst case on the same hardware for the same problem using different SYCL implementations and kernel invocation types.",
         "isbn" : "9798400717901",
         
         "language" : "english",
         
         "numpages" : "4",
         
         "articleno" : "10",
         
         "location" : "Chicago, IL, USA",
         
         "doi" : "10.1145/3648115.3648130",
         
         "bibtexKey": "breyer2024evaluation"

      }
,
      {
         "type" : "Publication",
         "id"   : "https://puma.ub.uni-stuttgart.de/bibtex/20fa2e309a2cd7a291449888471514bdf/thomasrichter",         
         "tags" : [
            "compression","cpu","gpu","image","low-complexity","myown"
         ],
         
         "intraHash" : "0fa2e309a2cd7a291449888471514bdf",
         "interHash" : "1c214f842d3f6bafc3be0b32e40c4d75",
         "label" : "Comparison of CPU and GPU Based Coding on Low-Complexity Algorithms for Display Signals",
         "user" : "thomasrichter",
         "description" : "",
         "date" : "2016-03-10 09:18:49",
         "changeDate" : "2016-03-10 08:35:27",
         "count" : 3,
         "pub-type": "inproceedings",
         "booktitle": "Applications of Digital Image Processing XXXVI","publisher":"SPIE",
         "year": "2013", 
         "url": "http://spie.org/Publications/Proceedings/Paper/10.1117/12.2022398", 
         
         "author": [ 
            "T. Richter","S. Simon"
         ],
         "authors": [
         	
            	{"first" : "T.",	"last" : "Richter"},
            	{"first" : "S.",	"last" : "Simon"}
         ],
         
         "editor": [ 
            "Andrew G. Tescher"
         ],
         "editors": [
         	
            	{"first" : "Andrew G.",	"last" : "Tescher"}
         ],
         "volume": "8856","pages": "14 pages","abstract": "Graphics Processing Units (GPUs) are freely programmable massively parallel general purpose processing units and thus offer the opportunity to off-load heavy computations from the CPU to the GPU. One application for GPU programming is image compression, where the massively parallel nature of GPUs promises high speed benefits. This article analyzes the predicaments of data-parallel image coding on the example of two high-throughput coding algorithms. The codecs discussed here were designed to answer a call from the Video Electronics Standards Association (VESA), and require only minimal buffering at encoder and decoder side while avoiding any pixel-based feedback loops limiting the operating frequency of hardware implementations. Comparing CPU and GPU implementations of the codes show that GPU based codes are usually not considerably faster, or perform only with less than ideal rate-distortion performance. Analyzing the details of this result provides theoretical evidence that for any coding engine either parts of the entropy coding and bit-stream build-up must remain serial, or rate-distortion penalties must be paid when offloading all computations on the GPU.",
         "isbn" : "9780819497062",
         
         "doi" : "10.1117/12.2022398",
         
         "bibtexKey": "richter2013comparison"

      }
	  
   ]
}
