<?xml version="1.0" encoding="UTF-8"?>
<!--
  RDF/XML listing of the PUMA publications for /group/simtech/GPU
  (see the rdfs:comment in the owl:Ontology header below).

  Layout note: only element-only content is indented. Elements that carry
  literal text (title, keywords, abstract, ...) are kept on their original
  lines, because whitespace inside them is part of the value.
-->
<rdf:RDF xmlns:community="http://www.bibsonomy.org/ontologies/2008/05/community#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:admin="http://webns.net/mvcb/"
         xmlns:content="http://purl.org/rss/1.0/modules/content/"
         xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
         xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
         xmlns:cc="http://web.resource.org/cc/"
         xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
         xmlns:swrc="http://swrc.ontoware.org/ontology#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns="http://purl.org/rss/1.0/"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xml:base="https://puma.ub.uni-stuttgart.de/group/simtech/GPU">

  <!-- Document header: rdf:about="" refers to this document (resolved against xml:base). -->
  <owl:Ontology rdf:about="">
    <rdfs:comment>PUMA publications for /group/simtech/GPU</rdfs:comment>
    <owl:imports rdf:resource="http://swrc.ontoware.org/ontology/portal"/>
  </owl:Ontology>

  <!-- Publication entry, typed as swrc:InProceedings. -->
  <rdf:Description rdf:about="https://puma.ub.uni-stuttgart.de/bibtex/2e8ebd2f42b453167d2065725486828cb/vancraen">
    <!-- Relative reference: resolved against the xml:base declared on the root element. -->
    <owl:sameAs rdf:resource="/uri/bibtex/2e8ebd2f42b453167d2065725486828cb/vancraen"/>
    <rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/>
    <owl:sameAs rdf:resource="https://doi.org/10.1145/3648115.3648130"/>

    <!-- Bibliographic fields. -->
    <swrc:date>Mon Sep 30 13:17:58 CEST 2024</swrc:date>
    <swrc:address>New York, NY, USA</swrc:address>
    <swrc:booktitle>Proceedings of the 12th International Workshop on OpenCL and SYCL</swrc:booktitle>
    <swrc:month>04</swrc:month>
    <swrc:pages>1-4</swrc:pages>
    <swrc:publisher>
      <swrc:Organization swrc:name="Association for Computing Machinery"/>
    </swrc:publisher>
    <swrc:series>IWOCL &#039;24</swrc:series>
    <swrc:title>Evaluation of SYCL’s Different Data Parallel Kernels</swrc:title>
    <swrc:year>2024</swrc:year>
    <swrc:keywords>myown Performance Evaluation CPU SVM SYCL GPU AISA exc2075 </swrc:keywords>
    <!-- The abstract contains a literal line break; it is left exactly as exported. -->
    <swrc:abstract>SYCL provides programmers with four, and in the case of AdaptiveCpp even five, ways for calling and writing a device kernel. 
This paper analyzes the performance of these diverse kernel invocation types for DPC++ and AdaptiveCpp as SYCL implementations on an NVIDIA A100 GPU, an AMD Instinct MI210 GPU, and a dual-socket AMD EPYC 9274F CPU. Using the example of a kernel matrix assembly, we show why the performance can differ by a factor of 100 in the worst case on the same hardware for the same problem using different SYCL implementations and kernel invocation types.</swrc:abstract>

    <!-- Additional key/value fields attached to the entry. -->
    <swrc:hasExtraField>
      <swrc:Field swrc:value="9798400717901" swrc:key="isbn"/>
    </swrc:hasExtraField>
    <swrc:hasExtraField>
      <swrc:Field swrc:value="english" swrc:key="language"/>
    </swrc:hasExtraField>
    <swrc:hasExtraField>
      <swrc:Field swrc:value="4" swrc:key="numpages"/>
    </swrc:hasExtraField>
    <swrc:hasExtraField>
      <swrc:Field swrc:value="10" swrc:key="articleno"/>
    </swrc:hasExtraField>
    <swrc:hasExtraField>
      <swrc:Field swrc:value="Chicago, IL, USA" swrc:key="location"/>
    </swrc:hasExtraField>
    <swrc:hasExtraField>
      <swrc:Field swrc:value="10.1145/3648115.3648130" swrc:key="doi"/>
    </swrc:hasExtraField>

    <!-- Authors as an rdf:Seq; rdf:_1, rdf:_2, rdf:_3 give the positions in the sequence. -->
    <swrc:author>
      <rdf:Seq>
        <rdf:_1>
          <swrc:Person swrc:name="Marcel Breyer"/>
        </rdf:_1>
        <rdf:_2>
          <swrc:Person swrc:name="Alexander Van Craen"/>
        </rdf:_2>
        <rdf:_3>
          <swrc:Person swrc:name="Dirk Pflüger"/>
        </rdf:_3>
      </rdf:Seq>
    </swrc:author>
  </rdf:Description>
</rdf:RDF>