This paper introduces the porting of an industrial neural network simulator onto GPUs used in a tool-chain to sort massive amounts of E-mails and other textual data. Compared to other previous work, all steps are being executed on the GPU, achieving overall up to 33× speedup without using any cuBLAS functionality. All the time-consuming routines have been ported onto the GPU, i.e. the training-, the simulation- and the verification-phases, the training being the most time-consuming. It is planned to include these GPU-kernels into the product for special customer's demands.
%0 Conference Paper
%1 wafai12
%A Wafai, Mhd. Amer
%A Ahmed, Zaheer
%A Keller, Rainer
%A Holzmann, Sven
%A Sander, Björn
%A Resch, Michael
%B New Horizons in Web Based Learning
%D 2011
%E Chiu, Dickson K. W.
%E Wang, Minhong
%E Popescu, Elvira
%E Li, Qing
%E Lau, Rynson
%I Springer Berlin Heidelberg
%K Back CUDA GPGPU GPU HLRS Network Neural Parallelisation Propagation SCOPE myown
%P 21-29
%T Optimization of industrial Neural Network simulators for GPGPUs
%V 7697
%X This paper introduces the porting of an industrial neural network simulator onto GPUs used in a tool-chain to sort massive amounts of E-mails and other textual data. Compared to other previous work, all steps are being executed on the GPU, achieving overall up to 33× speedup without using any cuBLAS functionality. All the time-consuming routines have been ported onto the GPU, i.e. the training-, the simulation- and the verification-phases, the training being the most time-consuming. It is planned to include these GPU-kernels into the product for special customer's demands.
@inproceedings{wafai12,
abstract = {This paper introduces the porting of an industrial neural network simulator onto GPUs used in a tool-chain to sort massive amounts of E-mails and other textual data. Compared to other previous work, all steps are being executed on the GPU, achieving overall up to 33$\times$ speedup without using any cuBLAS functionality. All the time-consuming routines have been ported onto the GPU, i.e. the training-, the simulation- and the verification-phases, the training being the most time-consuming. It is planned to include these GPU-kernels into the product for special customer's demands.},
added-at = {2016-01-29T09:34:55.000+0100},
author = {Wafai, Mhd. Amer and Ahmed, Zaheer and Keller, Rainer and Holzmann, Sven and Sander, Bj{\"o}rn and Resch, Michael},
biburl = {https://puma.ub.uni-stuttgart.de/bibtex/2a4a5c9728db0683dda1b2b3318fa0c9d/amerwafai},
booktitle = {New Horizons in Web Based Learning},
date-added = {2015-08-18 14:03:50 +0000},
date-modified = {2015-08-18 14:20:22 +0000},
editor = {Chiu, Dickson K. W. and Wang, Minhong and Popescu, Elvira and Li, Qing and Lau, Rynson},
interhash = {0f4f1b3c77004231f7c17f1c80480dfe},
intrahash = {a4a5c9728db0683dda1b2b3318fa0c9d},
keywords = {Back CUDA GPGPU GPU HLRS Network Neural Parallelisation Propagation SCOPE myown},
month = dec,
pages = {21--29},
publisher = {Springer Berlin Heidelberg},
series = {LNCS},
timestamp = {2016-01-29T08:42:48.000+0100},
title = {Optimization of industrial Neural Network simulators for {GPGPUs}},
volume = {7697},
year = {2011}
}