Deep Neural Networks (DNNs) denote multilayer artificial neural networks with more than one hidden layer and millions of free parameters. We propose a Generalized Discriminant Analysis (GerDA) based on DNNs to learn discriminative features of low dimension optimized with respect to a fast classification from a large set of acoustic features for emotion recognition. On nine frequently used emotional speech corpora, we compare the performance of GerDA features and their subsequent linear classification with previously reported benchmarks obtained using the same set of acoustic features classified by Support Vector Machines (SVMs). Our results impressively show that low-dimensional GerDA features capture hidden information from the acoustic features leading to a significantly raised unweighted average recall and considerably raised weighted average recall.
2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)
Jahr
2011
Monat
May
Seiten
5688-5691
Kommentar
Generalized Discriminant Analysis (GerDA) based on DNNs to learn discriminative features of low dimension;
Experiments on 9 emotion speech corpora show that GerDA features perform better than previously reported benchmarks;
%0 Conference Paper
%1 stuhlsatz2011deep
%A Stuhlsatz, A.
%A Meyer, C.
%A Eyben, F.
%A Zielke, T.
%A Meier, G.
%A Schuller, B.
%B 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)
%D 2011
%K Analysis; Computing;Deep Discriminant Networks;Emotion Neural Recognition;Generalized acoustic analysis;linear artificial classification;multilayer discriminant emotion extraction;Speech;Support extraction;neural feature;acoustic machine;Acoustics;Artificial machines;Affective machines;GerDA nets;support network;support networks;Databases;Emotion networks;generalized neural processing;emotion recognition;Feature recognition;deep recognition;feature signal vector
%P 5688-5691
%R 10.1109/ICASSP.2011.5947651
%T Deep neural networks for acoustic emotion recognition: Raising the benchmarks
%X Deep Neural Networks (DNNs) denote multilayer artificial neural networks with more than one hidden layer and millions of free parameters. We propose a Generalized Discriminant Analysis (GerDA) based on DNNs to learn discriminative features of low dimension optimized with respect to a fast classification from a large set of acoustic features for emotion recognition. On nine frequently used emotional speech corpora, we compare the performance of GerDA features and their subsequent linear classification with previously reported benchmarks obtained using the same set of acoustic features classified by Support Vector Machines (SVMs). Our results impressively show that low-dimensional GerDA features capture hidden information from the acoustic features leading to a significantly raised unweighted average recall and considerably raised weighted average recall.
%Z Generalized Discriminant Analysis (GerDA) based on DNNs to learn discriminative features of low dimension;
Experiments on 9 emotion speech corpora show that GerDA features perform better than previously reported benchmarks;
@inproceedings{stuhlsatz2011deep,
  abstract      = {Deep Neural Networks (DNNs) denote multilayer artificial neural networks with more than one hidden layer and millions of free parameters. We propose a Generalized Discriminant Analysis (GerDA) based on DNNs to learn discriminative features of low dimension optimized with respect to a fast classification from a large set of acoustic features for emotion recognition. On nine frequently used emotional speech corpora, we compare the performance of GerDA features and their subsequent linear classification with previously reported benchmarks obtained using the same set of acoustic features classified by Support Vector Machines (SVMs). Our results impressively show that low-dimensional GerDA features capture hidden information from the acoustic features leading to a significantly raised unweighted average recall and considerably raised weighted average recall.},
  added-at      = {2018-02-19T14:58:22.000+0100},
  annote        = {Generalized Discriminant Analysis (GerDA) based on DNNs to learn discriminative features of low dimension;
Experiments on 9 emotion speech corpora show that GerDA features perform better than previously reported benchmarks;},
  author        = {Stuhlsatz, A. and Meyer, C. and Eyben, F. and Zielke, T. and Meier, G. and Schuller, B.},
  bdsk-file-1   = {YnBsaXN0MDDUAQIDBAUGJCVYJHZlcnNpb25YJG9iamVjdHNZJGFyY2hpdmVyVCR0b3ASAAGGoKgHCBMUFRYaIVUkbnVsbNMJCgsMDxJXTlMua2V5c1pOUy5vYmplY3RzViRjbGFzc6INDoACgAOiEBGABIAFgAdccmVsYXRpdmVQYXRoWWFsaWFzRGF0YV8QN1N0dWhsc2F0ei9EZWVwIG5ldXJhbCBuZXR3b3JrcyBmb3IgYWNvdXN0aWMgZW1vdGlvbi5wZGbSFwsYGVdOUy5kYXRhTxECDAAAAAACDAACAAAWQXBwbGUgU1NEIFNNMTI4QyBNZWRpYQAAAAAAAAAAAEJEAAH/////H0RlZXAgbmV1cmFsIG5ldHdvciNGRkZGRkZGRi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////8AAAAAUERGIAAAAAAAAQADAAAKIGN1AAAAAAAAAAAAAAAAAAlTdHVobHNhdHoAAAIAYC86VXNlcnM6bWljaGFlbDpEcm9wYm94OlVuaTpiaWJsaW9ncmFwaHk6U3R1aGxzYXR6OkRlZXAgbmV1cmFsIG5ldHdvcmtzIGZvciBhY291c3RpYyBlbW90aW9uLnBkZgAOAFwALQBEAGUAZQBwACAAbgBlAHUAcgBhAGwAIABuAGUAdAB3AG8AcgBrAHMAIABmAG8AcgAgAGEAYwBvAHUAcwB0AGkAYwAgAGUAbQBvAHQAaQBvAG4ALgBwAGQAZgAPAC4AFgBBAHAAcABsAGUAIABTAFMARAAgAFMATQAxADIAOABDACAATQBlAGQAaQBhABIAXlVzZXJzL21pY2hhZWwvRHJvcGJveC9VbmkvYmlibGlvZ3JhcGh5L1N0dWhsc2F0ei9EZWVwIG5ldXJhbCBuZXR3b3JrcyBmb3IgYWNvdXN0aWMgZW1vdGlvbi5wZGYAEwABLwAAFQACAA7//wAAgAbSGxwdHlokY2xhc3NuYW1lWCRjbGFzc2VzXU5TTXV0YWJsZURhdGGjHR8gVk5TRGF0YVhOU09iamVjdNIbHCIjXE5TRGljdGlvbmFyeaIiIF8QD05TS2V5ZWRBcmNoaXZlctEmJ1Ryb290gAEACAARABoAIwAtADIANwBAAEYATQBVAGAAZwBqAGwAbgBxAHMAdQB3AIQAjgDIAM0A1QLlAucC7AL3AwADDgMSAxkDIgMnAzQDNwNJA0wDUQAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAANT},
  bdsk-url-1    = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=5947651},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/2b9284ed5fecf564f51fd4d58e1e40bc0/michaelneumann},
  booktitle     = {2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  date-added    = {2016-06-01 13:33:18 +0000},
  date-modified = {2017-02-22 13:00:49 +0000},
  doi           = {10.1109/ICASSP.2011.5947651},
  interhash     = {bad6d2b3cb3e5f65201b3e0fe77c04e8},
  intrahash     = {b9284ed5fecf564f51fd4d58e1e40bc0},
  issn          = {1520-6149},
  keywords      = {Analysis; Computing;Deep Discriminant Networks;Emotion Neural Recognition;Generalized acoustic analysis;linear artificial classification;multilayer discriminant emotion extraction;Speech;Support extraction;neural feature;acoustic machine;Acoustics;Artificial machines;Affective machines;GerDA nets;support network;support networks;Databases;Emotion networks;generalized neural processing;emotion recognition;Feature recognition;deep recognition;feature signal vector},
  month         = may,
  pages         = {5688--5691},
  timestamp     = {2018-02-19T13:58:22.000+0100},
  title         = {Deep Neural Networks for Acoustic Emotion Recognition: Raising the Benchmarks},
  year          = {2011},
}