% Conference paper from the ICASSP 2015 proceedings: combining two-dimensional
% cochleogram and spectrogram features for CNN/DNN-based speech recognition.
% Percent signs in the abstract are escaped so styles that print it stay valid LaTeX.
% NOTE(review): "address" holds a country rather than the publisher's city -- confirm
% the intended value before relying on it in a style that prints the location.
@inproceedings{d996ceaae5394cab8ec74f8dc2387455,
  author    = {Tjandra, Andros and Sakti, Sakriani and Neubig, Graham and Toda, Tomoki and Adriani, Mirna and Nakamura, Satoshi},
  title     = {Combination of two-dimensional cochleogram and spectrogram features for deep learning-based {ASR}},
  booktitle = {2015 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2015 - Proceedings},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {4525--4529},
  year      = {2015},
  month     = aug,
  day       = {4},
  doi       = {10.1109/ICASSP.2015.7178827},
  language  = {English},
  keywords  = {DNN and CNN, Deep learning, cochleogram, feature combination},
  abstract  = {This paper explores the use of auditory features based on cochleograms; two dimensional speech features derived from gammatone filters within the convolutional neural network (CNN) framework. Furthermore, we also propose various possibilities to combine cochleogram features with log-mel filter banks or spectrogram features. In particular, we combine within low and high levels of CNN framework which we refer to as low-level and high-level feature combination. As comparison, we also construct the similar configuration with deep neural network (DNN). Performance was evaluated in the framework of hybrid neural network - hidden Markov model (NN-HMM) system on TIMIT phoneme sequence recognition task. The results reveal that cochleogram-spectrogram feature combination provides significant advantages. The best accuracy was obtained by high-level combination of two dimensional cochleogram-spectrogram features using CNN, achieved up to 8.2\% relative phoneme error rate (PER) reduction from CNN single features or 19.7\% relative PER reduction from DNN single features.},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; 40th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2015 ; Conference date: 19-04-2015 Through 24-04-2015},
}