% Conference paper in the ASRU 2015 proceedings. The workshop ran 13-17 Dec 2015
% (see the note field); year/month/day record the 10 Feb 2016 publication date.
% Braces in title/booktitle protect proper nouns and acronyms from style recasing.
% The abstract is kept verbatim as exported.
@inproceedings{9f0aff62dded4a318dab14933710161b,
  author    = {Tjandra, Andros and Sakti, Sakriani and Nakamura, Satoshi and Adriani, Mirna},
  title     = {Stochastic Gradient Variational {Bayes} for deep learning-based {ASR}},
  booktitle = {2015 {IEEE} Workshop on Automatic Speech Recognition and Understanding, {ASRU} 2015 - Proceedings},
  pages     = {175--180},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2016},
  month     = feb,
  day       = {10},
  doi       = {10.1109/ASRU.2015.7404791},
  language  = {English},
  keywords  = {acoustic model, autoencoder, deep neural network, variational Bayes},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; IEEE Workshop on Automatic Speech Recognition and Understanding, ASRU 2015 ; Conference date: 13-12-2015 Through 17-12-2015},
  abstract  = {Many successful methods for training deep neural networks (DNN) rely on an unsupervised pretraining algorithm. It is particularly effective when the number of labeled training samples is not large enough, because pretraining method helps to initialize the parameter values in the appropriate range near a local good minimum, for further discriminative finetuning. However, while the improvement is impressive, training DNN is difficult because the objective function of DNN is highly non-convex function of the parameters. To avoid placing the parameter that generalizes poorly, a robust generative modelling is necessary. This paper explore an alternative of generative modelling for pretraining DNN-based acoustic modelling using Stochastic Gradient Variational Bayes (SGVB) within autoencoder framework called Variational Bayes Autoencoder (VBAE). It performs an efficient approximate inference and learning with directed probabilistic graphical models. During fine-tuning, probabilistic encoder parameters with latent variable components are then used in discriminative training for acoustic model. Here, we investigate the performances of DNN-based acoustic model using the proposed pretrained VBAE in comparison with widely used pretraining algorithms like Restricted Boltzmann Machine (RBM) and Stacked Denoising Autoencoder (SDAE). The results reveal that VBAE pretraining with Gaussian latent variables gave the best performance.},
}