% Conference paper (O-COCOSDA 2024, IEEE) on Indonesian ASR with MMS and Whisper.
% - Names are stored in "Last, First" form so parsing does not depend on word order.
% - Braced words in title/booktitle keep their capitalisation under sentence-casing styles.
% - No `series` field: the export repeated the booktitle there verbatim, which makes
%   standard styles print the proceedings title twice.
% NOTE(review): editor "Taso, Yu" is kept as exported; it may be a transposition of
%   "Tsao" -- confirm against the proceedings front matter before correcting.
% NOTE(review): `address` holds a country, not the publisher's city -- confirm and refine.
@inproceedings{6035e4fc564243548a0885b42e867987,
  author    = {Adila, Aulia and Lestari, Dessi and Purwarianti, Ayu and Tanaya, Dipta and Azizah, Kurniawati and Sakti, Sakriani},
  title     = {Enhancing {Indonesian} Automatic Speech Recognition: Evaluating Multilingual Models with Diverse Speech Variabilities},
  booktitle = {2024 27th Conference on the Oriental {COCOSDA} International Committee for the Co-Ordination and Standardisation of Speech Databases and Assessment Techniques, {O-COCOSDA} 2024 - Proceedings},
  editor    = {Su, Ming-Hsiang and Yeh, Jui-Feng and Liao, Yuan-Fu and Lee, Chi-Chun and Taso, Yu},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2024},
  doi       = {10.1109/O-COCOSDA64382.2024.10800336},
  language  = {English},
  keywords  = {Indonesian language, MMS, speech recognition, speech variability, Whisper},
  abstract  = {An ideal speech recognition model has the capability to transcribe speech accurately under various characteristics of speech signals, such as speaking style (read and spontaneous), speech context (formal and informal), and background noise conditions (clean and moderate). Building such a model requires a significant amount of training data with diverse speech characteristics. Currently, Indonesian data is dominated by read, formal, and clean speech, leading to a scarcity of Indonesian data with other speech variabilities. To develop Indonesian automatic speech recognition (ASR), we present our research on state-of-the-art speech recognition models, namely Massively Multilingual Speech (MMS) and Whisper, as well as compiling a dataset comprising Indonesian speech with variabilities to facilitate our study. We further investigate the models' predictive ability to transcribe Indonesian speech data across different variability groups. The best results were achieved by the Whisper fine-tuned model across datasets with various characteristics, as indicated by the decrease in word error rate (WER) and character error rate (CER). Moreover, we found that speaking style variability affected model performance the most.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 27th Conference on the Oriental COCOSDA International Committee for the Co-Ordination and Standardisation of Speech Databases and Assessment Techniques, O-COCOSDA 2024 ; Conference date: 17-10-2024 Through 19-10-2024},
}