% Conference paper from IALP 2020 on SMT-based lexical normalization of Indonesian text.
% Proper nouns and acronyms are brace-protected so sentence-casing styles keep their capitals.
% Names use the "Last, First" form so the surname split never depends on word-order heuristics.
% NOTE(review): address holds a country rather than the publisher's city -- confirm and refine.
@inproceedings{7f3453dca7c749528937b7d8f2b84273,
  author    = "Kurnia, Ajmal and Yulianti, Evi",
  title     = "Statistical Machine Translation Approach for Lexical Normalization on {Indonesian} Text",
  booktitle = "2020 International Conference on {Asian} Language Processing, {IALP} 2020",
  editor    = "Lu, Yanfeng and Dong, Minghui and Soon, Lay-Ki and Gan, {Keng Hoon}",
  pages     = "288--293",
  publisher = "Institute of Electrical and Electronics Engineers Inc.",
  address   = "United States",
  year      = "2020",
  month     = dec,
  day       = "4",
  doi       = "10.1109/IALP51396.2020.9310508",
  language  = "English",
  keywords  = "Indonesian, Lexical normalization, machine translation, social media",
  abstract  = "Lexical normalization is an important task to be performed on noisy data, such as social media posts, before using the data for further analysis. We examine the potential of Statistical Machine Translation (SMT) for normalization of Indonesian text using the translation unit on both phrase and character levels. We also used an external corpus to generate additional language model data and pre-normalization rules to enhance the SMT system. The result shows the SMT systems on both phrase and character levels are outperforming various baseline in Word Error Rate (WER) score and Bilingual Understudy Evaluation (BLEU) score. This research also demonstrates the effect of using an external language model and applying pre-normalization rules can further enhance the effectiveness of SMT systems in normalizing Indonesian text.",
  note      = "Publisher Copyright: {\textcopyright} 2020 IEEE.; 2020 International Conference on Asian Language Processing, IALP 2020 ; Conference date: 04-12-2020 Through 06-12-2020",
}