% Conference paper (ICIC 2022) on normalizing code-mixed Twitter text with a
% language-detection step ahead of the normalization models.
%
% Conventions applied to this record:
%   - author names use the unambiguous "Last, First" form for every author;
%   - only the proper noun in the title is brace-protected, so styles remain
%     free to apply sentence casing to the rest;
%   - the proceedings name lives in booktitle only (no series field, because
%     the proceedings title is not a publication series);
%   - funding / copyright / event-date text is stored in annote, which the
%     standard styles do not print, instead of note, which they do print.
%
% NOTE(review): address holds a country rather than the publisher's city;
% confirm the intended publisher location and replace it if known.
@inproceedings{1a21861cd33b4d35a81dab930752915f,
  author    = {Rizqullah, Rafi Dwi and Budi, Indra},
  title     = {Text Normalization on Code-Mixed {Twitter} Text using Language Detection},
  booktitle = {2022 7th International Conference on Informatics and Computing, ICIC 2022},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/ICIC56845.2022.10006949},
  language  = {English},
  keywords  = {code-mixed language, language detection, multilingual, text normalization, twitter},
  abstract  = {Presence of code-mixed language become a challenge for NLP research that focused on Twitter text normalization. Some challenges include normalize text containing words with more than one language. But recent method for text normalization still has problems related to language, either on identifying language or normalize a word. This research report covers the solution that can be given to overcome those problems. The approach is using language detection module alongside with transformer model. A BERT model tagger was used as a language detection, and two ByT5 models was used as a normalization. The research shows that proposed method has ERR score 1.01 percent lower than baseline.},
  annote    = {Funding Information: ACKNOWLEDGMENT This research was funded by PUTI Pascasarjana Universitas Indonesia, grant number NKB-104/UN2.RST/HKP.05.00/2022. Publisher Copyright: {\textcopyright} 2022 IEEE.; 7th International Conference on Informatics and Computing, ICIC 2022 ; Conference date: 08-12-2022 Through 09-12-2022},
}