% Conference paper from the ACL 2022 long-papers proceedings (pages 7226--7249).
% Names are stored in "Last, First" form; case-sensitive title words are braced.
% Exporter boilerplate (copyright line, event name, conference dates) is kept in
% the "annote" field, which the standard styles do not print.
@inproceedings{f665b4b99a9c485d94b2aa6a8b0d1a62,
  author    = {Aji, Alham Fikri and Winata, Genta Indra and Koto, Fajri and Cahyawijaya, Samuel and Romadhony, Ade and Mahendra, Rahmad and Kurniawan, Kemal and Moeljadi, David and Prasojo, Radityo Eko and Baldwin, Timothy and Lau, Jey Han and Ruder, Sebastian},
  title     = {One Country, 700+ Languages: {NLP} Challenges for Underrepresented Languages and Dialects in {Indonesia}},
  booktitle = {ACL 2022 - 60th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference (Long Papers)},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  pages     = {7226--7249},
  year      = {2022},
  month     = may,
  language  = {English},
  abstract  = {NLP research is impeded by a lack of resources and awareness of the challenges presented by underrepresented languages and dialects. Focusing on the languages spoken in Indonesia, the second most linguistically diverse and the fourth most populous nation of the world, we provide an overview of the current state of NLP research for Indonesia's 700+ languages. We highlight challenges in Indonesian NLP and how these affect the performance of current NLP systems. Finally, we provide general recommendations to help develop NLP technology not only for languages of Indonesia but also other underrepresented languages.},
  annote    = {Publisher Copyright: {\textcopyright} 2022 Association for Computational Linguistics.; 60th Annual Meeting of the Association for Computational Linguistics, ACL 2022 ; Conference date: 22-05-2022 Through 27-05-2022},
}