% Conference paper in the IALP 2020 proceedings.
% The funding statement, copyright line and conference date range are stored in
% the dedicated fields `funding`, `copyright` and `eventdate` rather than in
% `note`, so that styles which print `note` do not emit them in the reference.
% `funding` and `copyright` are non-standard fields that standard styles ignore.
@inproceedings{13c1f19db60143f5951aca51f19f9afe,
  author    = {Wibowo, Haryo Akbarianto and Prawiro, Tatag Aziz and Ihsan, Muhammad and Aji, Alham Fikri and Prasojo, Radityo Eko and Mahendra, Rahmad and Fitriany, Suci},
  title     = {Semi-Supervised Low-Resource Style Transfer of {Indonesian} Informal to Formal Language with Iterative Forward-Translation},
  booktitle = {2020 International Conference on Asian Language Processing, {IALP} 2020},
  editor    = {Lu, Yanfeng and Dong, Minghui and Soon, Lay-Ki and Gan, Keng Hoon},
  pages     = {310--315},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2020},
  month     = dec,
  day       = {4},
  eventdate = {2020-12-04/2020-12-06},
  doi       = {10.1109/IALP51396.2020.9310459},
  language  = {English},
  keywords  = {colloquial, Indonesian, machine translation, natural language processing, semi-supervised, style-transfer},
  abstract  = {In its daily use, the Indonesian language is riddled with informality, that is, deviations from the standard in terms of vocabulary, spelling, and word order. On the other hand, current available Indonesian NLP models are typically developed with the standard Indonesian in mind. In this work, we address a style-transfer from informal to formal Indonesian as a low-resource machine translation problem. We build a new dataset of parallel sentences of informal Indonesian and its formal counterpart. We benchmark several strategies to perform style transfer from informal to formal Indonesian. We also explore augmenting the training set with artificial forward-translated data. Since we are dealing with an extremely low-resource setting, we find that a phrase-based machine translation approach outperforms the Transformer-based approach. Alternatively, a pre-trained GPT-2 fined-tuned to this task performed equally well but costs more computational resource. Our findings show a promising step towards leveraging machine translation models for style transfer. Our code and data are available in https://github.com/haryoa/stif-indonesia.},
  funding   = {This research was supported by the research grant from Universitas Indonesia, namely Publikasi Terindeks Internasional (PUTI) Saintekkes year 2020 no NKB-2142/UN2.RST/HKP.05.00/2020},
  copyright = {{\textcopyright} 2020 IEEE},
}