% Conference paper in the ICACSIS 2017 proceedings (DOI 10.1109/ICACSIS.2017.8355036).
% NOTE(review): month/day below (2 July) do not match the conference dates
%   recorded in the note field (28-29 October 2017) -- confirm which date is intended.
% NOTE(review): address holds a country rather than a publisher city -- confirm.
@inproceedings{65db4674790446ae8eec80a7c69c2f38,
  author    = {Alfina, Ika and Savitri, Septiviana and Fanany, Mohamad Ivan},
  title     = {Modified {DBpedia} entities expansion for tagging automatically {NER} dataset},
  booktitle = {2017 International Conference on Advanced Computer Science and Information Systems, {ICACSIS} 2017},
  pages     = {216--221},
  year      = {2017},
  month     = jul,
  day       = {2},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/ICACSIS.2017.8355036},
  language  = {English},
  keywords  = {DBpedia, NER, building dataset, noise reduction},
  abstract  = {Developing NER system using machine learning approach needs a big dataset which is costly if the dataset labeling is done manually. The previous works proposed methods in tagging automatically the Indonesian NER dataset using Wikipedia articles as the source of the dataset and DBpedia as the reference of the entity type. However, the quality of the resulting dataset was still inadequate. A method named DBpedia Entities Expansion (DEE) had introduced several rules to expand named entities in DBpedia in order to improve recall, but it had not managed to remove noise that makes precision decline, especially for person names. The objective of this research is to propose the modification to DEE method with the main focus to remove invalid names from the list of person names in the Expanded DBpedia. We call this modification as Modified DEE (M-DEE). The evaluation shows that M-DEE can improve the precision for person names around 3% compared to the original DEE. By adding gazetteers for place and organization names into the Expanded DBpedia created by M-DEE, the margin about 10% of the overall F1-score for all types was achieved.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 9th International Conference on Advanced Computer Science and Information Systems, ICACSIS 2017 ; Conference date: 28-10-2017 Through 29-10-2017},
}