% Conference paper on feature subsampling in XGBoost for car-insurance claim
% prediction (ICSAS 2020, published in the AIP Conference Proceedings series).
% The citation key is kept unchanged so existing citations keep resolving.
% Names use the unambiguous "Last, First" form; the acronym in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@inproceedings{8becdfce2c0b4a5181efd39b9caac4cc,
  author    = {Khusna, Waf{\'\i}yatul and Murf{\'\i}, Hendri},
  title     = {An analysis of the proportion of feature subsampling on {XG} boost -- A case study of claim prediction in car insurance},
  booktitle = {International Conference on Science and Applied Science, {ICSAS} 2020},
  editor    = {Purnama, Budi and Nugraha, Dewanta Arya and Anwar, Fuad},
  series    = {AIP Conference Proceedings},
  publisher = {American Institute of Physics Inc.},
  year      = {2020},
  month     = nov,
  day       = {16},
  doi       = {10.1063/5.0031366},
  language  = {English},
  abstract  = {Claim prediction is one of the important elements in the insurance. The increasing frequency of claim makes the data volume also increases to become big data. So, we need the right machine learning method to help insurance companies manage big data more efficiently. XGBoost is a machine learning model based on decision trees. XGBoost can be applied for claim prediction case in the form of two-class or multi-class classification. We may select a subset of features in building the XGBoost model especially for data with a large number of features. In this paper, we examine the influence of the proportion of features on the accuracy of the XGBoost model. Our simulations show that by randomly using 1/5 of features, the XGBoost model can produce accuracy comparable to the model that uses all features. It means that the XGBoost model is scalable in terms of the proportion of features.},
  note      = {Publisher Copyright: {\textcopyright} 2020 American Institute of Physics Inc. All rights reserved. Copyright: Copyright 2020 Elsevier B.V., All rights reserved.; 2020 International Conference on Science and Applied Science, ICSAS 2020 ; Conference date: 07-07-2020},
}