% Conference paper (Computing Conference 2023) published in the Springer
% series Lecture Notes in Networks and Systems.
% Acronyms in the title and booktitle are brace-protected so that styles which
% apply sentence casing keep them in capitals.
% All personal names use the unambiguous Last, First form.
% NOTE(review): no volume number is recorded for the series; confirm against
% the publisher record and add a volume field if one applies.
@inproceedings{a40e03e86e54490eb7a90bbbe3391725,
title = "Investigating the Stability of {SMOTE}-Based Oversampling on {COVID-19} Data",
abstract = "Predictive analytic methods for medical diagnosis can be helpful in supporting decision-making of medical treatment, which in turn reduce the need for medical experts{\textquoteright} attention. However, imbalanced data problems often exist in medical diagnosis datasets and negatively impact the models{\textquoteright} predictive performance. The results of learning algorithms on imbalanced data are biased and often cause over-fitting of the majority class. The Synthetic Minority Over-sampling Technique (SMOTE) was proposed to deal with this over-fitting challenge. The application of SMOTE requires the over-sampling of the minority class(es). However, there are vague guidelines on how much oversampling on the minority class is suitable. Therefore, experiments on oversampling using SMOTE with different oversampling ratio setups are done on a medical diagnosis dataset. It is observed that the increase in oversampling rate will reduce the accuracy and precision. Oversampling to a uniform level and excessive oversampling can cause poorer performance. Both recall and precision should be considered based on the costs when deciding the best oversampling percentage.",
keywords = "Boosting, COVID-19, Data Pre-processing, Data Sampling, SMOTE",
author = "Tan, Jih Soong and Yee, Hui Jia and Boo, Ivan and Tan, Ian K. T. and Zakariah, Helmi",
note = "Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; Computing Conference 2023 ; Conference date: 22-06-2023 Through 23-06-2023",
year = "2023",
month = aug,
day = "20",
doi = "10.1007/978-3-031-37963-5_33",
language = "English",
isbn = "9783031379628",
series = "Lecture Notes in Networks and Systems",
publisher = "Springer",
pages = "470--480",
editor = "Arai, Kohei",
booktitle = "Intelligent Computing. {SAI} 2023",
}