% Conference paper (ICAI 2018, part of CSCE 2018). Editor names are stored as
% plain "von Last, First"; the proceedings title lives only in booktitle.
@inproceedings{b5a1b9c0aed04380aac045c61abe4c16,
  author    = {B. Anderson and M. Genty},
  title     = {Using K-means clustering to detect anomalous file removes},
  booktitle = {2018 World Congress in Computer Science, Computer Engineering and Applied Computing, CSCE 2018 - Proceedings of the 2018 International Conference on Artificial Intelligence, ICAI 2018},
  editor    = {Arabnia, Hamid R. and de la Fuente, David and Kozerenko, Elena B. and Olivas, Jose A. and Tinetti, Fernando G.},
  publisher = {CSREA Press},
  year      = {2018},
  pages     = {454--458},
  language  = {English},
  keywords  = {Analysis, Archive, Cybersecurity, Data science, Machine learning, Metrics},
  abstract  = {One of the purposes of a data archive is to preserve irreplaceable data for future studies and generations. There are a number of ways that data can be lost from an archive, including accidental or malicious deletion of data. While there is a lot of software that can check for specific known threats or problems on a system, detecting non-specific anomalous behavior, such as unusual file removal patterns, is harder. One approach to detecting this kind of problem is machine learning. Machine learning algorithms can build a statistical model of what constitutes normal behavior and then flag data points that are outliers. To help protect the 87 petabytes of data in the National Center for Atmospheric Research's data archive, we explored our file removal patterns and implemented a k-means clustering solution to detect anomalous file removes. This approach can also be used to detect other anomalies, such as operational inconsistencies.},
  note      = {Publisher Copyright: CSREA Press {\textcopyright}.; 2018 International Conference on Artificial Intelligence, ICAI 2018 at 2018 World Congress in Computer Science, Computer Engineering and Applied Computing, CSCE 2018 ; Conference date: 30-07-2018 Through 02-08-2018},
}