% Conference paper (XSEDE 2015, ACM International Conference Proceeding Series).
% Author names use the "Last, First" form; given names carry no literal braces.
@inproceedings{792d263cbf864cd38d3ddceac58fcc66,
  author    = {Anderson, Bill and Genty, Marc and Hart, David L. and Thanhardt, Erich},
  title     = {Using data science to understand tape-based archive workloads},
  booktitle = {Proceedings of the {XSEDE} 2015 Conference},
  series    = {ACM International Conference Proceeding Series},
  publisher = {Association for Computing Machinery},
  address   = {United States},
  year      = {2015},
  month     = jul,
  day       = {26},
  doi       = {10.1145/2792745.2792776},
  language  = {English},
  keywords  = {Analysis, Archive, Data science, Metrics},
  abstract  = {Data storage needs continue to grow in most fields, and the cost per byte for tape remains lower than the cost for disk, making tape storage a good candidate for cost-effective long-term storage. However, the workloads suitable for tape archives differ from those for disk file systems, and archives must handle internally generated workloads that can be more demanding than those generated by end users (e.g., migration of data from an old tape technology to a new one). To better understand the variegated workloads, we have followed the first steps in the data science methodology. For anyone considering the use or deployment of a tape-based data archive or for anyone interested in details of data archives in the context of data science, this paper describes key aspects of data archive workloads.},
  note      = {Publisher Copyright: Copyright {\textcopyright} 2015 ACM.; 4th Annual Conference on Extreme Science and Engineering Discovery Environment, XSEDE 2015 ; Conference date: 26-07-2015 Through 30-07-2015},
}