@comment{
  Conference paper: Liang et al., 2018 IEEE International Conference on Big Data.
  Review notes for the entry below:
  - month/day follow the conference dates recorded in the note field
    (10-13 Dec 2018); the exported value (2 Jul) did not match them.
  - The series field was dropped: it was a verbatim copy of booktitle, which
    makes standard styles print the proceedings title twice.
  - abstract: "single-to-noise" corrected to "signal-to-noise", plus two
    grammar slips ("in details", "experiments ... shows").
  - address holds a country rather than a publisher city; kept as exported
    -- TODO confirm the publisher location.
}
@inproceedings{cd8efaba30fa4fa79efbb36af117af6e,
  author    = {Liang, Xin and Di, Sheng and Tao, Dingwen and Li, Sihuan and Li, Shaomeng and Guo, Hanqi and Chen, Zizhong and Cappello, Franck},
  title     = {Error-Controlled Lossy Compression Optimized for High Compression Ratios of Scientific Datasets},
  booktitle = {Proceedings - 2018 {IEEE} International Conference on Big Data, Big Data 2018},
  editor    = {Abe, Naoki and Liu, Huan and Pu, Calton and Hu, Xiaohua and Ahmed, Nesreen and Qiao, Mu and Song, Yang and Kossmann, Donald and Liu, Bing and Lee, Kisung and Tang, Jiliang and He, Jingrui and Saltz, Jeffrey},
  pages     = {438--447},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2018},
  month     = dec,
  day       = {10},
  doi       = {10.1109/BigData.2018.8622520},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 2018 IEEE International Conference on Big Data, Big Data 2018 ; Conference date: 10-12-2018 Through 13-12-2018},
  abstract  = {Today's scientific simulations require a significant reduction of the data size because of extremely large volumes of data they produce and the limitation of storage bandwidth and space. If the compression is set to reach a high compression ratio, however, the reconstructed data are often distorted too much to tolerate. In this paper, we explore a new compression strategy that can effectively control the data distortion when significantly reducing the data size. The contribution is threefold. (1) We propose an adaptive compression framework to select either our improved Lorenzo prediction method or our optimized linear regression method dynamically in different regions of the dataset. (2) We explore how to select them accurately based on the data features in each block to obtain the best compression quality. (3) We analyze the effectiveness of our solution in detail using four real-world scientific datasets with 100+ fields. Evaluation results confirm that our new adaptive solution can significantly improve the rate distortion for the lossy compression with fairly high compression ratios. The compression ratio of our compressor is 1.5X\textasciitilde{}8X as high as that of two other leading lossy compressors (SZ and ZFP) with the same peak signal-to-noise ratio (PSNR), in the high-compression cases. Parallel experiments with 8,192 cores and 24 TB of data show that our solution obtains 1.86X dumping performance and 1.95X loading performance compared with the second-best lossy compressor, respectively.},
}