% Conference paper published in the ICCCI 2012 proceedings (LNCS series).
% The citation key looks like an exporter-generated identifier; it is kept
% unchanged so that existing \cite commands continue to resolve.
% The export carried "PART 2" in both `number` and `edition`. It is neither
% a series number nor an edition, so the part designation is recorded in
% `note`, where every style prints it verbatim.
% TODO(review): add the LNCS volume number as `volume` after confirming it
% against the publisher record.
@inproceedings{ff053d8ec877475984097851408bc98d,
  author    = {Moon, Young Chan and Jung, Ho Min and Yoo, Chuck and Ko, Young Woong},
  title     = {Data deduplication using dynamic chunking algorithm},
  booktitle = {Computational Collective Intelligence},
  series    = {Lecture Notes in Computer Science},
  pages     = {59--68},
  year      = {2012},
  isbn      = {9783642347061},
  doi       = {10.1007/978-3-642-34707-8_7},
  language  = {English},
  keywords  = {Chunking algorithm, Deduplication, File similarity, Metadata},
  abstract  = {Data deduplication is widely used in storage systems to prevent duplicated data blocks. In this paper, we suggest a dynamic chunking approach using fixed-length chunking and file similarity technique. The fixed-length chunking struggles with boundary shift problem and shows poor performance when handling duplicated data files. The key idea of this work is to utilize duplicated data information in the file similarity information. We can easily find several duplicated point by comparing hash key value and file offset within file similarity information. We consider these duplicated points as a hint for starting position of chunking. With this approach, we can significantly improve the performance of data deduplication system using fixed-length chunking. In experiment result, the proposed dynamic chunking results in significant performance improvement for deduplication processing capability and shows fast processing time comparable to that of fixed length chunking.},
  note      = {Proceedings, Part 2. 4th International Conference on Computational Collective Intelligence, ICCCI 2012; conference dates: 28--30 November 2012},
}