From 25e1dea6ee4929211bcc7340e3e0164b9a5c60e7 Mon Sep 17 00:00:00 2001
From: Richard Raya
Date: Wed, 20 Nov 2024 01:51:38 -0300
Subject: [PATCH] zram: Restore deduplication feature

Change-Id: I09c77366b1ac0f6cc7c6f0f28197e48d2e1d3270
Signed-off-by: Richard Raya
---
 Documentation/ABI/testing/sysfs-block-zram |  10 +
 Documentation/blockdev/zram.txt            |   3 +
 drivers/block/zram/Kconfig                 |  14 ++
 drivers/block/zram/Makefile                |   2 +
 drivers/block/zram/zram_dedup.c            | 255 +++++++++++++++++++++
 drivers/block/zram/zram_dedup.h            |  45 ++++
 drivers/block/zram/zram_drv.c              | 176 +++++++++++---
 drivers/block/zram/zram_drv.h              |  38 ++-
 8 files changed, 509 insertions(+), 34 deletions(-)
 create mode 100644 drivers/block/zram/zram_dedup.c
 create mode 100644 drivers/block/zram/zram_dedup.h

diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 14b2bf2e5105..c374ab73bb8e 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -99,6 +99,16 @@ Description:
 		device for zram to write incompressible pages.
 		For using, user should enable CONFIG_ZRAM_WRITEBACK.
 
+What:		/sys/block/zram<id>/use_dedup
+Date:		March 2017
+Contact:	Joonsoo Kim
+Description:
+		The use_dedup file is read-write and specifies whether the
+		deduplication feature is used. If enabled, duplicated data is
+		managed by reference count and is not stored in memory twice.
+		The benefit of this feature depends largely on the workload,
+		so evaluate it carefully before enabling it.
+
 What:		/sys/block/zram<id>/idle
 Date:		November 2018
 Contact:	Minchan Kim
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index dce85efb177e..157e5c702f55 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -172,6 +172,7 @@ comp_algorithm    RW    show and change the compression algorithm
 compact           WO    trigger memory compaction
 debug_stat        RO    this file is used for zram debugging purposes
 backing_dev       RW    set up backend storage for zram to write out
+use_dedup         RW    show and set deduplication feature
 idle              WO    mark allocated slot as idle
 
 User space is advised to use the following files to read the device statistics.
@@ -221,6 +222,8 @@ line of text and contains the following stats separated by whitespace:
 same_pages        the number of same element filled pages written to this disk.
                   No memory is allocated for such pages.
 pages_compacted   the number of pages freed during compaction
+dup_data_size     deduplicated data size
+meta_data_size    the amount of metadata allocated for deduplication feature
 huge_pages        the number of incompressible pages
 
 File /sys/block/zram<id>/bd_stat
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 7571329101c7..23143e6757ba 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -40,6 +40,20 @@ config ZRAM_DEF_COMP
 	default "zstd" if ZRAM_DEF_COMP_ZSTD
 	default "lz4" if ZRAM_DEF_COMP_LZ4
 
+config ZRAM_DEDUP
+	bool "Deduplication support for ZRAM data"
+	depends on ZRAM && LIBCRC32C
+	default n
+	help
+	  Deduplicate ZRAM data to reduce memory consumption. The benefit
+	  depends largely on the workload; in some cases this option cuts
+	  memory usage in half. However, if there is little duplicated
+	  data, memory consumption increases because of the additional
+	  metadata, and there is also a computation-time trade-off.
+	  Please measure the benefit before enabling this option.
+	  Experiments show a positive effect when zram is used as a
+	  block device to store build output.
+
 config ZRAM_WRITEBACK
 	bool "Write back incompressible or idle page to backing device"
 	depends on ZRAM
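A typical way to exercise the knobs added above from userspace: use_dedup has to be written before the device is initialized (before disksize), and the two new mm_stat columns report what the feature saves and costs. The sketch below is illustrative only; the device name, disksize and error handling are placeholders, not part of the patch.

	/*
	 * Illustrative userspace sequence (not part of the patch): enable
	 * deduplication on a hypothetical /dev/zram0 before disksize is set,
	 * then read back the two new mm_stat columns.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	static void write_str(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f || fputs(val, f) == EOF)
			exit(1);
		fclose(f);
	}

	int main(void)
	{
		unsigned long long stat[10] = { 0 };
		FILE *f;
		int i;

		/* use_dedup can only be changed while the device is uninitialized */
		write_str("/sys/block/zram0/use_dedup", "1");
		write_str("/sys/block/zram0/disksize", "1073741824");	/* 1 GiB */

		f = fopen("/sys/block/zram0/mm_stat", "r");
		if (!f)
			return 1;
		for (i = 0; i < 10; i++)
			if (fscanf(f, "%llu", &stat[i]) != 1)
				break;
		fclose(f);

		/* with this patch, columns 8 and 9 are dup_data_size and meta_data_size */
		printf("dup_data_size=%llu meta_data_size=%llu\n", stat[7], stat[8]);
		return 0;
	}

Writing use_dedup after initialization fails with -EBUSY; that behaviour comes from use_dedup_store() further down in the patch.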
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
index a60493b21678..d7204ef6ee53 100644
--- a/drivers/block/zram/Makefile
+++ b/drivers/block/zram/Makefile
@@ -1,2 +1,4 @@
 zram-y	:=	zcomp.o zram_drv.o
+zram-$(CONFIG_ZRAM_DEDUP) += zram_dedup.o
+
 obj-$(CONFIG_ZRAM)	+=	zram.o
diff --git a/drivers/block/zram/zram_dedup.c b/drivers/block/zram/zram_dedup.c
new file mode 100644
index 000000000000..1e18fe422683
--- /dev/null
+++ b/drivers/block/zram/zram_dedup.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2017 Joonsoo Kim.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/crc32c.h>
+#include <linux/highmem.h>
+
+#include "zram_drv.h"
+
+/* One slot will contain 128 pages theoretically */
+#define ZRAM_HASH_SHIFT		7
+#define ZRAM_HASH_SIZE_MIN	(1 << 10)
+#define ZRAM_HASH_SIZE_MAX	(1 << 31)
+
+u64 zram_dedup_dup_size(struct zram *zram)
+{
+	return (u64)atomic64_read(&zram->stats.dup_data_size);
+}
+
+u64 zram_dedup_meta_size(struct zram *zram)
+{
+	return (u64)atomic64_read(&zram->stats.meta_data_size);
+}
+
+static u32 zram_dedup_checksum(unsigned char *mem)
+{
+	return crc32c(0, mem, PAGE_SIZE);
+}
+
+void zram_dedup_insert(struct zram *zram, struct zram_entry *new,
+				u32 checksum)
+{
+	struct zram_hash *hash;
+	struct rb_root *rb_root;
+	struct rb_node **rb_node, *parent = NULL;
+	struct zram_entry *entry;
+
+	if (!zram_dedup_enabled(zram))
+		return;
+
+	new->checksum = checksum;
+	hash = &zram->hash[checksum % zram->hash_size];
+	rb_root = &hash->rb_root;
+
+	spin_lock(&hash->lock);
+	rb_node = &rb_root->rb_node;
+	while (*rb_node) {
+		parent = *rb_node;
+		entry = rb_entry(parent, struct zram_entry, rb_node);
+		if (checksum < entry->checksum)
+			rb_node = &parent->rb_left;
+		else if (checksum > entry->checksum)
+			rb_node = &parent->rb_right;
+		else
+			rb_node = &parent->rb_left;
+	}
+
+	rb_link_node(&new->rb_node, parent, rb_node);
+	rb_insert_color(&new->rb_node, rb_root);
+	spin_unlock(&hash->lock);
+}
+
+static bool zram_dedup_match(struct zram *zram, struct zram_entry *entry,
+				unsigned char *mem)
+{
+	bool match = false;
+	unsigned char *cmem;
+	struct zcomp_strm *zstrm;
+
+	cmem = zs_map_object(zram->mem_pool, entry->handle, ZS_MM_RO);
+	if (entry->len == PAGE_SIZE) {
+		match = !memcmp(mem, cmem, PAGE_SIZE);
+	} else {
+		zstrm = zcomp_stream_get(zram->comp);
+		if (!zcomp_decompress(zstrm, cmem, entry->len, zstrm->buffer))
+			match = !memcmp(mem, zstrm->buffer, PAGE_SIZE);
+		zcomp_stream_put(zram->comp);
+	}
+	zs_unmap_object(zram->mem_pool, entry->handle);
+
+	return match;
+}
+
+static unsigned long zram_dedup_put(struct zram *zram,
+				struct zram_entry *entry)
+{
+	struct zram_hash *hash;
+	u32 checksum;
+	unsigned long val;
+
+	checksum = entry->checksum;
+	hash = &zram->hash[checksum % zram->hash_size];
+
+	spin_lock(&hash->lock);
+
+	val = --entry->refcount;
+	if (!entry->refcount)
+		rb_erase(&entry->rb_node, &hash->rb_root);
+	else
+		atomic64_sub(entry->len, &zram->stats.dup_data_size);
+
+	spin_unlock(&hash->lock);
+
+	return val;
+}
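A note on the insert path above: entries with an equal checksum are deliberately linked to the left child, so all candidates for one checksum end up adjacent in the tree and the lookup path (later in this file) can rewind to the left-most match with rb_prev() and then walk forward with rb_next(). The checksum is only a hint; zram_dedup_match() still compares the full page. A minimal, self-contained sketch of that duplicate-key convention, with hypothetical demo_* names that are not part of the patch:

	/* Illustration only: storing and visiting duplicate keys in an rbtree. */
	#include <linux/rbtree.h>

	struct demo_node {
		struct rb_node rb_node;
		u32 key;
	};

	static void demo_insert(struct rb_root *root, struct demo_node *new)
	{
		struct rb_node **link = &root->rb_node, *parent = NULL;
		struct demo_node *cur;

		while (*link) {
			parent = *link;
			cur = rb_entry(parent, struct demo_node, rb_node);
			if (new->key > cur->key)
				link = &parent->rb_right;
			else			/* '<' and '==' both go left */
				link = &parent->rb_left;
		}
		rb_link_node(&new->rb_node, parent, link);
		rb_insert_color(&new->rb_node, root);
	}

	/* visit every node whose key equals the left-most match of a search */
	static void demo_for_each_equal(struct demo_node *leftmost, u32 key)
	{
		struct rb_node *node = &leftmost->rb_node;

		while (node) {
			struct demo_node *cur = rb_entry(node, struct demo_node, rb_node);

			if (cur->key != key)
				break;
			/* ... compare payloads here, as zram_dedup_match() does ... */
			node = rb_next(node);
		}
	}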
+
+static struct zram_entry *__zram_dedup_get(struct zram *zram,
+				struct zram_hash *hash, unsigned char *mem,
+				struct zram_entry *entry)
+{
+	struct zram_entry *tmp, *prev = NULL;
+	struct rb_node *rb_node;
+
+	/* find left-most entry with same checksum */
+	while ((rb_node = rb_prev(&entry->rb_node))) {
+		tmp = rb_entry(rb_node, struct zram_entry, rb_node);
+		if (tmp->checksum != entry->checksum)
+			break;
+
+		entry = tmp;
+	}
+
+again:
+	entry->refcount++;
+	atomic64_add(entry->len, &zram->stats.dup_data_size);
+	spin_unlock(&hash->lock);
+
+	if (prev)
+		zram_entry_free(zram, prev);
+
+	if (zram_dedup_match(zram, entry, mem))
+		return entry;
+
+	spin_lock(&hash->lock);
+	tmp = NULL;
+	rb_node = rb_next(&entry->rb_node);
+	if (rb_node)
+		tmp = rb_entry(rb_node, struct zram_entry, rb_node);
+
+	if (tmp && (tmp->checksum == entry->checksum)) {
+		prev = entry;
+		entry = tmp;
+		goto again;
+	}
+
+	spin_unlock(&hash->lock);
+	zram_entry_free(zram, entry);
+
+	return NULL;
+}
+
+static struct zram_entry *zram_dedup_get(struct zram *zram,
+				unsigned char *mem, u32 checksum)
+{
+	struct zram_hash *hash;
+	struct zram_entry *entry;
+	struct rb_node *rb_node;
+
+	hash = &zram->hash[checksum % zram->hash_size];
+
+	spin_lock(&hash->lock);
+	rb_node = hash->rb_root.rb_node;
+	while (rb_node) {
+		entry = rb_entry(rb_node, struct zram_entry, rb_node);
+		if (checksum == entry->checksum)
+			return __zram_dedup_get(zram, hash, mem, entry);
+
+		if (checksum < entry->checksum)
+			rb_node = rb_node->rb_left;
+		else
+			rb_node = rb_node->rb_right;
+	}
+	spin_unlock(&hash->lock);
+
+	return NULL;
+}
+
+struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page,
+				u32 *checksum)
+{
+	void *mem;
+	struct zram_entry *entry;
+
+	if (!zram_dedup_enabled(zram))
+		return NULL;
+
+	mem = kmap_atomic(page);
+	*checksum = zram_dedup_checksum(mem);
+
+	entry = zram_dedup_get(zram, mem, *checksum);
+	kunmap_atomic(mem);
+
+	return entry;
+}
+
+void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry,
+				unsigned long handle, unsigned int len)
+{
+	if (!zram_dedup_enabled(zram))
+		return;
+
+	entry->handle = handle;
+	entry->refcount = 1;
+	entry->len = len;
+}
+
+bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry)
+{
+	if (!zram_dedup_enabled(zram))
+		return true;
+
+	if (zram_dedup_put(zram, entry))
+		return false;
+
+	return true;
+}
+
+int zram_dedup_init(struct zram *zram, size_t num_pages)
+{
+	int i;
+	struct zram_hash *hash;
+
+	if (!zram_dedup_enabled(zram))
+		return 0;
+
+	zram->hash_size = num_pages >> ZRAM_HASH_SHIFT;
+	zram->hash_size = min_t(size_t, ZRAM_HASH_SIZE_MAX, zram->hash_size);
+	zram->hash_size = max_t(size_t, ZRAM_HASH_SIZE_MIN, zram->hash_size);
+	zram->hash = vzalloc(zram->hash_size * sizeof(struct zram_hash));
+	if (!zram->hash) {
+		pr_err("Error allocating zram entry hash\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < zram->hash_size; i++) {
+		hash = &zram->hash[i];
+		spin_lock_init(&hash->lock);
+		hash->rb_root = RB_ROOT;
+	}
+
+	return 0;
+}
+
+void zram_dedup_deinit(struct zram *zram)
+{
+	vfree(zram->hash);
+	zram->hash = NULL;
+	zram->hash_size = 0;
+}
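For a feel of the zram_dedup_init() sizing above: the bucket count is num_pages >> 7, clamped to the range [2^10, 2^31], so a hypothetical 4 GiB device gets 8192 buckets. A rough userspace arithmetic sketch (the byte figure is indicative only, since struct zram_hash size depends on the architecture and config):

	#include <stdio.h>

	#define ZRAM_HASH_SHIFT		7
	#define ZRAM_HASH_SIZE_MIN	(1UL << 10)
	#define ZRAM_HASH_SIZE_MAX	(1UL << 31)
	#define PAGE_SIZE		4096UL

	int main(void)
	{
		unsigned long disksize = 4UL << 30;		/* 4 GiB device */
		unsigned long num_pages = disksize / PAGE_SIZE;	/* 1048576 pages */
		unsigned long hash_size = num_pages >> ZRAM_HASH_SHIFT;

		if (hash_size > ZRAM_HASH_SIZE_MAX)
			hash_size = ZRAM_HASH_SIZE_MAX;
		if (hash_size < ZRAM_HASH_SIZE_MIN)
			hash_size = ZRAM_HASH_SIZE_MIN;

		/* 8192 buckets; at ~24 bytes per bucket that is roughly 192 KiB */
		printf("buckets: %lu\n", hash_size);
		return 0;
	}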
diff --git a/drivers/block/zram/zram_dedup.h b/drivers/block/zram/zram_dedup.h
new file mode 100644
index 000000000000..afdc4843833d
--- /dev/null
+++ b/drivers/block/zram/zram_dedup.h
@@ -0,0 +1,45 @@
+#ifndef _ZRAM_DEDUP_H_
+#define _ZRAM_DEDUP_H_
+
+struct zram;
+struct zram_entry;
+
+#ifdef CONFIG_ZRAM_DEDUP
+
+u64 zram_dedup_dup_size(struct zram *zram);
+u64 zram_dedup_meta_size(struct zram *zram);
+
+void zram_dedup_insert(struct zram *zram, struct zram_entry *new,
+				u32 checksum);
+struct zram_entry *zram_dedup_find(struct zram *zram, struct page *page,
+				u32 *checksum);
+
+void zram_dedup_init_entry(struct zram *zram, struct zram_entry *entry,
+				unsigned long handle, unsigned int len);
+bool zram_dedup_put_entry(struct zram *zram, struct zram_entry *entry);
+
+int zram_dedup_init(struct zram *zram, size_t num_pages);
+void zram_dedup_deinit(struct zram *zram);
+#else
+
+static inline u64 zram_dedup_dup_size(struct zram *zram) { return 0; }
+static inline u64 zram_dedup_meta_size(struct zram *zram) { return 0; }
+
+static inline void zram_dedup_insert(struct zram *zram, struct zram_entry *new,
+				u32 checksum) { }
+static inline struct zram_entry *zram_dedup_find(struct zram *zram,
+				struct page *page, u32 *checksum) { return NULL; }
+
+static inline void zram_dedup_init_entry(struct zram *zram,
+				struct zram_entry *entry, unsigned long handle,
+				unsigned int len) { }
+static inline bool zram_dedup_put_entry(struct zram *zram,
+				struct zram_entry *entry) { return true; }
+
+static inline int zram_dedup_init(struct zram *zram,
+				size_t num_pages) { return 0; }
+static inline void zram_dedup_deinit(struct zram *zram) { }
+
+#endif
+
+#endif /* _ZRAM_DEDUP_H_ */
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index cb45878692c0..3a0a0c2e006e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -84,14 +84,15 @@ static inline struct zram *dev_to_zram(struct device *dev)
 	return (struct zram *)dev_to_disk(dev)->private_data;
 }
 
-static unsigned long zram_get_handle(struct zram *zram, u32 index)
+static struct zram_entry *zram_get_entry(struct zram *zram, u32 index)
 {
-	return zram->table[index].handle;
+	return zram->table[index].entry;
 }
 
-static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
+static void zram_set_entry(struct zram *zram, u32 index,
+			struct zram_entry *entry)
 {
-	zram->table[index].handle = handle;
+	zram->table[index].entry = entry;
 }
 
 /* flag operations require table entry bit_spin_lock() being held */
@@ -1040,6 +1041,41 @@ static ssize_t comp_algorithm_store(struct device *dev,
 	return len;
 }
 
+static ssize_t use_dedup_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	bool val;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	val = zram->use_dedup;
+	up_read(&zram->init_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", (int)val);
+}
+
+#ifdef CONFIG_ZRAM_DEDUP
+static ssize_t use_dedup_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	int val;
+	struct zram *zram = dev_to_zram(dev);
+
+	if (kstrtoint(buf, 10, &val) || (val != 0 && val != 1))
+		return -EINVAL;
+
+	down_write(&zram->init_lock);
+	if (init_done(zram)) {
+		up_write(&zram->init_lock);
+		pr_info("Can't change dedup usage for initialized device\n");
+		return -EBUSY;
+	}
+	zram->use_dedup = val;
+	up_write(&zram->init_lock);
+	return len;
+}
+#endif
+
 static ssize_t compact_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
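With CONFIG_ZRAM_DEDUP=n, the static inline stubs in zram_dedup.h above make zram_dedup_find() always return NULL and zram_dedup_put_entry() always return true, so the driver below can call the dedup helpers unconditionally and the compiler simply drops the dead branches. A simplified, hypothetical caller illustrating the pattern (not the actual driver code):

	/* Illustration only: one caller works for both configurations. */
	static int demo_store_page(struct zram *zram, struct page *page,
				   unsigned int comp_len, u32 *checksum)
	{
		struct zram_entry *entry;

		entry = zram_dedup_find(zram, page, checksum);	/* NULL when disabled */
		if (entry)
			return 0;		/* duplicate: reuse, no new allocation */

		entry = zram_entry_alloc(zram, comp_len, GFP_NOIO);
		if (!entry)
			return -ENOMEM;

		/* ... compress the page into the new entry ... */
		zram_dedup_insert(zram, entry, *checksum);	/* no-op when disabled */
		return 0;
	}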
@@ -1096,7 +1132,7 @@ static ssize_t mm_stat_show(struct device *dev,
 	max_used = atomic_long_read(&zram->stats.max_used_pages);
 
 	ret = scnprintf(buf, PAGE_SIZE,
-			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
+			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu %8llu\n",
 			orig_size << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.compr_data_size),
 			mem_used << PAGE_SHIFT,
@@ -1104,6 +1140,8 @@ static ssize_t mm_stat_show(struct device *dev,
 			max_used << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.same_pages),
 			atomic_long_read(&pool_stats.pages_compacted),
+			zram_dedup_dup_size(zram),
+			zram_dedup_meta_size(zram),
 			(u64)atomic64_read(&zram->stats.huge_pages),
 			(u64)atomic64_read(&zram->stats.huge_pages_since));
 	up_read(&zram->init_lock);
@@ -1156,6 +1194,56 @@ static DEVICE_ATTR_RO(bd_stat);
 #endif
 static DEVICE_ATTR_RO(debug_stat);
 
+static unsigned long zram_entry_handle(struct zram *zram,
+		struct zram_entry *entry)
+{
+	if (zram_dedup_enabled(zram))
+		return entry->handle;
+	else
+		return (unsigned long)entry;
+}
+
+static struct zram_entry *zram_entry_alloc(struct zram *zram,
+		unsigned int len, gfp_t flags)
+{
+	struct zram_entry *entry;
+	unsigned long handle;
+
+	handle = zs_malloc(zram->mem_pool, len, flags);
+	if (!handle)
+		return NULL;
+
+	if (!zram_dedup_enabled(zram))
+		return (struct zram_entry *)handle;
+
+	entry = kzalloc(sizeof(*entry),
+			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_CMA));
+	if (!entry) {
+		zs_free(zram->mem_pool, handle);
+		return NULL;
+	}
+
+	zram_dedup_init_entry(zram, entry, handle, len);
+	atomic64_add(sizeof(*entry), &zram->stats.meta_data_size);
+
+	return entry;
+}
+
+void zram_entry_free(struct zram *zram, struct zram_entry *entry)
+{
+	if (!zram_dedup_put_entry(zram, entry))
+		return;
+
+	zs_free(zram->mem_pool, zram_entry_handle(zram, entry));
+
+	if (!zram_dedup_enabled(zram))
+		return;
+
+	kfree(entry);
+
+	atomic64_sub(sizeof(*entry), &zram->stats.meta_data_size);
+}
+
 static void zram_meta_free(struct zram *zram, u64 disksize)
 {
 	size_t num_pages = disksize >> PAGE_SHIFT;
@@ -1166,6 +1254,7 @@ static void zram_meta_free(struct zram *zram, u64 disksize)
 		zram_free_page(zram, index);
 
 	zs_destroy_pool(zram->mem_pool);
+	zram_dedup_deinit(zram);
 	vfree(zram->table);
 }
 
@@ -1187,6 +1276,12 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 	if (!huge_class_size)
 		huge_class_size = zs_huge_class_size(zram->mem_pool);
 
+	if (zram_dedup_init(zram, num_pages)) {
+		vfree(zram->table);
+		zs_destroy_pool(zram->mem_pool);
+		return false;
+	}
+
 	return true;
 }
 
@@ -1197,7 +1292,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
  */
 static void zram_free_page(struct zram *zram, size_t index)
 {
-	unsigned long handle;
+	struct zram_entry *entry;
 
 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
 	zram->table[index].ac_time = 0;
@@ -1226,17 +1321,17 @@ static void zram_free_page(struct zram *zram, size_t index)
 		goto out;
 	}
 
-	handle = zram_get_handle(zram, index);
-	if (!handle)
+	entry = zram_get_entry(zram, index);
+	if (!entry)
 		return;
 
-	zs_free(zram->mem_pool, handle);
+	zram_entry_free(zram, entry);
 
 	atomic64_sub(zram_get_obj_size(zram, index),
 			&zram->stats.compr_data_size);
 out:
 	atomic64_dec(&zram->stats.pages_stored);
-	zram_set_handle(zram, index, 0);
+	zram_set_entry(zram, index, NULL);
 	zram_set_obj_size(zram, index, 0);
 	WARN_ON_ONCE(zram->table[index].flags &
 		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
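Two details of zram_entry_alloc()/zram_entry_handle() above are easy to miss. When deduplication is disabled, no descriptor is allocated at all: the zsmalloc handle itself is returned cast to a struct zram_entry pointer, and zram_entry_handle() just casts it back, so the non-dedup memory footprint is unchanged. When it is enabled, the descriptor kzalloc() strips __GFP_HIGHMEM, __GFP_MOVABLE and __GFP_CMA, which are meaningful for zsmalloc but not for a slab allocation. A small sketch with hypothetical names, illustration only:

	/* Illustration only: the two points described above, in isolation. */
	static void demo_entry_representation(struct zram *zram, gfp_t zs_flags)
	{
		unsigned long handle = 0x1000;	/* stand-in zsmalloc handle */
		struct zram_entry *entry = (struct zram_entry *)handle;
		gfp_t slab_flags;

		/* dedup off: pointer and handle are the same bits, never dereferenced */
		if (!zram_dedup_enabled(zram))
			WARN_ON(zram_entry_handle(zram, entry) != handle);

		/* descriptor allocation must not inherit zsmalloc-only GFP hints */
		slab_flags = zs_flags & ~(__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_CMA);
		(void)slab_flags;
	}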
@@ -1245,8 +1340,8 @@ out:
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 				struct bio *bio, bool partial_io)
 {
-	struct zcomp_strm *zstrm;
-	unsigned long handle;
+	struct zcomp_strm *zstrm;
+	struct zram_entry *entry;
 	unsigned int size;
 	void *src, *dst;
 	int ret;
@@ -1265,12 +1360,12 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 				bio, partial_io);
 	}
 
-	handle = zram_get_handle(zram, index);
-	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
+	entry = zram_get_entry(zram, index);
+	if (!entry || zram_test_flag(zram, index, ZRAM_SAME)) {
 		unsigned long value;
 		void *mem;
 
-		value = handle ? zram_get_element(zram, index) : 0;
+		value = entry ? zram_get_element(zram, index) : 0;
 		mem = kmap_atomic(page);
 		zram_fill_page(mem, PAGE_SIZE, value);
 		kunmap_atomic(mem);
@@ -1283,7 +1378,8 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 	if (size != PAGE_SIZE)
 		zstrm = zcomp_stream_get(zram->comp);
 
-	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
+	src = zs_map_object(zram->mem_pool,
+			zram_entry_handle(zram, entry), ZS_MM_RO);
 	if (size == PAGE_SIZE) {
 		dst = kmap_atomic(page);
 		copy_page(dst, src);
@@ -1295,7 +1391,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		kunmap_atomic(dst);
 		zcomp_stream_put(zram->comp);
 	}
-	zs_unmap_object(zram->mem_pool, handle);
+	zs_unmap_object(zram->mem_pool, zram_entry_handle(zram, entry));
 	zram_slot_unlock(zram, index);
 
 	/* Should NEVER happen. Return bio error if it does. */
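On the read side nothing changes logically: zram_entry_handle() resolves to the underlying zsmalloc handle in both configurations, and after a dedup hit several slots simply point at the same entry, so a read of any of them decompresses the same object. A hedged sketch with hypothetical slot indices, not part of the patch:

	/* Illustration: two slots written with identical pages share one entry. */
	static void demo_shared_entry(struct zram *zram)
	{
		struct zram_entry *a = zram_get_entry(zram, 10);
		struct zram_entry *b = zram_get_entry(zram, 42);

		if (a && a == b)
			pr_info("slots share handle %lu\n",
				zram_entry_handle(zram, a));
	}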
@@ -1343,11 +1439,12 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 {
 	int ret = 0;
 	unsigned long alloced_pages;
-	unsigned long handle = 0;
+	struct zram_entry *entry = NULL;
 	unsigned int comp_len = 0;
 	void *src, *dst, *mem;
 	struct zcomp_strm *zstrm;
 	struct page *page = bvec->bv_page;
+	u32 checksum;
 	unsigned long element = 0;
 	enum zram_pageflags flags = 0;
 
@@ -1361,6 +1458,12 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 	}
 	kunmap_atomic(mem);
 
+	entry = zram_dedup_find(zram, page, &checksum);
+	if (entry) {
+		comp_len = entry->len;
+		goto out;
+	}
+
 compress_again:
 	zstrm = zcomp_stream_get(zram->comp);
 	src = kmap_atomic(page);
@@ -1370,39 +1473,40 @@ compress_again:
 	if (unlikely(ret)) {
 		zcomp_stream_put(zram->comp);
 		pr_err("Compression failed! err=%d\n", ret);
-		zs_free(zram->mem_pool, handle);
+		if (entry)
+			zram_entry_free(zram, entry);
 		return ret;
 	}
 
 	if (comp_len >= huge_class_size)
 		comp_len = PAGE_SIZE;
 
 	/*
-	 * handle allocation has 2 paths:
+	 * entry allocation has 2 paths:
 	 * a) fast path is executed with preemption disabled (for
 	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
 	 *  since we can't sleep;
 	 * b) slow path enables preemption and attempts to allocate
 	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
 	 *  put per-cpu compression stream and, thus, to re-do
-	 *  the compression once handle is allocated.
+	 *  the compression once entry is allocated.
 	 *
-	 * if we have a 'non-null' handle here then we are coming
-	 * from the slow path and handle has already been allocated.
+	 * if we have a 'non-null' entry here then we are coming
+	 * from the slow path and entry has already been allocated.
 	 */
-	if (!handle)
-		handle = zs_malloc(zram->mem_pool, comp_len,
+	if (!entry)
+		entry = zram_entry_alloc(zram, comp_len,
 				__GFP_KSWAPD_RECLAIM |
 				__GFP_NOWARN |
 				__GFP_HIGHMEM |
 				__GFP_MOVABLE | __GFP_CMA);
-	if (!handle) {
+	if (!entry) {
 		zcomp_stream_put(zram->comp);
 		atomic64_inc(&zram->stats.writestall);
-		handle = zs_malloc(zram->mem_pool, comp_len,
+		entry = zram_entry_alloc(zram, comp_len,
 				GFP_NOIO | __GFP_HIGHMEM |
 				__GFP_MOVABLE | __GFP_CMA);
-		if (handle)
+		if (entry)
 			goto compress_again;
 		return -ENOMEM;
 	}
@@ -1412,11 +1516,12 @@ compress_again:
 
 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
 		zcomp_stream_put(zram->comp);
-		zs_free(zram->mem_pool, handle);
+		zram_entry_free(zram, entry);
 		return -ENOMEM;
 	}
 
-	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
+	dst = zs_map_object(zram->mem_pool,
+			zram_entry_handle(zram, entry), ZS_MM_WO);
 
 	src = zstrm->buffer;
 	if (comp_len == PAGE_SIZE)
@@ -1426,8 +1531,9 @@ compress_again:
 	kunmap_atomic(src);
 
 	zcomp_stream_put(zram->comp);
-	zs_unmap_object(zram->mem_pool, handle);
+	zs_unmap_object(zram->mem_pool, zram_entry_handle(zram, entry));
 	atomic64_add(comp_len, &zram->stats.compr_data_size);
+	zram_dedup_insert(zram, entry, checksum);
 out:
 	/*
 	 * Free memory associated with this sector
@@ -1446,7 +1552,7 @@ out:
 		zram_set_flag(zram, index, flags);
 		zram_set_element(zram, index, element);
 	} else {
-		zram_set_handle(zram, index, handle);
+		zram_set_entry(zram, index, entry);
 		zram_set_obj_size(zram, index, comp_len);
 	}
 	zram_slot_unlock(zram, index);
@@ -1867,6 +1973,11 @@ static DEVICE_ATTR_WO(writeback);
 static DEVICE_ATTR_RW(writeback_limit);
 static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
+#ifdef CONFIG_ZRAM_DEDUP
+static DEVICE_ATTR_RW(use_dedup);
+#else
+static DEVICE_ATTR_RO(use_dedup);
+#endif
 
 static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_disksize.attr,
@@ -1884,6 +1995,7 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_writeback_limit.attr,
 	&dev_attr_writeback_limit_enable.attr,
 #endif
+	&dev_attr_use_dedup.attr,
 	&dev_attr_io_stat.attr,
 	&dev_attr_mm_stat.attr,
 #ifdef CONFIG_ZRAM_WRITEBACK
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index a958ea7505a0..e1b598c9341f 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -18,8 +18,10 @@
 #include <linux/rwsem.h>
 #include <linux/zsmalloc.h>
 #include <linux/crc32.h>
+#include <linux/rbtree.h>
 
 #include "zcomp.h"
+#include "zram_dedup.h"
 
 #define SECTORS_PER_PAGE_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTORS_PER_PAGE	(1 << SECTORS_PER_PAGE_SHIFT)
@@ -56,10 +58,18 @@ enum zram_pageflags {
 
 /*-- Data structures */
 
+struct zram_entry {
+	struct rb_node rb_node;
+	u32 len;
+	u32 checksum;
+	unsigned long refcount;
+	unsigned long handle;
+};
+
 /* Allocated for each disk page */
 struct zram_table_entry {
 	union {
-		unsigned long handle;
+		struct zram_entry *entry;
 		unsigned long element;
 	};
 	unsigned long flags;
@@ -82,6 +92,11 @@ struct zram_stats {
 	atomic64_t pages_stored;	/* no. of pages currently stored */
 	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
 	atomic64_t writestall;		/* no. of write slow paths */
+	atomic64_t dup_data_size;	/*
+					 * compressed size of pages
+					 * duplicated
+					 */
+	atomic64_t meta_data_size;	/* size of zram_entries */
 	atomic64_t miss_free;		/* no. of missed free */
 #ifdef CONFIG_ZRAM_WRITEBACK
 	atomic64_t bd_count;		/* no. of pages in backing device */
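Accounting with the write path above: on a dedup hit, __zram_bvec_write() skips compression and allocation entirely, compr_data_size does not grow, and dup_data_size grows by entry->len; meta_data_size tracks the descriptors allocated in zram_entry_alloc(). A rough, illustrative way to read the two new counters together (not a formula from the patch):

	/*
	 * Illustration only: dup_data_size is compressed data that would
	 * otherwise have been stored a second time; the descriptor metadata
	 * partly offsets that win (the hash bucket array is ignored here).
	 */
	static long long demo_dedup_net_saving(long long dup_data_size,
						long long meta_data_size)
	{
		return dup_data_size - meta_data_size;	/* > 0: dedup is paying off */
	}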
@@ -90,11 +105,18 @@ struct zram_stats {
 #endif
 };
 
+struct zram_hash {
+	spinlock_t lock;
+	struct rb_root rb_root;
+};
+
 struct zram {
 	struct zram_table_entry *table;
 	struct zs_pool *mem_pool;
 	struct zcomp *comp;
 	struct gendisk *disk;
+	struct zram_hash *hash;
+	size_t hash_size;
 	/* Prevent concurrent execution of device init */
 	struct rw_semaphore init_lock;
 	/*
@@ -113,8 +135,9 @@ struct zram {
 	 * zram is claimed so open request will be failed
 	 */
 	bool claim; /* Protected by bdev->bd_mutex */
+	bool use_dedup;
 #ifdef CONFIG_ZRAM_WRITEBACK
-	struct file *backing_dev;
+	struct file *backing_dev;
 	spinlock_t wb_limit_lock;
 	bool wb_limit_enable;
 	u64 bd_wb_limit;
@@ -126,4 +149,15 @@ struct zram {
 	struct dentry *debugfs_dir;
 #endif
 };
+
+static inline bool zram_dedup_enabled(struct zram *zram)
+{
+#ifdef CONFIG_ZRAM_DEDUP
+	return zram->use_dedup;
+#else
+	return false;
+#endif
+}
+
+void zram_entry_free(struct zram *zram, struct zram_entry *entry);
 #endif
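Finally, the cost side of the structures above, as a rough 64-bit estimate (actual sizes depend on configuration and architecture): each stored object carries one struct zram_entry, charged to meta_data_size, and the device carries one struct zram_hash per bucket. Illustrative userspace arithmetic, not part of the patch:

	#include <stdio.h>

	struct demo_rb_node { unsigned long p; void *l, *r; };	/* ~24 bytes */

	struct demo_zram_entry {
		struct demo_rb_node rb_node;
		unsigned int len, checksum;
		unsigned long refcount;
		unsigned long handle;
	};							/* ~48 bytes */

	int main(void)
	{
		unsigned long entries = (4UL << 30) / 4096;	/* worst case: every page stored */
		unsigned long buckets = entries >> 7;		/* 8192, see zram_dedup_init() */

		printf("entry metadata: ~%lu MiB\n",
		       (unsigned long)(entries * sizeof(struct demo_zram_entry) >> 20));
		printf("bucket array:   ~%lu KiB\n",
		       buckets * 24UL >> 10);			/* ~24 bytes per zram_hash */
		return 0;
	}

For a fully populated hypothetical 4 GiB device this is on the order of 48 MiB of descriptors plus ~192 KiB of buckets, which is the overhead the Kconfig help text warns about when little data is actually duplicated.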