2013-11-10 23:13:19 +08:00
|
|
|
/*
|
|
|
|
* fs/f2fs/inline.c
|
|
|
|
* Copyright (c) 2013, Intel Corporation
|
|
|
|
* Authors: Huajun Li <huajun.li@intel.com>
|
|
|
|
* Haicheng Li <haicheng.li@intel.com>
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/f2fs_fs.h>
|
|
|
|
|
|
|
|
#include "f2fs.h"
|
2015-10-15 11:34:49 -07:00
|
|
|
#include "node.h"
|
2013-11-10 23:13:19 +08:00
|
|
|
|
2015-04-23 10:27:21 -07:00
|
|
|
bool f2fs_may_inline_data(struct inode *inode)
|
2013-11-10 23:13:19 +08:00
|
|
|
{
|
2014-10-06 17:39:50 -07:00
|
|
|
if (f2fs_is_atomic_file(inode))
|
|
|
|
return false;
|
|
|
|
|
2015-03-19 13:23:48 +08:00
|
|
|
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
|
2013-11-10 23:13:19 +08:00
|
|
|
return false;
|
|
|
|
|
2017-07-19 00:19:05 +08:00
|
|
|
if (i_size_read(inode) > MAX_INLINE_DATA(inode))
|
2014-11-11 14:10:01 -08:00
|
|
|
return false;
|
|
|
|
|
2017-09-05 16:54:24 -07:00
|
|
|
if (f2fs_encrypted_file(inode))
|
2015-04-21 20:39:58 -07:00
|
|
|
return false;
|
|
|
|
|
2013-11-10 23:13:19 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-04-23 10:27:21 -07:00
|
|
|
bool f2fs_may_inline_dentry(struct inode *inode)
|
|
|
|
{
|
|
|
|
if (!test_opt(F2FS_I_SB(inode), INLINE_DENTRY))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!S_ISDIR(inode->i_mode))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-10-23 19:48:09 -07:00
|
|
|
/*
 * Populate a data page from the inline data stored in the inode page.
 *
 * @page:	data page (index 0) to fill; must belong to the target inode
 * @ipage:	node page holding the on-disk inode with the inline area
 *
 * No-op when @page is already uptodate.  The tail of the page beyond the
 * inline capacity is zeroed before the copy so no stale bytes leak.
 */
void read_inline_data(struct page *page, struct page *ipage)
{
	struct inode *inode = page->mapping->host;
	void *src_addr, *dst_addr;

	if (PageUptodate(page))
		return;

	/* inline data only ever lives in page index 0 */
	f2fs_bug_on(F2FS_P_SB(page), page->index);

	zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE);

	/* Copy the whole inline data block */
	src_addr = inline_data_addr(inode, ipage);
	dst_addr = kmap_atomic(page);
	memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
	flush_dcache_page(page);
	kunmap_atomic(dst_addr);
	if (!PageUptodate(page))
		SetPageUptodate(page);
}
|
|
|
|
|
2017-03-10 20:43:20 +08:00
|
|
|
/*
 * Truncate the inline data area of @inode down to @from bytes by zeroing
 * the tail of the inline region in @ipage.
 *
 * Does nothing if @from is already at or beyond the inline capacity.
 * Waits for any in-flight writeback of the node page before modifying it,
 * then dirties the page.  A truncation to zero also clears FI_DATA_EXIST.
 */
void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
{
	void *addr;

	if (from >= MAX_INLINE_DATA(inode))
		return;

	addr = inline_data_addr(inode, ipage);

	f2fs_wait_on_page_writeback(ipage, NODE, true);
	memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
	set_page_dirty(ipage);

	if (from == 0)
		clear_inode_flag(inode, FI_DATA_EXIST);
}
|
|
|
|
|
2014-10-23 19:48:09 -07:00
|
|
|
/*
 * Service a read of @page from @inode's inline data.
 *
 * Returns 0 on success, -EAGAIN if the inode no longer carries inline data
 * (caller should retry via the normal read path), or the error from looking
 * up the node page.  @page arrives locked and is unlocked on every return
 * path except the -EAGAIN one, where the caller retains the lock for the
 * fallback read.
 */
int f2fs_read_inline_data(struct inode *inode, struct page *page)
{
	struct page *ipage;

	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
	if (IS_ERR(ipage)) {
		unlock_page(page);
		return PTR_ERR(ipage);
	}

	/* raced with conversion: fall back to the regular read path */
	if (!f2fs_has_inline_data(inode)) {
		f2fs_put_page(ipage, 1);
		return -EAGAIN;
	}

	/* only index 0 can hold inline data; later indices are all zeroes */
	if (page->index)
		zero_user_segment(page, 0, PAGE_SIZE);
	else
		read_inline_data(page, ipage);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	f2fs_put_page(ipage, 1);
	unlock_page(page);
	return 0;
}
|
|
|
|
|
2014-10-23 19:48:09 -07:00
|
|
|
/*
 * Convert an inline-data inode to a regular one by writing the inline
 * payload out to a freshly reserved data block.
 *
 * @dn:		dnode with inode/node pages for the target inode
 * @page:	data page (index 0) used as the writeback vehicle
 *
 * Returns 0 on success (including the trivial case of an inode with no
 * data, which just clears the inline flag), a negative errno from block
 * reservation, or -EFSCORRUPTED when the reserved block address is not
 * NEW_ADDR — a fuzzed/corrupted image would otherwise feed a bogus
 * blkaddr into the SIT bitmap update (see bugzilla 200179).
 */
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(dn->inode),
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.page = page,
		.encrypted_page = NULL,
		.io_type = FS_DATA_IO,
	};
	int dirty, err;

	if (!f2fs_exist_data(dn->inode))
		goto clear_out;

	err = f2fs_reserve_block(dn, 0);
	if (err)
		return err;

	/* sanity check: a just-reserved inline block must be NEW_ADDR */
	if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
		f2fs_put_dnode(dn);
		set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
		f2fs_msg(fio.sbi->sb, KERN_WARNING,
			"%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
			"run fsck to fix.",
			__func__, dn->inode->i_ino, dn->data_blkaddr);
		return -EFSCORRUPTED;
	}

	f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));

	/* stage the inline payload into the data page */
	read_inline_data(page, dn->inode_page);
	set_page_dirty(page);

	/* clear dirty state */
	dirty = clear_page_dirty_for_io(page);

	/* write data page to try to make data consistent */
	set_page_writeback(page);
	fio.old_blkaddr = dn->data_blkaddr;
	set_inode_flag(dn->inode, FI_HOT_DATA);
	write_data_page(dn, &fio);
	f2fs_wait_on_page_writeback(page, DATA, true);
	if (dirty) {
		inode_dec_dirty_pages(dn->inode);
		remove_dirty_inode(dn->inode);
	}

	/* this converted inline_data should be recovered. */
	set_inode_flag(dn->inode, FI_APPEND_WRITE);

	/* clear inline data and flag after data writeback */
	truncate_inline_inode(dn->inode, dn->inode_page, 0);
	clear_inline_node(dn->inode_page);
clear_out:
	stat_dec_inline_inode(dn->inode);
	clear_inode_flag(dn->inode, FI_INLINE_DATA);
	f2fs_put_dnode(dn);
	return 0;
}
|
|
|
|
|
2014-10-23 19:48:09 -07:00
|
|
|
/*
 * Entry point for converting @inode out of inline-data form.
 *
 * Grabs data page 0 as the conversion vehicle, takes the fs-op lock, and
 * re-checks f2fs_has_inline_data() under the lock before delegating to
 * f2fs_convert_inline_page() (the flag may have been cleared by a racing
 * conversion).  Returns 0 if the inode had no inline data or on success,
 * otherwise a negative errno.
 *
 * NOTE: dn.node_changed is only read on the !err path, where set_new_dnode()
 * has initialized dn — the error path never reaches f2fs_balance_fs().
 */
int f2fs_convert_inline_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct page *ipage, *page;
	int err = 0;

	if (!f2fs_has_inline_data(inode))
		return 0;

	page = f2fs_grab_cache_page(inode->i_mapping, 0, false);
	if (!page)
		return -ENOMEM;

	f2fs_lock_op(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	/* re-check under f2fs_lock_op(): conversion may have raced us */
	if (f2fs_has_inline_data(inode))
		err = f2fs_convert_inline_page(&dn, page);

	f2fs_put_dnode(&dn);
out:
	f2fs_unlock_op(sbi);

	f2fs_put_page(page, 1);

	if (!err)
		f2fs_balance_fs(sbi, dn.node_changed);

	return err;
}
|
|
|
|
|
2014-10-23 19:48:09 -07:00
|
|
|
/*
 * Write back a dirty data page into the inode's inline area instead of a
 * data block.
 *
 * Returns 0 on success, -EAGAIN if the inode no longer has inline data
 * (caller should use the normal write path), or the error from the dnode
 * lookup.  On success the page's PAGECACHE_TAG_DIRTY radix-tree tag is
 * cleared under mapping->tree_lock, since the data now lives in the node
 * page rather than the data page.
 */
int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
	void *src_addr, *dst_addr;
	struct dnode_of_data dn;
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	if (!f2fs_has_inline_data(inode)) {
		f2fs_put_dnode(&dn);
		return -EAGAIN;
	}

	/* inline data only ever maps to page index 0 */
	f2fs_bug_on(F2FS_I_SB(inode), page->index);

	f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
	src_addr = kmap_atomic(page);
	dst_addr = inline_data_addr(inode, dn.inode_page);
	memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
	kunmap_atomic(src_addr);
	set_page_dirty(dn.inode_page);

	/* data is now carried by the node page; drop the data page's dirty tag */
	spin_lock_irqsave(&mapping->tree_lock, flags);
	radix_tree_tag_clear(&mapping->page_tree, page_index(page),
				PAGECACHE_TAG_DIRTY);
	spin_unlock_irqrestore(&mapping->tree_lock, flags);

	set_inode_flag(inode, FI_APPEND_WRITE);
	set_inode_flag(inode, FI_DATA_EXIST);

	clear_inline_node(dn.inode_page);
	f2fs_put_dnode(&dn);
	return 0;
}
|
2013-12-26 12:49:48 +09:00
|
|
|
|
2014-08-07 16:57:17 -07:00
|
|
|
/*
 * Recover inline data for @inode from the checkpointed node page @npage
 * during crash recovery.
 *
 * Returns true when inline data was recovered (caller can skip block-level
 * recovery), false when the caller must recover data blocks instead.
 */
bool recover_inline_data(struct inode *inode, struct page *npage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode *ri = NULL;
	void *src_addr, *dst_addr;
	struct page *ipage;

	/*
	 * The inline_data recovery policy is as follows.
	 * [prev.] [next] of inline_data flag
	 *    o       o  -> recover inline_data
	 *    o       x  -> remove inline_data, and then recover data blocks
	 *    x       o  -> remove inline_data, and then recover inline_data
	 *    x       x  -> recover data blocks
	 */
	if (IS_INODE(npage))
		ri = F2FS_INODE(npage);

	if (f2fs_has_inline_data(inode) &&
			ri && (ri->i_inline & F2FS_INLINE_DATA)) {
process_inline:
		ipage = get_node_page(sbi, inode->i_ino);
		f2fs_bug_on(sbi, IS_ERR(ipage));

		f2fs_wait_on_page_writeback(ipage, NODE, true);

		/* copy the checkpointed inline payload into the live inode page */
		src_addr = inline_data_addr(inode, npage);
		dst_addr = inline_data_addr(inode, ipage);
		memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));

		set_inode_flag(inode, FI_INLINE_DATA);
		set_inode_flag(inode, FI_DATA_EXIST);

		set_page_dirty(ipage);
		f2fs_put_page(ipage, 1);
		return true;
	}

	if (f2fs_has_inline_data(inode)) {
		/* prev o / next x: drop stale inline data, recover blocks */
		ipage = get_node_page(sbi, inode->i_ino);
		f2fs_bug_on(sbi, IS_ERR(ipage));
		truncate_inline_inode(inode, ipage, 0);
		clear_inode_flag(inode, FI_INLINE_DATA);
		f2fs_put_page(ipage, 1);
	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
		/* prev x / next o: drop data blocks, then recover inline */
		if (truncate_blocks(inode, 0, false))
			return false;
		goto process_inline;
	}
	return false;
}
|
2014-09-24 18:17:53 +08:00
|
|
|
|
|
|
|
/*
 * Look up @fname in an inline-dentry directory.
 *
 * Returns the matching dirent (with *res_page set to the inode page, held
 * with an extra reference the caller must drop) or NULL.  On node-page
 * lookup failure, *res_page carries the ERR_PTR for the caller to decode.
 */
struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
			struct fscrypt_name *fname, struct page **res_page)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
	struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
	struct f2fs_dir_entry *de;
	struct f2fs_dentry_ptr d;
	struct page *ipage;
	void *inline_dentry;
	f2fs_hash_t namehash;

	ipage = get_node_page(sbi, dir->i_ino);
	if (IS_ERR(ipage)) {
		/* propagate the error through *res_page */
		*res_page = ipage;
		return NULL;
	}

	namehash = f2fs_dentry_hash(&name, fname);

	inline_dentry = inline_data_addr(dir, ipage);

	make_dentry_ptr_inline(dir, &d, inline_dentry);
	de = find_target_dentry(fname, namehash, NULL, &d);
	unlock_page(ipage);
	/* on a hit, hand the (unlocked) page reference to the caller */
	if (de)
		*res_page = ipage;
	else
		f2fs_put_page(ipage, 0);

	return de;
}
|
|
|
|
|
|
|
|
int make_empty_inline_dir(struct inode *inode, struct inode *parent,
|
|
|
|
struct page *ipage)
|
|
|
|
{
|
2014-10-18 23:06:41 -07:00
|
|
|
struct f2fs_dentry_ptr d;
|
2017-07-19 00:19:05 +08:00
|
|
|
void *inline_dentry;
|
2014-09-24 18:17:53 +08:00
|
|
|
|
2017-07-19 00:19:05 +08:00
|
|
|
inline_dentry = inline_data_addr(inode, ipage);
|
2014-09-24 18:17:53 +08:00
|
|
|
|
2017-07-19 00:19:05 +08:00
|
|
|
make_dentry_ptr_inline(inode, &d, inline_dentry);
|
2014-10-18 23:06:41 -07:00
|
|
|
do_make_empty_dir(inode, parent, &d);
|
2014-09-24 18:17:53 +08:00
|
|
|
|
|
|
|
set_page_dirty(ipage);
|
|
|
|
|
|
|
|
/* update i_size to MAX_INLINE_DATA */
|
2017-07-19 00:19:05 +08:00
|
|
|
if (i_size_read(inode) < MAX_INLINE_DATA(inode))
|
|
|
|
f2fs_i_size_write(inode, MAX_INLINE_DATA(inode));
|
2014-09-24 18:17:53 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-14 18:14:06 +08:00
|
|
|
/*
|
|
|
|
* NOTE: ipage is grabbed by caller, but if any error occurs, we should
|
|
|
|
* release ipage in this function.
|
|
|
|
*/
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashs dirents into different buckets according their hash value, if we
still move all dirents to first bucket, it makes incorrent locating for
inline dirents, the result is, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup which based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
/*
 * Convert an inline directory into a regular (block-based) directory by
 * copying all inline dirents into block 0 of the directory's data mapping.
 *
 * @dir:           directory inode being converted
 * @ipage:         locked node page holding the inline dentry area; consumed
 *                 (put) on the early -ENOMEM path, otherwise released via
 *                 the dnode in f2fs_reserve_block()/f2fs_put_dnode()
 * @inline_dentry: pointer to the inline dentry region inside @ipage
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -EFSCORRUPTED, or an
 * error from f2fs_reserve_block()).
 *
 * NOTE(review): per the original commit history, this fast path is only
 * valid when dirents need not be rehashed (dir_level == 0 layout); the
 * rehashing case is handled elsewhere — confirm against the caller.
 */
static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
							void *inline_dentry)
{
	struct page *page;
	struct dnode_of_data dn;
	struct f2fs_dentry_block *dentry_blk;
	struct f2fs_dentry_ptr src, dst;
	int err;

	/* Grab (or create) page 0 of the directory — the target dentry block. */
	page = f2fs_grab_cache_page(dir->i_mapping, 0, false);
	if (!page) {
		f2fs_put_page(ipage, 1);
		return -ENOMEM;
	}

	/* Reserve on-disk block 0 for the directory's data. */
	set_new_dnode(&dn, dir, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, 0);
	if (err)
		goto out;

	/*
	 * Sanity check: a just-reserved inline inode must have NEW_ADDR at
	 * i_addr[0]. A fuzzed/corrupted image can carry an arbitrary blkaddr
	 * here, which would later corrupt the SIT bitmap during writeback
	 * (see commit 4dbe38dc3869), so bail out and ask for fsck.
	 */
	if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
		f2fs_put_dnode(&dn);
		set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
		f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
			"%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
			"run fsck to fix.",
			__func__, dir->i_ino, dn.data_blkaddr);
		err = -EFSCORRUPTED;
		goto out;
	}

	/* Wait for any in-flight writeback before modifying the page. */
	f2fs_wait_on_page_writeback(page, DATA, true);
	/* Zero the tail beyond the inline area so no stale data leaks. */
	zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE);

	dentry_blk = kmap_atomic(page);

	/*
	 * Start by zeroing the full block, to ensure that all unused space is
	 * zeroed and no uninitialized memory is leaked to disk.
	 */
	memset(dentry_blk, 0, F2FS_BLKSIZE);

	make_dentry_ptr_inline(dir, &src, inline_dentry);
	make_dentry_ptr_block(dir, &dst, dentry_blk);

	/* copy data from inline dentry block to new dentry block */
	memcpy(dst.bitmap, src.bitmap, src.nr_bitmap);
	memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max);
	memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN);

	kunmap_atomic(dentry_blk);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	set_page_dirty(page);

	/* clear inline dir and flag after data writeback */
	truncate_inline_inode(dir, ipage, 0);

	stat_dec_inline_dir(dir);
	clear_inode_flag(dir, FI_INLINE_DENTRY);

	/* The directory now has one level of hash table. */
	f2fs_i_depth_write(dir, 1);
	if (i_size_read(dir) < PAGE_SIZE)
		f2fs_i_size_write(dir, PAGE_SIZE);
out:
	f2fs_put_page(page, 1);
	return err;
}
|
|
|
|
|
2017-07-19 00:19:05 +08:00
|
|
|
/*
 * Re-insert every inline dirent into the directory through the normal
 * (hash-based) add-entry path, so each dirent lands in its correct hash
 * bucket. Used when converting an inline directory whose dir_level > 0,
 * where a plain block copy would mis-place entries (see commit message:
 * "f2fs: fix to convert inline directory correctly").
 *
 * @dir:           directory inode being converted
 * @inline_dentry: pointer to the inline dentry region
 *
 * Returns 0 on success. On failure, unwinds by truncating all directory
 * data pages/blocks added so far and returns the negative errno.
 */
static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
{
	struct f2fs_dentry_ptr d;
	unsigned long bit_pos = 0;
	int err = 0;

	make_dentry_ptr_inline(dir, &d, inline_dentry);

	/* Walk the inline dentry bitmap, slot by slot. */
	while (bit_pos < d.max) {
		struct f2fs_dir_entry *de;
		struct qstr new_name;
		nid_t ino;
		umode_t fake_mode;

		/* Skip free slots. */
		if (!test_bit_le(bit_pos, d.bitmap)) {
			bit_pos++;
			continue;
		}

		de = &d.dentry[bit_pos];

		/* Defend against a corrupted zero-length name: skip it. */
		if (unlikely(!de->name_len)) {
			bit_pos++;
			continue;
		}

		new_name.name = d.filename[bit_pos];
		new_name.len = le16_to_cpu(de->name_len);

		ino = le32_to_cpu(de->ino);
		/*
		 * Build a mode carrying only the file-type bits, derived
		 * from the on-disk dirent type, since no inode is in hand.
		 */
		fake_mode = get_de_type(de) << S_SHIFT;

		err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
							ino, fake_mode);
		if (err)
			goto punch_dentry_pages;

		/* Advance past all slots occupied by this (long) name. */
		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
	}
	return 0;
punch_dentry_pages:
	/* Roll back: drop every data page/block created for @dir so far. */
	truncate_inode_pages(&dir->i_data, 0);
	truncate_blocks(dir, 0, false);
	remove_dirty_inode(dir);
	return err;
}
|
|
|
|
|
|
|
|
static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
|
2017-07-19 00:19:05 +08:00
|
|
|
void *inline_dentry)
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashs dirents into different buckets according their hash value, if we
still move all dirents to first bucket, it makes incorrent locating for
inline dirents, the result is, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup which based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
{
|
2017-07-19 00:19:05 +08:00
|
|
|
void *backup_dentry;
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashs dirents into different buckets according their hash value, if we
still move all dirents to first bucket, it makes incorrent locating for
inline dirents, the result is, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup which based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
int err;
|
|
|
|
|
2016-09-23 21:30:09 +08:00
|
|
|
backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir),
|
2017-07-19 00:19:05 +08:00
|
|
|
MAX_INLINE_DATA(dir), GFP_F2FS_ZERO);
|
2016-05-14 19:03:53 +08:00
|
|
|
if (!backup_dentry) {
|
|
|
|
f2fs_put_page(ipage, 1);
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashes dirents into different buckets according to their hash value; if we
still move all dirents to the first bucket, it makes incorrect locating of
inline dirents. The result is that, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup, which is based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
return -ENOMEM;
|
2016-05-14 19:03:53 +08:00
|
|
|
}
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashes dirents into different buckets according to their hash value; if we
still move all dirents to the first bucket, it makes incorrect locating of
inline dirents. The result is that, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup, which is based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
|
2017-07-19 00:19:05 +08:00
|
|
|
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir));
|
2017-03-10 20:43:20 +08:00
|
|
|
truncate_inline_inode(dir, ipage, 0);
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashes dirents into different buckets according to their hash value; if we
still move all dirents to the first bucket, it makes incorrect locating of
inline dirents. The result is that, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup, which is based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
|
|
|
|
unlock_page(ipage);
|
|
|
|
|
|
|
|
err = f2fs_add_inline_entries(dir, backup_dentry);
|
|
|
|
if (err)
|
|
|
|
goto recover;
|
|
|
|
|
|
|
|
lock_page(ipage);
|
|
|
|
|
|
|
|
stat_dec_inline_dir(dir);
|
2016-05-20 10:13:22 -07:00
|
|
|
clear_inode_flag(dir, FI_INLINE_DENTRY);
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashes dirents into different buckets according to their hash value; if we
still move all dirents to the first bucket, it makes incorrect locating of
inline dirents. The result is that, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup, which is based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
kfree(backup_dentry);
|
|
|
|
return 0;
|
|
|
|
recover:
|
|
|
|
lock_page(ipage);
|
2018-06-21 22:38:28 +08:00
|
|
|
f2fs_wait_on_page_writeback(ipage, NODE, true);
|
2017-07-19 00:19:05 +08:00
|
|
|
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
|
2016-05-20 09:52:20 -07:00
|
|
|
f2fs_i_depth_write(dir, 0);
|
2017-07-19 00:19:05 +08:00
|
|
|
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
|
2016-05-20 16:32:49 -07:00
|
|
|
set_page_dirty(ipage);
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashes dirents into different buckets according to their hash value; if we
still move all dirents to the first bucket, it makes incorrect locating of
inline dirents. The result is that, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup, which is based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
f2fs_put_page(ipage, 1);
|
|
|
|
|
|
|
|
kfree(backup_dentry);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
|
2017-07-19 00:19:05 +08:00
|
|
|
void *inline_dentry)
|
f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:
1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir
ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root 0 Feb 19 15:12 1
-rw-r--r-- 1 root root 0 Feb 19 15:12 10
-rw-r--r-- 1 root root 0 Feb 19 15:12 100
-????????? ? ? ? ? ? 101
-????????? ? ? ? ? ? 102
-????????? ? ? ? ? ? 103
...
The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.
By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.
However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashs dirents into different buckets according their hash value, if we
still move all dirents to first bucket, it makes incorrent locating for
inline dirents, the result is, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup which based on hash
table searching.
This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-02-22 18:29:18 +08:00
|
|
|
{
|
|
|
|
if (!F2FS_I(dir)->i_dir_level)
|
|
|
|
return f2fs_move_inline_dirents(dir, ipage, inline_dentry);
|
|
|
|
else
|
|
|
|
return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
|
|
|
|
}
|
|
|
|
|
2016-08-28 18:57:55 +08:00
|
|
|
/*
 * Add an entry for @new_name (inode @ino, mode @mode) into @dir's inline
 * dentry area.  When the inline area is full, the directory is converted
 * to a regular dentry block and -EAGAIN is returned, presumably so the
 * caller retries the add through the normal path.
 */
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
				const struct qstr *orig_name,
				struct inode *inode, nid_t ino, umode_t mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct page *ipage;
	unsigned int bit_pos;
	f2fs_hash_t name_hash;
	void *inline_dentry = NULL;
	struct f2fs_dentry_ptr d;
	int slots = GET_DENTRY_SLOTS(new_name->len);
	struct page *page = NULL;
	int err = 0;

	/* the directory's inode page holds the inline dentry area */
	ipage = get_node_page(sbi, dir->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	inline_dentry = inline_data_addr(dir, ipage);
	make_dentry_ptr_inline(dir, &d, inline_dentry);

	/* look for a run of 'slots' free dentry slots for the new name */
	bit_pos = room_for_filename(d.bitmap, slots, d.max);
	if (bit_pos >= d.max) {
		/*
		 * No room inline: convert to a regular dentry block and
		 * report -EAGAIN.  On conversion failure we return without
		 * releasing ipage here; NOTE(review): this assumes the
		 * f2fs_move_*_dirents() error paths put ipage themselves
		 * (the visible f2fs_move_rehashed_dirents error paths do) —
		 * confirm the same holds for f2fs_move_inline_dirents.
		 */
		err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
		if (err)
			return err;
		err = -EAGAIN;
		goto out;
	}

	if (inode) {
		/* serialize updates of the newly linked inode */
		down_write(&F2FS_I(inode)->i_sem);
		page = init_inode_metadata(inode, dir, new_name,
						orig_name, ipage);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto fail;
		}
	}

	/* don't modify the inline area while the page is under writeback */
	f2fs_wait_on_page_writeback(ipage, NODE, true);

	name_hash = f2fs_dentry_hash(new_name, NULL);
	f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);

	set_page_dirty(ipage);

	/* we don't need to mark_inode_dirty now */
	if (inode) {
		f2fs_i_pino_write(inode, dir->i_ino);
		f2fs_put_page(page, 1);
	}

	update_parent_metadata(dir, inode, 0);
fail:
	if (inode)
		up_write(&F2FS_I(inode)->i_sem);
out:
	f2fs_put_page(ipage, 1);
	return err;
}
|
|
|
|
|
|
|
|
/*
 * Remove @dentry from @dir's inline dentry area: clear its slot bits in
 * the in-use bitmap, update the directory's timestamps, and drop
 * @inode's link if an inode is given.  @page arrives unlocked with a
 * reference held and is released here.
 */
void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
					struct inode *dir, struct inode *inode)
{
	struct f2fs_dentry_ptr d;
	void *inline_dentry;
	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
	unsigned int bit_pos;
	int i;

	lock_page(page);
	/* don't modify the inline area while the page is under writeback */
	f2fs_wait_on_page_writeback(page, NODE, true);

	inline_dentry = inline_data_addr(dir, page);
	make_dentry_ptr_inline(dir, &d, inline_dentry);

	/* slot index of this entry inside the inline dentry array */
	bit_pos = dentry - d.dentry;
	for (i = 0; i < slots; i++)
		__clear_bit_le(bit_pos + i, d.bitmap);

	set_page_dirty(page);
	f2fs_put_page(page, 1);

	dir->i_ctime = dir->i_mtime = current_time(dir);
	f2fs_mark_inode_dirty_sync(dir, false);

	if (inode)
		f2fs_drop_nlink(dir, inode);
}
|
|
|
|
|
|
|
|
bool f2fs_empty_inline_dir(struct inode *dir)
|
|
|
|
{
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
|
|
|
|
struct page *ipage;
|
|
|
|
unsigned int bit_pos = 2;
|
2017-07-19 00:19:05 +08:00
|
|
|
void *inline_dentry;
|
2017-07-16 15:08:54 +08:00
|
|
|
struct f2fs_dentry_ptr d;
|
2014-09-24 18:17:53 +08:00
|
|
|
|
|
|
|
ipage = get_node_page(sbi, dir->i_ino);
|
|
|
|
if (IS_ERR(ipage))
|
|
|
|
return false;
|
|
|
|
|
2017-07-19 00:19:05 +08:00
|
|
|
inline_dentry = inline_data_addr(dir, ipage);
|
|
|
|
make_dentry_ptr_inline(dir, &d, inline_dentry);
|
2017-07-16 15:08:54 +08:00
|
|
|
|
|
|
|
bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos);
|
2014-09-24 18:17:53 +08:00
|
|
|
|
|
|
|
f2fs_put_page(ipage, 1);
|
|
|
|
|
2017-07-16 15:08:54 +08:00
|
|
|
if (bit_pos < d.max)
|
2014-09-24 18:17:53 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-04-27 16:26:24 -07:00
|
|
|
/*
 * Emit the inline dentries of the directory backing @file through @ctx
 * (readdir path).  Returns 0 on success or at end-of-directory, a
 * negative errno otherwise.
 */
int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
				struct fscrypt_str *fstr)
{
	struct inode *inode = file_inode(file);
	struct page *ipage = NULL;
	struct f2fs_dentry_ptr d;
	void *inline_dentry = NULL;
	int err;

	/*
	 * inline_dentry is still NULL here; this first call is only used
	 * to obtain d.max (the inline slot count) for the end-of-dir check.
	 */
	make_dentry_ptr_inline(inode, &d, inline_dentry);

	if (ctx->pos == d.max)
		return 0;

	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	/*
	 * f2fs_readdir was protected by inode.i_rwsem, it is safe to access
	 * ipage without page's lock held.
	 */
	unlock_page(ipage);

	inline_dentry = inline_data_addr(inode, ipage);

	/* rebuild d now that the real inline dentry area is available */
	make_dentry_ptr_inline(inode, &d, inline_dentry);

	err = f2fs_fill_dentries(ctx, &d, 0, fstr);
	if (!err)
		ctx->pos = d.max;

	/* page was already unlocked above, so drop the reference only */
	f2fs_put_page(ipage, 0);
	return err < 0 ? err : 0;
}
|
2015-10-15 11:34:49 -07:00
|
|
|
|
|
|
|
/*
 * FIEMAP handler for an inline-data inode: report the inline area as a
 * single DATA_INLINE | NOT_ALIGNED | LAST extent covering at most
 * [start, start + len).  Returns -EAGAIN when the inode no longer holds
 * inline data, presumably so the caller falls back to the regular
 * fiemap path.
 */
int f2fs_inline_data_fiemap(struct inode *inode,
		struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
{
	__u64 byteaddr, ilen;
	__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
		FIEMAP_EXTENT_LAST;
	struct node_info ni;
	struct page *ipage;
	int err = 0;

	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
	if (IS_ERR(ipage))
		return PTR_ERR(ipage);

	/* inline state may have been cleared before we got the page */
	if (!f2fs_has_inline_data(inode)) {
		err = -EAGAIN;
		goto out;
	}

	/* clamp the reported range to the inline data actually present */
	ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode));
	if (start >= ilen)
		goto out;
	if (start + len < ilen)
		ilen = start + len;
	ilen -= start;

	/* physical byte address = node block address + inline-area offset */
	get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
	byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
	byteaddr += (char *)inline_data_addr(inode, ipage) -
					(char *)F2FS_INODE(ipage);
	err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
out:
	f2fs_put_page(ipage, 1);
	return err;
}
|