539 lines
14 KiB
C
Raw Permalink Normal View History

/*
* dat.c - NILFS disk address translation.
*
* Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Written by Koji Sato.
*/
#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/string.h>
#include <linux/errno.h>
#include "nilfs.h"
#include "mdt.h"
#include "alloc.h"
#include "dat.h"
#define NILFS_CNO_MIN ((__u64)1)
#define NILFS_CNO_MAX (~(__u64)0)
/**
* struct nilfs_dat_info - on-memory private data of DAT file
* @mi: on-memory private data of metadata file
* @palloc_cache: persistent object allocator cache of DAT file
* @shadow: shadow map of DAT file
*/
struct nilfs_dat_info {
struct nilfs_mdt_info mi;
struct nilfs_palloc_cache palloc_cache;
struct nilfs_shadow_map shadow;
};
static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
{
return (struct nilfs_dat_info *)NILFS_MDT(dat);
}
static int nilfs_dat_prepare_entry(struct inode *dat,
struct nilfs_palloc_req *req, int create)
{
int ret;
ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
create, &req->pr_entry_bh);
if (unlikely(ret == -ENOENT)) {
nilfs_msg(dat->i_sb, KERN_ERR,
"DAT doesn't have a block to manage vblocknr = %llu",
(unsigned long long)req->pr_entry_nr);
/*
* Return internal code -EINVAL to notify bmap layer of
* metadata corruption.
*/
ret = -EINVAL;
}
return ret;
}
static void nilfs_dat_commit_entry(struct inode *dat,
struct nilfs_palloc_req *req)
{
mark_buffer_dirty(req->pr_entry_bh);
nilfs_mdt_mark_dirty(dat);
brelse(req->pr_entry_bh);
}
static void nilfs_dat_abort_entry(struct inode *dat,
struct nilfs_palloc_req *req)
{
brelse(req->pr_entry_bh);
}
int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
int ret;
nilfs2: fix incorrect inode allocation from reserved inodes commit 93aef9eda1cea9e84ab2453fcceb8addad0e46f1 upstream. If the bitmap block that manages the inode allocation status is corrupted, nilfs_ifile_create_inode() may allocate a new inode from the reserved inode area where it should not be allocated. Previous fix commit d325dc6eb763 ("nilfs2: fix use-after-free bug of struct nilfs_root"), fixed the problem that reserved inodes with inode numbers less than NILFS_USER_INO (=11) were incorrectly reallocated due to bitmap corruption, but since the start number of non-reserved inodes is read from the super block and may change, in which case inode allocation may occur from the extended reserved inode area. If that happens, access to that inode will cause an IO error, causing the file system to degrade to an error state. Fix this potential issue by adding a wraparound option to the common metadata object allocation routine and by modifying nilfs_ifile_create_inode() to disable the option so that it only allocates inodes with inode numbers greater than or equal to the inode number read in "nilfs->ns_first_ino", regardless of the bitmap status of reserved inodes. Link: https://lkml.kernel.org/r/20240623051135.4180-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Cc: Hillf Danton <hdanton@sina.com> Cc: Jan Kara <jack@suse.cz> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> (cherry picked from commit de9d81daaca2b7b3c853bf2ff729353e84f06b18) Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
2024-06-23 14:11:35 +09:00
ret = nilfs_palloc_prepare_alloc_entry(dat, req, true);
if (ret < 0)
return ret;
ret = nilfs_dat_prepare_entry(dat, req, 1);
if (ret < 0)
nilfs_palloc_abort_alloc_entry(dat, req);
return ret;
}
void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
entry->de_blocknr = cpu_to_le64(0);
kunmap_atomic(kaddr);
nilfs_palloc_commit_alloc_entry(dat, req);
nilfs_dat_commit_entry(dat, req);
}
void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
nilfs_dat_abort_entry(dat, req);
nilfs_palloc_abort_alloc_entry(dat, req);
}
static void nilfs_dat_commit_free(struct inode *dat,
struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
entry->de_blocknr = cpu_to_le64(0);
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
nilfs2: fix NULL pointer dereference in nilfs_palloc_commit_free_entry() commit f0a0ccda18d6fd826d7c7e7ad48a6ed61c20f8b4 upstream. Syzbot reported a null-ptr-deref bug: NILFS (loop0): segctord starting. Construction interval = 5 seconds, CP frequency < 30 seconds general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 3603 Comm: segctord Not tainted 6.1.0-rc2-syzkaller-00105-gb229b6ca5abb #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 RIP: 0010:nilfs_palloc_commit_free_entry+0xe5/0x6b0 fs/nilfs2/alloc.c:608 Code: 00 00 00 00 fc ff df 80 3c 02 00 0f 85 cd 05 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 73 08 49 8d 7e 10 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 26 05 00 00 49 8b 46 10 be a6 00 00 00 48 c7 c7 RSP: 0018:ffffc90003dff830 EFLAGS: 00010212 RAX: dffffc0000000000 RBX: ffff88802594e218 RCX: 000000000000000d RDX: 0000000000000002 RSI: 0000000000002000 RDI: 0000000000000010 RBP: ffff888071880222 R08: 0000000000000005 R09: 000000000000003f R10: 000000000000000d R11: 0000000000000000 R12: ffff888071880158 R13: ffff88802594e220 R14: 0000000000000000 R15: 0000000000000004 FS: 0000000000000000(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb1c08316a8 CR3: 0000000018560000 CR4: 0000000000350ee0 Call Trace: <TASK> nilfs_dat_commit_free fs/nilfs2/dat.c:114 [inline] nilfs_dat_commit_end+0x464/0x5f0 fs/nilfs2/dat.c:193 nilfs_dat_commit_update+0x26/0x40 fs/nilfs2/dat.c:236 nilfs_btree_commit_update_v+0x87/0x4a0 fs/nilfs2/btree.c:1940 nilfs_btree_commit_propagate_v fs/nilfs2/btree.c:2016 [inline] nilfs_btree_propagate_v fs/nilfs2/btree.c:2046 [inline] nilfs_btree_propagate+0xa00/0xd60 fs/nilfs2/btree.c:2088 nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 nilfs_collect_file_data+0x45/0xd0 fs/nilfs2/segment.c:568 nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1018 nilfs_segctor_scan_file+0x3f4/0x6f0 fs/nilfs2/segment.c:1067 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1197 [inline] nilfs_segctor_collect fs/nilfs2/segment.c:1503 [inline] nilfs_segctor_do_construct+0x12fc/0x6af0 fs/nilfs2/segment.c:2045 nilfs_segctor_construct+0x8e3/0xb30 fs/nilfs2/segment.c:2379 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2487 [inline] nilfs_segctor_thread+0x3c3/0xf30 fs/nilfs2/segment.c:2570 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 </TASK> ... If DAT metadata file is corrupted on disk, there is a case where req->pr_desc_bh is NULL and blocknr is 0 at nilfs_dat_commit_end() during a b-tree operation that cascadingly updates ancestor nodes of the b-tree, because nilfs_dat_commit_alloc() for a lower level block can initialize the blocknr on the same DAT entry between nilfs_dat_prepare_end() and nilfs_dat_commit_end(). If this happens, nilfs_dat_commit_end() calls nilfs_dat_commit_free() without valid buffer heads in req->pr_desc_bh and req->pr_bitmap_bh, and causes the NULL pointer dereference above in nilfs_palloc_commit_free_entry() function, which leads to a crash. Fix this by adding a NULL check on req->pr_desc_bh and req->pr_bitmap_bh before nilfs_palloc_commit_free_entry() in nilfs_dat_commit_free(). This also calls nilfs_error() in that case to notify that there is a fatal flaw in the filesystem metadata and prevent further operations. Link: https://lkml.kernel.org/r/00000000000097c20205ebaea3d6@google.com Link: https://lkml.kernel.org/r/20221114040441.1649940-1-zhangpeng362@huawei.com Link: https://lkml.kernel.org/r/20221119120542.17204-1-konishi.ryusuke@gmail.com Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Reported-by: syzbot+ebe05ee8e98f755f61d0@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2022-11-19 21:05:42 +09:00
if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
nilfs_error(dat->i_sb,
"state inconsistency probably due to duplicate use of vblocknr = %llu",
(unsigned long long)req->pr_entry_nr);
return;
}
nilfs_palloc_commit_free_entry(dat, req);
}
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
return nilfs_dat_prepare_entry(dat, req, 0);
}
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
sector_t blocknr)
{
struct nilfs_dat_entry *entry;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
entry->de_blocknr = cpu_to_le64(blocknr);
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
}
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
sector_t blocknr;
void *kaddr;
int ret;
ret = nilfs_dat_prepare_entry(dat, req, 0);
if (ret < 0)
return ret;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
blocknr = le64_to_cpu(entry->de_blocknr);
kunmap_atomic(kaddr);
if (blocknr == 0) {
ret = nilfs_palloc_prepare_free_entry(dat, req);
if (ret < 0) {
nilfs_dat_abort_entry(dat, req);
return ret;
}
}
return 0;
}
void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
int dead)
{
struct nilfs_dat_entry *entry;
__u64 start, end;
sector_t blocknr;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
end = start = le64_to_cpu(entry->de_start);
if (!dead) {
end = nilfs_mdt_cno(dat);
WARN_ON(start > end);
}
entry->de_end = cpu_to_le64(end);
blocknr = le64_to_cpu(entry->de_blocknr);
kunmap_atomic(kaddr);
if (blocknr == 0)
nilfs_dat_commit_free(dat, req);
else
nilfs_dat_commit_entry(dat, req);
}
void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
__u64 start;
sector_t blocknr;
void *kaddr;
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
req->pr_entry_bh, kaddr);
start = le64_to_cpu(entry->de_start);
blocknr = le64_to_cpu(entry->de_blocknr);
kunmap_atomic(kaddr);
if (start == nilfs_mdt_cno(dat) && blocknr == 0)
nilfs_palloc_abort_free_entry(dat, req);
nilfs_dat_abort_entry(dat, req);
}
int nilfs_dat_prepare_update(struct inode *dat,
struct nilfs_palloc_req *oldreq,
struct nilfs_palloc_req *newreq)
{
int ret;
ret = nilfs_dat_prepare_end(dat, oldreq);
if (!ret) {
ret = nilfs_dat_prepare_alloc(dat, newreq);
if (ret < 0)
nilfs_dat_abort_end(dat, oldreq);
}
return ret;
}
void nilfs_dat_commit_update(struct inode *dat,
struct nilfs_palloc_req *oldreq,
struct nilfs_palloc_req *newreq, int dead)
{
nilfs_dat_commit_end(dat, oldreq, dead);
nilfs_dat_commit_alloc(dat, newreq);
}
void nilfs_dat_abort_update(struct inode *dat,
struct nilfs_palloc_req *oldreq,
struct nilfs_palloc_req *newreq)
{
nilfs_dat_abort_end(dat, oldreq);
nilfs_dat_abort_alloc(dat, newreq);
}
/**
* nilfs_dat_mark_dirty -
* @dat: DAT file inode
* @vblocknr: virtual block number
*
* Description:
*
* Return Value: On success, 0 is returned. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.
*
* %-ENOMEM - Insufficient amount of memory available.
*/
int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
{
struct nilfs_palloc_req req;
int ret;
req.pr_entry_nr = vblocknr;
ret = nilfs_dat_prepare_entry(dat, &req, 0);
if (ret == 0)
nilfs_dat_commit_entry(dat, &req);
return ret;
}
/**
* nilfs_dat_freev - free virtual block numbers
* @dat: DAT file inode
* @vblocknrs: array of virtual block numbers
* @nitems: number of virtual block numbers
*
* Description: nilfs_dat_freev() frees the virtual block numbers specified by
* @vblocknrs and @nitems.
*
* Return Value: On success, 0 is returned. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.
*
* %-ENOMEM - Insufficient amount of memory available.
*
* %-ENOENT - The virtual block number have not been allocated.
*/
int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
{
return nilfs_palloc_freev(dat, vblocknrs, nitems);
}
/**
* nilfs_dat_move - change a block number
* @dat: DAT file inode
* @vblocknr: virtual block number
* @blocknr: block number
*
* Description: nilfs_dat_move() changes the block number associated with
* @vblocknr to @blocknr.
*
* Return Value: On success, 0 is returned. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.
*
* %-ENOMEM - Insufficient amount of memory available.
*/
int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
{
struct buffer_head *entry_bh;
struct nilfs_dat_entry *entry;
void *kaddr;
int ret;
ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
if (ret < 0)
return ret;
/*
* The given disk block number (blocknr) is not yet written to
* the device at this point.
*
* To prevent nilfs_dat_translate() from returning the
* uncommitted block number, this makes a copy of the entry
* buffer and redirects nilfs_dat_translate() to the copy.
*/
if (!buffer_nilfs_redirected(entry_bh)) {
ret = nilfs_mdt_freeze_buffer(dat, entry_bh);
if (ret) {
brelse(entry_bh);
return ret;
}
}
kaddr = kmap_atomic(entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
nilfs_crit(dat->i_sb,
"%s: invalid vblocknr = %llu, [%llu, %llu)",
__func__, (unsigned long long)vblocknr,
(unsigned long long)le64_to_cpu(entry->de_start),
(unsigned long long)le64_to_cpu(entry->de_end));
kunmap_atomic(kaddr);
brelse(entry_bh);
return -EINVAL;
}
WARN_ON(blocknr == 0);
entry->de_blocknr = cpu_to_le64(blocknr);
kunmap_atomic(kaddr);
mark_buffer_dirty(entry_bh);
nilfs_mdt_mark_dirty(dat);
brelse(entry_bh);
return 0;
}
/**
* nilfs_dat_translate - translate a virtual block number to a block number
* @dat: DAT file inode
* @vblocknr: virtual block number
* @blocknrp: pointer to a block number
*
* Description: nilfs_dat_translate() maps the virtual block number @vblocknr
* to the corresponding block number.
*
* Return Value: On success, 0 is returned and the block number associated
* with @vblocknr is stored in the place pointed by @blocknrp. On error, one
* of the following negative error codes is returned.
*
* %-EIO - I/O error.
*
* %-ENOMEM - Insufficient amount of memory available.
*
* %-ENOENT - A block number associated with @vblocknr does not exist.
*/
int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
{
struct buffer_head *entry_bh, *bh;
struct nilfs_dat_entry *entry;
sector_t blocknr;
void *kaddr;
int ret;
ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
if (ret < 0)
return ret;
if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) {
bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh);
if (bh) {
WARN_ON(!buffer_uptodate(bh));
brelse(entry_bh);
entry_bh = bh;
}
}
kaddr = kmap_atomic(entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
blocknr = le64_to_cpu(entry->de_blocknr);
if (blocknr == 0) {
ret = -ENOENT;
goto out;
}
*blocknrp = blocknr;
out:
kunmap_atomic(kaddr);
brelse(entry_bh);
return ret;
}
ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
size_t nvi)
{
struct buffer_head *entry_bh;
struct nilfs_dat_entry *entry;
struct nilfs_vinfo *vinfo = buf;
__u64 first, last;
void *kaddr;
unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
int i, j, n, ret;
for (i = 0; i < nvi; i += n) {
ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr,
0, &entry_bh);
if (ret < 0)
return ret;
kaddr = kmap_atomic(entry_bh->b_page);
/* last virtual block number in this block */
first = vinfo->vi_vblocknr;
do_div(first, entries_per_block);
first *= entries_per_block;
last = first + entries_per_block - 1;
for (j = i, n = 0;
j < nvi && vinfo->vi_vblocknr >= first &&
vinfo->vi_vblocknr <= last;
j++, n++, vinfo = (void *)vinfo + visz) {
entry = nilfs_palloc_block_get_entry(
dat, vinfo->vi_vblocknr, entry_bh, kaddr);
vinfo->vi_start = le64_to_cpu(entry->de_start);
vinfo->vi_end = le64_to_cpu(entry->de_end);
vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
}
kunmap_atomic(kaddr);
brelse(entry_bh);
}
return nvi;
}
/**
* nilfs_dat_read - read or get dat inode
* @sb: super block instance
* @entry_size: size of a dat entry
* @raw_inode: on-disk dat inode
* @inodep: buffer to store the inode
*/
int nilfs_dat_read(struct super_block *sb, size_t entry_size,
struct nilfs_inode *raw_inode, struct inode **inodep)
{
static struct lock_class_key dat_lock_key;
struct inode *dat;
struct nilfs_dat_info *di;
int err;
if (entry_size > sb->s_blocksize) {
nilfs_err(sb, "too large DAT entry size: %zu bytes",
entry_size);
return -EINVAL;
} else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) {
nilfs_err(sb, "too small DAT entry size: %zu bytes",
entry_size);
return -EINVAL;
}
dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO);
if (unlikely(!dat))
return -ENOMEM;
if (!(dat->i_state & I_NEW))
goto out;
err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di));
if (err)
goto failed;
err = nilfs_palloc_init_blockgroup(dat, entry_size);
if (err)
goto failed;
di = NILFS_DAT_I(dat);
lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
nilfs_palloc_setup_cache(dat, &di->palloc_cache);
err = nilfs_mdt_setup_shadow_map(dat, &di->shadow);
if (err)
goto failed;
err = nilfs_read_inode_common(dat, raw_inode);
if (err)
goto failed;
unlock_new_inode(dat);
out:
*inodep = dat;
return 0;
failed:
iget_failed(dat);
return err;
}