msm-4.14/fs/posix_acl.c

955 lines
21 KiB
C
Raw Normal View History

/*
* Copyright (C) 2002,2003 by Andreas Gruenbacher <a.gruenbacher@computer.org>
*
* Fixes from William Schumacher incorporated on 15 March 2001.
* (Reported by Charles Bertsch, <CBertsch@microtest.com>).
*/
/*
* This file contains generic functions for manipulating
* POSIX 1003.1e draft standard 17 ACLs.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
/*
 * Map an ACL type onto the address of the matching cache slot in @inode:
 * ACL_TYPE_ACCESS selects i_acl, ACL_TYPE_DEFAULT selects i_default_acl.
 * Any other type value is treated as a programming error and hits BUG().
 */
static struct posix_acl **acl_by_type(struct inode *inode, int type)
{
	if (type == ACL_TYPE_ACCESS)
		return &inode->i_acl;
	if (type == ACL_TYPE_DEFAULT)
		return &inode->i_default_acl;

	/* Unknown ACL type. */
	BUG();
}
struct posix_acl *get_cached_acl(struct inode *inode, int type)
{
struct posix_acl **p = acl_by_type(inode, type);
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
struct posix_acl *acl;
for (;;) {
rcu_read_lock();
acl = rcu_dereference(*p);
if (!acl || is_uncached_acl(acl) ||
f2fs: updates on v4.16-rc1 Pull f2fs updates from Jaegeuk Kim: "In this round, we've followed up to support some generic features such as cgroup, block reservation, linking fscrypt_ops, delivering write_hints, and some ioctls. And, we could fix some corner cases in terms of power-cut recovery and subtle deadlocks. Enhancements: - bitmap operations to handle NAT blocks - readahead to improve readdir speed - switch to use fscrypt_* - apply write hints for direct IO - add reserve_root=%u,resuid=%u,resgid=%u to reserve blocks for root/uid/gid - modify b_avail and b_free to consider root reserved blocks - support cgroup writeback - support FIEMAP_FLAG_XATTR for fibmap - add F2FS_IOC_PRECACHE_EXTENTS to pre-cache extents - add F2FS_IOC_{GET/SET}_PIN_FILE to pin LBAs for data blocks - support inode creation time Bug fixs: - sysfile-based quota operations - memory footprint accounting - allow to write data on partial preallocation case - fix deadlock case on fallocate - fix to handle fill_super errors - fix missing inode updates of fsync'ed file - recover renamed file which was fsycn'ed before - drop inmemory pages in corner error case - keep last_disk_size correctly - recover missing i_inline flags during roll-forward Various clean-up patches were added as well" Cherry-pick from origin/upstream-f2fs-stable-linux-4.14.y: 00f0eaaadfe0 f2fs: support inode creation time 3e7444adf953 f2fs: rebuild sit page from sit info in mem 76688790c3bf f2fs: stop issuing discard if fs is readonly ae93dca26413 f2fs: clean up duplicated assignment in init_discard_policy 0052bba1313e f2fs: use GFP_F2FS_ZERO for cleanup 6271336cfa80 f2fs: allow to recover node blocks given updated checkpoint e003a2d15352 f2fs: recover some i_inline flags 3cafae53f3ef f2fs: correct removexattr behavior for null valued extended attribute 78d7fa9ac21f f2fs: drop page cache after fs shutdown 150b61cee574 f2fs: stop gc/discard thread after fs shutdown cf27ccc41e86 f2fs: hanlde error case in f2fs_ioc_shutdown 
1526117cdaa6 f2fs: split need_inplace_update cd576d7b7a95 f2fs: fix to update last_disk_size correctly 7a57bd3313c2 f2fs: kill F2FS_INLINE_XATTR_ADDRS for cleanup 95eb6a6ceb04 f2fs: clean up error path of fill_super 63c949c97384 f2fs: avoid hungtask when GC encrypted block if io_bits is set 583d13d10c8a f2fs: allow quota to use reserved blocks fbe371d3cdb2 f2fs: fix to drop all inmem pages correctly 7e08ce43562d f2fs: speed up defragment on sparse file 0f914cab8ce3 f2fs: support F2FS_IOC_PRECACHE_EXTENTS ed1311e58555 f2fs: add an ioctl to disable GC for specific file b08974ab5e0a f2fs: prevent newly created inode from being dirtied incorrectly e8a8acf602a3 f2fs: support FIEMAP_FLAG_XATTR 042aeed690a3 f2fs: fix to cover f2fs_inline_data_fiemap with inode_lock 9cf9c37ebe90 f2fs: check node page again in write end io b9eedb48132e f2fs: fix to caclulate required free section correctly 75ae50cf1539 f2fs: handle newly created page when revoking inmem pages 871b97493627 f2fs: add resgid and resuid to reserve root blocks 0cf361acdb47 f2fs: implement cgroup writeback support 196d52cf4ebe f2fs: remove unused pend_list_tag 6e899a83f5e0 f2fs: avoid high cpu usage in discard thread bb1af976c2a2 f2fs: make local functions static ad658936ea9d f2fs: add reserved blocks for root user c6e64f1ff11c f2fs: check segment type in __f2fs_replace_block 88cdc60b7308 f2fs: update inode info to inode page for new file 4203e9fbd857 f2fs: show precise # of blocks that user/root can use 47dc137291e3 f2fs: clean up unneeded declaration 27f9e55195b1 f2fs: continue to do direct IO if we only preallocate partial blocks f2f137831464 f2fs: enable quota at remount from r to w d507f30065b3 f2fs: skip stop_checkpoint for user data writes 4b242ffcdb1f f2fs: fix missing error number for xattr operation c6c76a0e6154 f2fs: recover directory operations by fsync 5943e3992eed f2fs: return error during fill_super 93579c97259b f2fs: fix an error case of missing update inode page 3d753c15af04 f2fs: fix potential 
hangtask in f2fs_trace_pid 625f066c5d18 f2fs: no need return value in restore summary process f76c831abdd7 f2fs: use unlikely for release case 0408ad5efb28 f2fs: don't return value in truncate_data_blocks_range 62e507cd2b91 f2fs: clean up f2fs_map_blocks 233b197757c0 f2fs: clean up hash codes 58d550e5da7c f2fs: fix error handling in fill_super 35d78e6fc851 f2fs: spread f2fs_k{m,z}alloc fecf31ce115a f2fs: inject fault to kvmalloc 41af39db9fd0 f2fs: inject fault to kzalloc 9fecb4159dc1 f2fs: remove a redundant conditional expression 8e56c02ee9fa f2fs: apply write hints to select the type of segment for direct write a4015f91473e f2fs: switch to fscrypt_prepare_setattr() 56351ec774b8 f2fs: switch to fscrypt_prepare_lookup() 51f2caabf9a8 f2fs: switch to fscrypt_prepare_rename() f9a35b22b914 f2fs: switch to fscrypt_prepare_link() 787bd2632d66 f2fs: switch to fscrypt_file_open() eb9d8ee0fdb8 posix_acl: convert posix_acl.a_refcount from atomic_t to refcount_t bd0bb8ab0c90 f2fs: remove repeated f2fs_bug_on d1c0441c02cb f2fs: remove an excess variable 3f12c94d1b3c f2fs: fix lock dependency in between dio_rwsem & i_mmap_sem 39685b35e80a f2fs: remove unused parameter b83577043a48 f2fs: still write data if preallocate only partial blocks b61cf217182b f2fs: introduce sysfs readdir_ra to readahead inode block in readdir 44ed9b2d125c f2fs: fix concurrent problem for updating free bitmap 08be3792ef4b f2fs: remove unneeded memory footprint accounting 33362399b3fc f2fs: no need to read nat block if nat_block_bitmap is set 01bb5c8b1f32 f2fs: reserve nid resource for quota sysfile Change-Id: Ie0beb18a04fc300d1591d64c7ae542a478644e26 Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
2017-11-16 16:59:14 +08:00
refcount_inc_not_zero(&acl->a_refcount))
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
break;
rcu_read_unlock();
cpu_relax();
}
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
rcu_read_unlock();
return acl;
}
EXPORT_SYMBOL(get_cached_acl);
/*
 * get_cached_acl_rcu - read the cached ACL slot of @type without taking
 * a reference
 *
 * Returns whatever rcu_dereference() yields for the slot. This function
 * neither takes the RCU read lock nor bumps the ACL reference count.
 * NOTE(review): presumably the caller must already be inside an RCU
 * read-side critical section -- confirm against callers.
 */
struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
{
	struct posix_acl **slot = acl_by_type(inode, type);

	return rcu_dereference(*slot);
}
EXPORT_SYMBOL(get_cached_acl_rcu);
/*
 * set_cached_acl - install @acl as the cached ACL of @type for @inode
 *
 * The cache slot receives the result of posix_acl_dup(@acl), swapped in
 * atomically with xchg(). The value previously held in the slot is passed
 * to posix_acl_release() unless it was an "uncached" marker
 * (is_uncached_acl() is true).
 */
void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
{
	struct posix_acl **p = acl_by_type(inode, type);
	struct posix_acl *old;

	old = xchg(p, posix_acl_dup(acl));
	/* Markers are not ACL objects and must not be released. */
	if (!is_uncached_acl(old))
		posix_acl_release(old);
}
EXPORT_SYMBOL(set_cached_acl);
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
static void __forget_cached_acl(struct posix_acl **p)
{
struct posix_acl *old;
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
old = xchg(p, ACL_NOT_CACHED);
if (!is_uncached_acl(old))
posix_acl_release(old);
}
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
/*
 * forget_cached_acl - drop the cached ACL of @type for @inode
 *
 * Looks up the cache slot for @type and hands it to
 * __forget_cached_acl().
 */
void forget_cached_acl(struct inode *inode, int type)
{
	struct posix_acl **slot = acl_by_type(inode, type);

	__forget_cached_acl(slot);
}
EXPORT_SYMBOL(forget_cached_acl);
void forget_all_cached_acls(struct inode *inode)
{
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
__forget_cached_acl(&inode->i_acl);
__forget_cached_acl(&inode->i_default_acl);
}
EXPORT_SYMBOL(forget_all_cached_acls);
struct posix_acl *get_acl(struct inode *inode, int type)
{
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
void *sentinel;
struct posix_acl **p;
struct posix_acl *acl;
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
/*
* The sentinel is used to detect when another operation like
* set_cached_acl() or forget_cached_acl() races with get_acl().
* It is guaranteed that is_uncached_acl(sentinel) is true.
*/
acl = get_cached_acl(inode, type);
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
if (!is_uncached_acl(acl))
return acl;
if (!IS_POSIXACL(inode))
return NULL;
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
sentinel = uncached_acl_sentinel(current);
p = acl_by_type(inode, type);
/*
* If the ACL isn't being read yet, set our sentinel. Otherwise, the
* current value of the ACL will not be ACL_NOT_CACHED and so our own
* sentinel will not be set; another task will update the cache. We
* could wait for that other task to complete its job, but it's easier
* to just call ->get_acl to fetch the ACL ourself. (This is going to
* be an unlikely race.)
*/
if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED)
/* fall through */ ;
/*
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
* Normally, the ACL returned by ->get_acl will be cached.
* A filesystem can prevent that by calling
* forget_cached_acl(inode, type) in ->get_acl.
*
* If the filesystem doesn't have a get_acl() function at all, we'll
* just create the negative cache entry.
*/
if (!inode->i_op->get_acl) {
set_cached_acl(inode, type, NULL);
return NULL;
}
posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2016-03-24 14:38:37 +01:00
acl = inode->i_op->get_acl(inode, type);
if (IS_ERR(acl)) {
/*
* Remove our sentinel so that we don't block future attempts
* to cache the ACL.
*/
cmpxchg(p, sentinel, ACL_NOT_CACHED);
return acl;
}
/*
* Cache the result, but only if our sentinel is still in place.
*/
posix_acl_dup(acl);
if (unlikely(cmpxchg(p, sentinel, acl) != sentinel))
posix_acl_release(acl);
return acl;
}
EXPORT_SYMBOL(get_acl);
/*
* Init a fresh posix_acl
*/
void
posix_acl_init(struct posix_acl *acl, int count)
{
f2fs: updates on v4.16-rc1 Pull f2fs updates from Jaegeuk Kim: "In this round, we've followed up to support some generic features such as cgroup, block reservation, linking fscrypt_ops, delivering write_hints, and some ioctls. And, we could fix some corner cases in terms of power-cut recovery and subtle deadlocks. Enhancements: - bitmap operations to handle NAT blocks - readahead to improve readdir speed - switch to use fscrypt_* - apply write hints for direct IO - add reserve_root=%u,resuid=%u,resgid=%u to reserve blocks for root/uid/gid - modify b_avail and b_free to consider root reserved blocks - support cgroup writeback - support FIEMAP_FLAG_XATTR for fibmap - add F2FS_IOC_PRECACHE_EXTENTS to pre-cache extents - add F2FS_IOC_{GET/SET}_PIN_FILE to pin LBAs for data blocks - support inode creation time Bug fixs: - sysfile-based quota operations - memory footprint accounting - allow to write data on partial preallocation case - fix deadlock case on fallocate - fix to handle fill_super errors - fix missing inode updates of fsync'ed file - recover renamed file which was fsycn'ed before - drop inmemory pages in corner error case - keep last_disk_size correctly - recover missing i_inline flags during roll-forward Various clean-up patches were added as well" Cherry-pick from origin/upstream-f2fs-stable-linux-4.14.y: 00f0eaaadfe0 f2fs: support inode creation time 3e7444adf953 f2fs: rebuild sit page from sit info in mem 76688790c3bf f2fs: stop issuing discard if fs is readonly ae93dca26413 f2fs: clean up duplicated assignment in init_discard_policy 0052bba1313e f2fs: use GFP_F2FS_ZERO for cleanup 6271336cfa80 f2fs: allow to recover node blocks given updated checkpoint e003a2d15352 f2fs: recover some i_inline flags 3cafae53f3ef f2fs: correct removexattr behavior for null valued extended attribute 78d7fa9ac21f f2fs: drop page cache after fs shutdown 150b61cee574 f2fs: stop gc/discard thread after fs shutdown cf27ccc41e86 f2fs: hanlde error case in f2fs_ioc_shutdown 
1526117cdaa6 f2fs: split need_inplace_update cd576d7b7a95 f2fs: fix to update last_disk_size correctly 7a57bd3313c2 f2fs: kill F2FS_INLINE_XATTR_ADDRS for cleanup 95eb6a6ceb04 f2fs: clean up error path of fill_super 63c949c97384 f2fs: avoid hungtask when GC encrypted block if io_bits is set 583d13d10c8a f2fs: allow quota to use reserved blocks fbe371d3cdb2 f2fs: fix to drop all inmem pages correctly 7e08ce43562d f2fs: speed up defragment on sparse file 0f914cab8ce3 f2fs: support F2FS_IOC_PRECACHE_EXTENTS ed1311e58555 f2fs: add an ioctl to disable GC for specific file b08974ab5e0a f2fs: prevent newly created inode from being dirtied incorrectly e8a8acf602a3 f2fs: support FIEMAP_FLAG_XATTR 042aeed690a3 f2fs: fix to cover f2fs_inline_data_fiemap with inode_lock 9cf9c37ebe90 f2fs: check node page again in write end io b9eedb48132e f2fs: fix to caclulate required free section correctly 75ae50cf1539 f2fs: handle newly created page when revoking inmem pages 871b97493627 f2fs: add resgid and resuid to reserve root blocks 0cf361acdb47 f2fs: implement cgroup writeback support 196d52cf4ebe f2fs: remove unused pend_list_tag 6e899a83f5e0 f2fs: avoid high cpu usage in discard thread bb1af976c2a2 f2fs: make local functions static ad658936ea9d f2fs: add reserved blocks for root user c6e64f1ff11c f2fs: check segment type in __f2fs_replace_block 88cdc60b7308 f2fs: update inode info to inode page for new file 4203e9fbd857 f2fs: show precise # of blocks that user/root can use 47dc137291e3 f2fs: clean up unneeded declaration 27f9e55195b1 f2fs: continue to do direct IO if we only preallocate partial blocks f2f137831464 f2fs: enable quota at remount from r to w d507f30065b3 f2fs: skip stop_checkpoint for user data writes 4b242ffcdb1f f2fs: fix missing error number for xattr operation c6c76a0e6154 f2fs: recover directory operations by fsync 5943e3992eed f2fs: return error during fill_super 93579c97259b f2fs: fix an error case of missing update inode page 3d753c15af04 f2fs: fix potential 
hangtask in f2fs_trace_pid 625f066c5d18 f2fs: no need return value in restore summary process f76c831abdd7 f2fs: use unlikely for release case 0408ad5efb28 f2fs: don't return value in truncate_data_blocks_range 62e507cd2b91 f2fs: clean up f2fs_map_blocks 233b197757c0 f2fs: clean up hash codes 58d550e5da7c f2fs: fix error handling in fill_super 35d78e6fc851 f2fs: spread f2fs_k{m,z}alloc fecf31ce115a f2fs: inject fault to kvmalloc 41af39db9fd0 f2fs: inject fault to kzalloc 9fecb4159dc1 f2fs: remove a redundant conditional expression 8e56c02ee9fa f2fs: apply write hints to select the type of segment for direct write a4015f91473e f2fs: switch to fscrypt_prepare_setattr() 56351ec774b8 f2fs: switch to fscrypt_prepare_lookup() 51f2caabf9a8 f2fs: switch to fscrypt_prepare_rename() f9a35b22b914 f2fs: switch to fscrypt_prepare_link() 787bd2632d66 f2fs: switch to fscrypt_file_open() eb9d8ee0fdb8 posix_acl: convert posix_acl.a_refcount from atomic_t to refcount_t bd0bb8ab0c90 f2fs: remove repeated f2fs_bug_on d1c0441c02cb f2fs: remove an excess variable 3f12c94d1b3c f2fs: fix lock dependency in between dio_rwsem & i_mmap_sem 39685b35e80a f2fs: remove unused parameter b83577043a48 f2fs: still write data if preallocate only partial blocks b61cf217182b f2fs: introduce sysfs readdir_ra to readahead inode block in readdir 44ed9b2d125c f2fs: fix concurrent problem for updating free bitmap 08be3792ef4b f2fs: remove unneeded memory footprint accounting 33362399b3fc f2fs: no need to read nat block if nat_block_bitmap is set 01bb5c8b1f32 f2fs: reserve nid resource for quota sysfile Change-Id: Ie0beb18a04fc300d1591d64c7ae542a478644e26 Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
2017-11-16 16:59:14 +08:00
refcount_set(&acl->a_refcount, 1);
acl->a_count = count;
}
EXPORT_SYMBOL(posix_acl_init);
/*
 * posix_acl_alloc - allocate an ACL with room for @count entries.
 *
 * The header is set up through posix_acl_init(); the entries themselves are
 * not filled in here.  Returns NULL if the allocation fails.
 */
struct posix_acl *
posix_acl_alloc(int count, gfp_t flags)
{
	struct posix_acl *acl;
	size_t nbytes;

	/* Fixed header followed by @count trailing entries. */
	nbytes = sizeof(struct posix_acl);
	nbytes += count * sizeof(struct posix_acl_entry);

	acl = kmalloc(nbytes, flags);
	if (!acl)
		return NULL;

	posix_acl_init(acl, count);
	return acl;
}
EXPORT_SYMBOL(posix_acl_alloc);
/*
 * Duplicate @acl into a freshly allocated copy holding a single reference.
 * A NULL @acl, or a failed allocation, yields NULL.
 */
static struct posix_acl *
posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
{
	struct posix_acl *copy;
	int nbytes;

	if (!acl)
		return NULL;

	nbytes = sizeof(struct posix_acl) +
		 acl->a_count * sizeof(struct posix_acl_entry);

	copy = kmemdup(acl, nbytes, flags);
	if (!copy)
		return NULL;

	/* The byte-wise copy carried over the source's refcount; reset it. */
	refcount_set(&copy->a_refcount, 1);
	return copy;
}
/*
 * posix_acl_valid - check that @acl is well formed.
 *
 * Entries must appear in the order USER_OBJ, USER*, GROUP_OBJ, GROUP*,
 * [MASK], OTHER, carry only rwx permission bits, and every named user or
 * group must have a mapping in @user_ns.  A MASK entry is mandatory as soon
 * as any named USER or GROUP entry is present.
 *
 * Returns 0 if the ACL is valid, -EINVAL otherwise.
 */
int
posix_acl_valid(struct user_namespace *user_ns, const struct posix_acl *acl)
{
	const struct posix_acl_entry *entry, *end;
	int expect = ACL_USER_OBJ;	/* tag class we are currently in */
	int mask_required = 0;

	FOREACH_ACL_ENTRY(entry, acl, end) {
		if (entry->e_perm & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
			return -EINVAL;

		switch (entry->e_tag) {
		case ACL_USER_OBJ:
			if (expect != ACL_USER_OBJ)
				return -EINVAL;
			expect = ACL_USER;
			break;

		case ACL_USER:
			if (expect != ACL_USER)
				return -EINVAL;
			if (!kuid_has_mapping(user_ns, entry->e_uid))
				return -EINVAL;
			mask_required = 1;
			break;

		case ACL_GROUP_OBJ:
			if (expect != ACL_USER)
				return -EINVAL;
			expect = ACL_GROUP;
			break;

		case ACL_GROUP:
			if (expect != ACL_GROUP)
				return -EINVAL;
			if (!kgid_has_mapping(user_ns, entry->e_gid))
				return -EINVAL;
			mask_required = 1;
			break;

		case ACL_MASK:
			if (expect != ACL_GROUP)
				return -EINVAL;
			expect = ACL_OTHER;
			break;

		case ACL_OTHER:
			/*
			 * OTHER may follow MASK directly, or follow the group
			 * entries when no mask is needed.
			 */
			if (expect != ACL_OTHER &&
			    (expect != ACL_GROUP || mask_required))
				return -EINVAL;
			expect = 0;
			break;

		default:
			return -EINVAL;
		}
	}

	/* Only an ACL that ended with its OTHER entry is complete. */
	return expect == 0 ? 0 : -EINVAL;
}
EXPORT_SYMBOL(posix_acl_valid);
/*
 * posix_acl_equiv_mode - derive file mode permission bits from @acl.
 *
 * Returns 0 when the ACL can be represented exactly by the traditional
 * permission bits, 1 when it cannot (it has a MASK or named USER/GROUP
 * entries), and -EINVAL when an entry carries an unknown tag.
 *
 * If @mode_p is non-NULL, the rwx bits it points to are replaced by the
 * bits computed from the ACL; all other bits are preserved.
 */
int
posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
{
	const struct posix_acl_entry *entry, *end;
	umode_t bits = 0;
	int extended = 0;

	/* A null ACL can always be presented as mode bits. */
	if (!acl)
		return 0;

	FOREACH_ACL_ENTRY(entry, acl, end) {
		const umode_t perm = entry->e_perm & S_IRWXO;

		switch (entry->e_tag) {
		case ACL_USER_OBJ:
			bits |= perm << 6;
			break;
		case ACL_GROUP_OBJ:
			bits |= perm << 3;
			break;
		case ACL_OTHER:
			bits |= perm;
			break;
		case ACL_MASK:
			/* The mask overrides whatever the group bits were. */
			bits = (bits & ~S_IRWXG) | (perm << 3);
			extended = 1;
			break;
		case ACL_USER:
		case ACL_GROUP:
			extended = 1;
			break;
		default:
			return -EINVAL;
		}
	}

	if (mode_p)
		*mode_p = (*mode_p & ~S_IRWXUGO) | bits;
	return extended;
}
EXPORT_SYMBOL(posix_acl_equiv_mode);
/*
 * posix_acl_from_mode - build the minimal three-entry ACL for @mode.
 *
 * The result holds USER_OBJ, GROUP_OBJ and OTHER entries mirroring the
 * owner, group and other permission bits of @mode.  Returns
 * ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct posix_acl *
posix_acl_from_mode(umode_t mode, gfp_t flags)
{
	struct posix_acl_entry *e;
	struct posix_acl *acl;

	acl = posix_acl_alloc(3, flags);
	if (!acl)
		return ERR_PTR(-ENOMEM);

	e = acl->a_entries;

	e[0].e_tag  = ACL_USER_OBJ;
	e[0].e_perm = (mode & S_IRWXU) >> 6;

	e[1].e_tag  = ACL_GROUP_OBJ;
	e[1].e_perm = (mode & S_IRWXG) >> 3;

	e[2].e_tag  = ACL_OTHER;
	e[2].e_perm = (mode & S_IRWXO);

	return acl;
}
EXPORT_SYMBOL(posix_acl_from_mode);
/*
 * posix_acl_permission - check whether current is granted @want by @acl.
 *
 * @inode: inode whose owner and owning group the *_OBJ entries refer to
 * @acl:   access ACL to evaluate
 * @want:  MAY_READ / MAY_WRITE / MAY_EXEC bits being requested; any other
 *         bits (including MAY_NOT_BLOCK) are ignored
 *
 * Entries are evaluated in ACL order:
 *  - USER_OBJ applies if current's fsuid owns the inode; the mask is not
 *    consulted for it.
 *  - A named USER entry matching current's fsuid is checked against the mask.
 *  - GROUP_OBJ / named GROUP entries apply if current is in the group; the
 *    first one that grants @want is then checked against the mask.
 *  - OTHER applies only if no group entry matched at all.
 *
 * Returns 0 if access is granted, -EACCES if it is denied, and -EIO if the
 * ACL contains an unknown tag or has no OTHER entry.
 */
int
posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
{
	const struct posix_acl_entry *pa, *pe, *mask_obj;
	int found = 0;

	/*
	 * Only the rwx bits are ever compared against ACL entries.
	 * MAY_NOT_BLOCK is a hint for the callers' lookup mode, and valid
	 * entries never carry it in e_perm (posix_acl_valid() rejects anything
	 * but rwx), so leaving it in @want would make every comparison below
	 * fail with -EACCES.
	 */
	want &= MAY_READ | MAY_WRITE | MAY_EXEC;

	FOREACH_ACL_ENTRY(pa, acl, pe) {
		switch (pa->e_tag) {
		case ACL_USER_OBJ:
			/* (May have been checked already) */
			if (uid_eq(inode->i_uid, current_fsuid()))
				goto check_perm;
			break;
		case ACL_USER:
			if (uid_eq(pa->e_uid, current_fsuid()))
				goto mask;
			break;
		case ACL_GROUP_OBJ:
			if (in_group_p(inode->i_gid)) {
				found = 1;
				if ((pa->e_perm & want) == want)
					goto mask;
			}
			break;
		case ACL_GROUP:
			if (in_group_p(pa->e_gid)) {
				found = 1;
				if ((pa->e_perm & want) == want)
					goto mask;
			}
			break;
		case ACL_MASK:
			break;
		case ACL_OTHER:
			/*
			 * A group entry matched but none of them granted
			 * the access: OTHER must not be used as a fallback.
			 */
			if (found)
				return -EACCES;
			goto check_perm;
		default:
			return -EIO;
		}
	}
	return -EIO;

mask:
	/* The mask, if any, sits after the entry that matched. */
	for (mask_obj = pa + 1; mask_obj != pe; mask_obj++) {
		if (mask_obj->e_tag == ACL_MASK) {
			if ((pa->e_perm & mask_obj->e_perm & want) == want)
				return 0;
			return -EACCES;
		}
	}
	/* No mask entry: the matched entry alone decides. */

check_perm:
	if ((pa->e_perm & want) == want)
		return 0;
	return -EACCES;
}
/*
 * Adjust @acl for a newly created inode.  The caller must ensure the acl is
 * only referenced once, since its entries are modified in place.
 *
 * On entry *@mode_p holds the mode passed to open() / creat().  Permissions
 * not granted by the acl are stripped from the mode, and the acl's owner,
 * group-class (MASK if present, else GROUP_OBJ) and other entries are
 * narrowed to what the mode allows.
 *
 * Returns 0 if the resulting acl is equivalent to plain mode bits, 1 if it
 * is not, and -EIO if the acl has an unknown tag or lacks both a MASK and a
 * GROUP_OBJ entry.
 */
static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
{
	struct posix_acl_entry *entry, *end;
	struct posix_acl_entry *owning_group = NULL, *mask = NULL;
	struct posix_acl_entry *group_class;
	umode_t mode = *mode_p;
	int extended = 0;

	FOREACH_ACL_ENTRY(entry, acl, end) {
		switch (entry->e_tag) {
		case ACL_USER_OBJ:
			entry->e_perm &= (mode >> 6) | ~S_IRWXO;
			mode &= (entry->e_perm << 6) | ~S_IRWXU;
			break;

		case ACL_OTHER:
			entry->e_perm &= mode | ~S_IRWXO;
			mode &= entry->e_perm | ~S_IRWXO;
			break;

		case ACL_GROUP_OBJ:
			owning_group = entry;
			break;

		case ACL_MASK:
			mask = entry;
			extended = 1;
			break;

		case ACL_USER:
		case ACL_GROUP:
			extended = 1;
			break;

		default:
			return -EIO;
		}
	}

	/* The group bits of the mode map to the mask when one exists. */
	group_class = mask ? mask : owning_group;
	if (!group_class)
		return -EIO;

	group_class->e_perm &= (mode >> 3) | ~S_IRWXO;
	mode &= (group_class->e_perm << 3) | ~S_IRWXG;

	*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
	return extended;
}
/*
 * Rewrite @acl in place so that it reflects @mode, as for the chmod syscall.
 *
 * USER_OBJ and OTHER take the owner and other bits of @mode; the group bits
 * go to the MASK entry if there is one, otherwise to GROUP_OBJ.  Named USER
 * and GROUP entries are left untouched.
 *
 * Returns 0 on success, or -EIO if the acl has an unknown tag or lacks both
 * a MASK and a GROUP_OBJ entry.
 */
static int __posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode)
{
	struct posix_acl_entry *owning_group = NULL, *mask = NULL;
	struct posix_acl_entry *group_class;
	struct posix_acl_entry *entry, *end;

	FOREACH_ACL_ENTRY(entry, acl, end) {
		switch (entry->e_tag) {
		case ACL_USER_OBJ:
			entry->e_perm = (mode & S_IRWXU) >> 6;
			break;

		case ACL_OTHER:
			entry->e_perm = (mode & S_IRWXO);
			break;

		case ACL_GROUP_OBJ:
			owning_group = entry;
			break;

		case ACL_MASK:
			mask = entry;
			break;

		case ACL_USER:
		case ACL_GROUP:
			break;

		default:
			return -EIO;
		}
	}

	group_class = mask ? mask : owning_group;
	if (!group_class)
		return -EIO;

	group_class->e_perm = (mode & S_IRWXG) >> 3;
	return 0;
}
/*
 * Replace *@acl with a private clone adjusted for inode creation by
 * posix_acl_create_masq().  The reference on the original *@acl is always
 * dropped; on failure *@acl is set to NULL.
 *
 * Returns the (non-negative) result of posix_acl_create_masq(), -ENOMEM if
 * the clone could not be allocated, or another negative error from the masq
 * step.
 */
int
__posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p)
{
	struct posix_acl *copy;
	int rc;

	copy = posix_acl_clone(*acl, gfp);
	if (!copy) {
		rc = -ENOMEM;
	} else {
		rc = posix_acl_create_masq(copy, mode_p);
		if (rc < 0) {
			posix_acl_release(copy);
			copy = NULL;
		}
	}

	posix_acl_release(*acl);
	*acl = copy;
	return rc;
}
EXPORT_SYMBOL(__posix_acl_create);
/*
 * Replace *@acl with a private clone updated to reflect @mode.  The
 * reference on the original *@acl is always dropped; on failure *@acl is
 * set to NULL.
 *
 * Returns 0 on success, -ENOMEM if the clone could not be allocated, or the
 * error reported by __posix_acl_chmod_masq().
 */
int
__posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode)
{
	struct posix_acl *copy;
	int rc;

	copy = posix_acl_clone(*acl, gfp);
	if (!copy) {
		rc = -ENOMEM;
	} else {
		rc = __posix_acl_chmod_masq(copy, mode);
		if (rc) {
			posix_acl_release(copy);
			copy = NULL;
		}
	}

	posix_acl_release(*acl);
	*acl = copy;
	return rc;
}
EXPORT_SYMBOL(__posix_acl_chmod);
/*
 * posix_acl_chmod - bring an inode's access ACL in line with a new @mode.
 *
 * Does nothing (returns 0) if the inode does not use POSIX ACLs, has no
 * access ACL, or get_acl() reports -EOPNOTSUPP.  Returns -EOPNOTSUPP if the
 * inode has no set_acl operation, otherwise the result of updating and
 * storing the ACL.
 */
int
posix_acl_chmod(struct inode *inode, umode_t mode)
{
	struct posix_acl *acl;
	int rc;

	if (!IS_POSIXACL(inode))
		return 0;
	if (!inode->i_op->set_acl)
		return -EOPNOTSUPP;

	acl = get_acl(inode, ACL_TYPE_ACCESS);
	if (!acl)
		return 0;
	if (IS_ERR(acl))
		return acl == ERR_PTR(-EOPNOTSUPP) ? 0 : PTR_ERR(acl);

	/* On failure __posix_acl_chmod() has already dropped our reference. */
	rc = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
	if (rc)
		return rc;

	rc = inode->i_op->set_acl(inode, acl, ACL_TYPE_ACCESS);
	posix_acl_release(acl);
	return rc;
}
EXPORT_SYMBOL(posix_acl_chmod);
/*
 * posix_acl_create - compute the ACLs for an inode being created in @dir.
 *
 * *@acl and *@default_acl are always initialised to NULL first.  If @dir has
 * no usable default ACL, the umask is applied to *@mode and nothing else
 * happens.  Otherwise a clone of the default ACL is masked against *@mode
 * (updating *@mode): it is returned in *@acl unless it is equivalent to the
 * plain mode bits, and the directory's default ACL is handed back in
 * *@default_acl when the new inode is itself a directory.
 *
 * Returns 0 on success or a negative errno; on error both output pointers
 * are NULL.
 */
int
posix_acl_create(struct inode *dir, umode_t *mode,
		struct posix_acl **default_acl, struct posix_acl **acl)
{
	struct posix_acl *dflt;
	struct posix_acl *copy;
	int rc;

	*acl = NULL;
	*default_acl = NULL;

	if (S_ISLNK(*mode) || !IS_POSIXACL(dir))
		return 0;

	dflt = get_acl(dir, ACL_TYPE_DEFAULT);
	if (!dflt || dflt == ERR_PTR(-EOPNOTSUPP)) {
		*mode &= ~current_umask();
		return 0;
	}
	if (IS_ERR(dflt))
		return PTR_ERR(dflt);

	copy = posix_acl_clone(dflt, GFP_NOFS);
	if (!copy) {
		posix_acl_release(dflt);
		return -ENOMEM;
	}

	rc = posix_acl_create_masq(copy, mode);
	if (rc < 0) {
		posix_acl_release(copy);
		posix_acl_release(dflt);
		return rc;
	}

	/* An access ACL equivalent to the mode bits need not be stored. */
	if (rc > 0)
		*acl = copy;
	else
		posix_acl_release(copy);

	/* Only directories inherit the default ACL itself. */
	if (S_ISDIR(*mode))
		*default_acl = dflt;
	else
		posix_acl_release(dflt);

	return 0;
}
EXPORT_SYMBOL_GPL(posix_acl_create);
/**
 * posix_acl_update_mode  -  update mode in set_acl
 * @inode: inode whose current mode is the starting point
 * @mode_p: where the resulting mode is stored on success
 * @acl: ACL being set; *@acl is cleared if it is equivalent to the mode
 *
 * Update the file mode when setting an ACL: compute the new file permission
 * bits based on the ACL.  In addition, if the ACL is equivalent to the new
 * file mode, set *@acl to NULL to indicate that no ACL should be set.
 *
 * As with chmod, clear the setgid bit if the caller is not in the owning
 * group or capable of CAP_FSETID.
 *
 * Called from set_acl inode operations.
 *
 * Return: 0 on success, or the negative error from posix_acl_equiv_mode().
 */
int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
			  struct posix_acl **acl)
{
	umode_t new_mode = inode->i_mode;
	int equiv;

	equiv = posix_acl_equiv_mode(*acl, &new_mode);
	if (equiv < 0)
		return equiv;
	if (!equiv)
		*acl = NULL;

	if (!in_group_p(inode->i_gid)) {
		if (!capable_wrt_inode_uidgid(inode, CAP_FSETID))
			new_mode &= ~S_ISGID;
	}

	*mode_p = new_mode;
	return 0;
}
EXPORT_SYMBOL(posix_acl_update_mode);
/*
 * Rewrite, in place, the ids of the named USER and GROUP entries of a POSIX
 * ACL xattr blob so that ids expressed in @from become ids expressed in @to.
 *
 * Blobs that are NULL, shorter than the header, of an unexpected version, or
 * whose size does not correspond to a positive entry count are left alone.
 */
static void posix_acl_fix_xattr_userns(
	struct user_namespace *to, struct user_namespace *from,
	void *value, size_t size)
{
	struct posix_acl_xattr_header *hdr = value;
	struct posix_acl_xattr_entry *entries;
	int count, i;

	if (!value)
		return;
	if (size < sizeof(struct posix_acl_xattr_header))
		return;
	if (hdr->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
		return;

	count = posix_acl_xattr_count(size);
	if (count <= 0)
		return;

	/* Entries start right after the header. */
	entries = (void *)(hdr + 1);
	for (i = 0; i < count; i++) {
		struct posix_acl_xattr_entry *e = &entries[i];
		kuid_t kuid;
		kgid_t kgid;

		switch (le16_to_cpu(e->e_tag)) {
		case ACL_USER:
			kuid = make_kuid(from, le32_to_cpu(e->e_id));
			e->e_id = cpu_to_le32(from_kuid(to, kuid));
			break;
		case ACL_GROUP:
			kgid = make_kgid(from, le32_to_cpu(e->e_id));
			e->e_id = cpu_to_le32(from_kgid(to, kgid));
			break;
		default:
			break;
		}
	}
}
/*
 * Translate the ids in an ACL xattr blob from the caller's user namespace
 * into init_user_ns.  Nothing to do when the caller already is in
 * init_user_ns.
 */
void posix_acl_fix_xattr_from_user(void *value, size_t size)
{
	struct user_namespace *caller_ns = current_user_ns();

	if (caller_ns != &init_user_ns)
		posix_acl_fix_xattr_userns(&init_user_ns, caller_ns,
					   value, size);
}
/*
 * Translate the ids in an ACL xattr blob from init_user_ns into the caller's
 * user namespace.  Nothing to do when the caller already is in init_user_ns.
 */
void posix_acl_fix_xattr_to_user(void *value, size_t size)
{
	struct user_namespace *caller_ns = current_user_ns();

	if (caller_ns != &init_user_ns)
		posix_acl_fix_xattr_userns(caller_ns, &init_user_ns,
					   value, size);
}
/*
 * posix_acl_from_xattr - convert an ACL xattr blob to its in-memory form.
 *
 * Ids of named USER/GROUP entries are interpreted relative to @user_ns.
 *
 * Returns NULL for a NULL @value or a blob with zero entries, a newly
 * allocated ACL on success, or an ERR_PTR: -EINVAL for a malformed blob
 * (bad size, unknown tag, unmappable id), -EOPNOTSUPP for an unexpected
 * version, -ENOMEM on allocation failure.
 */
struct posix_acl *
posix_acl_from_xattr(struct user_namespace *user_ns,
		     const void *value, size_t size)
{
	const struct posix_acl_xattr_header *hdr = value;
	const struct posix_acl_xattr_entry *src;
	struct posix_acl *acl;
	int count, i;

	if (!value)
		return NULL;
	if (size < sizeof(struct posix_acl_xattr_header))
		return ERR_PTR(-EINVAL);
	if (hdr->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
		return ERR_PTR(-EOPNOTSUPP);

	count = posix_acl_xattr_count(size);
	if (count < 0)
		return ERR_PTR(-EINVAL);
	if (count == 0)
		return NULL;

	acl = posix_acl_alloc(count, GFP_NOFS);
	if (!acl)
		return ERR_PTR(-ENOMEM);

	/* Entries start right after the header. */
	src = (const void *)(hdr + 1);
	for (i = 0; i < count; i++) {
		struct posix_acl_entry *dst = &acl->a_entries[i];

		dst->e_tag  = le16_to_cpu(src[i].e_tag);
		dst->e_perm = le16_to_cpu(src[i].e_perm);

		switch (dst->e_tag) {
		case ACL_USER_OBJ:
		case ACL_GROUP_OBJ:
		case ACL_MASK:
		case ACL_OTHER:
			/* These tags carry no id. */
			break;

		case ACL_USER:
			dst->e_uid = make_kuid(user_ns,
					       le32_to_cpu(src[i].e_id));
			if (!uid_valid(dst->e_uid))
				goto fail;
			break;

		case ACL_GROUP:
			dst->e_gid = make_kgid(user_ns,
					       le32_to_cpu(src[i].e_id));
			if (!gid_valid(dst->e_gid))
				goto fail;
			break;

		default:
			goto fail;
		}
	}
	return acl;

fail:
	posix_acl_release(acl);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL (posix_acl_from_xattr);
/*
 * posix_acl_to_xattr - convert an in-memory ACL to its xattr representation.
 *
 * Ids of named USER/GROUP entries are expressed relative to @user_ns; all
 * other entries get ACL_UNDEFINED_ID.
 *
 * Returns the number of bytes the representation needs.  With a NULL
 * @buffer only that size is computed; with a @buffer of fewer than that
 * many bytes (@size), -ERANGE is returned and nothing is written.
 */
int
posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
		   void *buffer, size_t size)
{
	struct posix_acl_xattr_header *hdr = buffer;
	struct posix_acl_xattr_entry *dst;
	const struct posix_acl_entry *src;
	int real_size, n;

	real_size = posix_acl_xattr_size(acl->a_count);
	if (!buffer)
		return real_size;
	if (real_size > size)
		return -ERANGE;

	hdr->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);

	/* Entries are laid out right after the header. */
	dst = (void *)(hdr + 1);
	src = acl->a_entries;
	for (n = 0; n < acl->a_count; n++, dst++, src++) {
		dst->e_tag  = cpu_to_le16(src->e_tag);
		dst->e_perm = cpu_to_le16(src->e_perm);

		switch (src->e_tag) {
		case ACL_USER:
			dst->e_id = cpu_to_le32(from_kuid(user_ns,
							  src->e_uid));
			break;
		case ACL_GROUP:
			dst->e_id = cpu_to_le32(from_kgid(user_ns,
							  src->e_gid));
			break;
		default:
			dst->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
			break;
		}
	}
	return real_size;
}
EXPORT_SYMBOL (posix_acl_to_xattr);
/*
 * xattr ->get handler for the POSIX ACL attributes.  The ACL type is taken
 * from @handler->flags.
 *
 * Returns the result of posix_acl_to_xattr() for the inode's ACL,
 * -EOPNOTSUPP if the inode does not use POSIX ACLs or is a symlink,
 * -ENODATA if it has no ACL of that type, or the error from get_acl().
 */
static int
posix_acl_xattr_get(const struct xattr_handler *handler,
		    struct dentry *unused, struct inode *inode,
		    const char *name, void *value, size_t size)
{
	struct posix_acl *acl;
	int rc;

	if (!IS_POSIXACL(inode) || S_ISLNK(inode->i_mode))
		return -EOPNOTSUPP;

	acl = get_acl(inode, handler->flags);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (!acl)
		return -ENODATA;

	rc = posix_acl_to_xattr(&init_user_ns, acl, value, size);
	posix_acl_release(acl);
	return rc;
}
/*
 * set_posix_acl - validate and store an ACL of the given @type on @inode.
 *
 * A NULL @acl is passed straight to the inode's set_acl operation without
 * validation.
 *
 * Returns -EOPNOTSUPP if the inode does not use POSIX ACLs or has no set_acl
 * operation, -EPERM if the caller is neither owner nor capable, the error
 * from posix_acl_valid() for an invalid @acl, and otherwise the result of
 * the set_acl operation.  A default ACL on a non-directory is rejected with
 * -EACCES, while a NULL one there is silently accepted.
 */
int
set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
{
	int rc;

	if (!IS_POSIXACL(inode))
		return -EOPNOTSUPP;
	if (!inode->i_op->set_acl)
		return -EOPNOTSUPP;

	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		if (acl)
			return -EACCES;
		return 0;
	}

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (acl) {
		/* Ids must be representable in the filesystem's namespace. */
		rc = posix_acl_valid(inode->i_sb->s_user_ns, acl);
		if (rc)
			return rc;
	}

	return inode->i_op->set_acl(inode, acl, type);
}
EXPORT_SYMBOL(set_posix_acl);
/*
 * xattr ->set handler for the POSIX ACL attributes.  The ACL type is taken
 * from @handler->flags.  A NULL @value results in a NULL ACL being passed to
 * set_posix_acl().
 *
 * Returns the error from posix_acl_from_xattr() if the blob cannot be
 * parsed, otherwise the result of set_posix_acl().
 */
static int
posix_acl_xattr_set(const struct xattr_handler *handler,
		    struct dentry *unused, struct inode *inode,
		    const char *name, const void *value,
		    size_t size, int flags)
{
	struct posix_acl *acl;
	int rc;

	acl = value ? posix_acl_from_xattr(&init_user_ns, value, size) : NULL;
	if (IS_ERR(acl))
		return PTR_ERR(acl);

	rc = set_posix_acl(inode, handler->flags, acl);
	posix_acl_release(acl);
	return rc;
}
/*
 * xattr ->list handler: report the ACL attributes only for inodes that use
 * POSIX ACLs.
 */
static bool
posix_acl_xattr_list(struct dentry *dentry)
{
	struct inode *inode = d_backing_inode(dentry);

	return IS_POSIXACL(inode);
}
/* xattr handler for the access ACL attribute; .flags selects the ACL type. */
const struct xattr_handler posix_acl_access_xattr_handler = {
	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
	.flags	= ACL_TYPE_ACCESS,
	.get	= posix_acl_xattr_get,
	.set	= posix_acl_xattr_set,
	.list	= posix_acl_xattr_list,
};
EXPORT_SYMBOL_GPL(posix_acl_access_xattr_handler);
/* xattr handler for the default ACL attribute; .flags selects the ACL type. */
const struct xattr_handler posix_acl_default_xattr_handler = {
	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags	= ACL_TYPE_DEFAULT,
	.get	= posix_acl_xattr_get,
	.set	= posix_acl_xattr_set,
	.list	= posix_acl_xattr_list,
};
EXPORT_SYMBOL_GPL(posix_acl_default_xattr_handler);
int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
int error;
if (type == ACL_TYPE_ACCESS) {
error = posix_acl_update_mode(inode,
&inode->i_mode, &acl);
if (error)
return error;
}
inode->i_ctime = current_time(inode);
set_cached_acl(inode, type, acl);
return 0;
}
/*
 * simple_acl_create - set up the cached ACLs of a new @inode created in @dir.
 *
 * Computes the default and access ACLs with posix_acl_create() (which may
 * also adjust inode->i_mode), stores both with set_cached_acl(), and drops
 * the references posix_acl_create() returned.
 *
 * Returns 0 on success or the error from posix_acl_create().
 */
int simple_acl_create(struct inode *dir, struct inode *inode)
{
	struct posix_acl *dflt, *access;
	int rc;

	rc = posix_acl_create(dir, &inode->i_mode, &dflt, &access);
	if (rc)
		return rc;

	set_cached_acl(inode, ACL_TYPE_DEFAULT, dflt);
	set_cached_acl(inode, ACL_TYPE_ACCESS, access);

	if (dflt)
		posix_acl_release(dflt);
	if (access)
		posix_acl_release(access);
	return 0;
}