mirror of
https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
* google/upstream-f2fs-stable-linux-4.14.y: f2fs: fix to do sanity check on .cp_pack_total_block_count f2fs: make gc_urgent and gc_segment_mode sysfs node readable f2fs: use aggressive GC policy during f2fs_disable_checkpoint() f2fs: fix compressed file start atomic write may cause data corruption f2fs: initialize sbi->gc_mode explicitly f2fs: introduce gc_urgent_mid mode f2fs: compress: fix to print raw data size in error path of lz4 decompression f2fs: remove redundant parameter judgment f2fs: use spin_lock to avoid hang f2fs: don't get FREEZE lock in f2fs_evict_inode in frozen fs f2fs: remove unnecessary read for F2FS_FITS_IN_INODE f2fs: fix to do sanity check on curseg->alloc_type f2fs: fix to avoid potential deadlock f2fs: quota: fix loop condition at f2fs_quota_sync() f2fs: Restore rwsem lockdep support f2fs: fix missing free nid in f2fs_handle_failed_inode f2fs: add a way to limit roll forward recovery time f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy f2fs: adjust readahead block number during recovery f2fs: fix to unlock page correctly in error path of is_alive() f2fs: expose discard related parameters in sysfs f2fs: move discard parameters into discard_cmd_control f2fs: fix to enable ATGC correctly via gc_idle sysfs interface f2fs: move f2fs to use reader-unfair rwsems f2fs: do not allow partial truncation on pinned file f2fs: remove redunant invalidate compress pages f2fs: Simplify bool conversion f2fs: don't drop compressed page cache in .{invalidate,release}page f2fs: fix to reserve space for IO align feature f2fs: fix to check available space of CP area correctly in update_ckpt_flags() f2fs: support fault injection to f2fs_trylock_op() f2fs: clean up __find_inline_xattr() with __find_xattr() f2fs: fix to do sanity check on last xattr entry in __f2fs_setxattr() f2fs: do not bother checkpoint by f2fs_get_node_info f2fs: avoid down_write on nat_tree_lock during checkpoint f2fs: compress: fix potential deadlock of compress file f2fs: avoid EINVAL by 
SBI_NEED_FSCK when pinning a file f2fs: add gc_urgent_high_remaining sysfs node f2fs: fix to do sanity check in is_alive() f2fs: fix to avoid panic in is_alive() if metadata is inconsistent f2fs: fix to do sanity check on inode type during garbage collection f2fs: avoid duplicate call of mark_inode_dirty f2fs: fix remove page failed in invalidate compress pages f2fs: fix the f2fs_file_write_iter tracepoint f2fs: do not expose unwritten blocks to user by DIO f2fs: reduce indentation in f2fs_file_write_iter() f2fs: rework write preallocations f2fs: compress: reduce one page array alloc and free when write compressed page f2fs: show number of pending discard commands f2fs: check nr_pages for readahead f2fs: fix UAF in f2fs_available_free_memory f2fs: invalidate META_MAPPING before IPU/DIO write f2fs: support fault injection for dquot_initialize() f2fs: fix incorrect return value in f2fs_sanity_check_ckpt() f2fs: compress: disallow disabling compress on non-empty compressed file f2fs: compress: fix overwrite may reduce compress ratio unproperly f2fs: multidevice: support direct IO f2fs: introduce fragment allocation mode mount option f2fs: include non-compressed blocks in compr_written_block f2fs: fix wrong condition to trigger background checkpoint correctly f2fs: fix to use WHINT_MODE f2fs: fix up f2fs_lookup tracepoints f2fs: set SBI_NEED_FSCK flag when inconsistent node block found f2fs: introduce excess_dirty_threshold() f2fs: avoid attaching SB_ACTIVE flag during mount f2fs: quota: fix potential deadlock f2fs: should use GFP_NOFS for directory inodes f2fs: should put a page beyond EOF when preparing a write f2fs: deallocate compressed pages when error happens f2fs: enable realtime discard iff device supports discard f2fs: guarantee to write dirty data when enabling checkpoint back f2fs: fix to unmap pages from userspace process in punch_hole() f2fs: fix unexpected ENOENT comes from f2fs_map_blocks() f2fs: fix to account missing .skipped_gc_rwsem f2fs: adjust 
unlock order for cleanup f2fs: Don't create discard thread when device doesn't support realtime discard f2fs: rebuild nat_bits during umount f2fs: introduce periodic iostat io latency traces f2fs: separate out iostat feature f2fs: compress: do sanity check on cluster f2fs: fix description about main_blkaddr node f2fs: convert S_IRUGO to 0444 f2fs: fix to keep compatibility of fault injection interface f2fs: support fault injection for f2fs_kmem_cache_alloc() f2fs: compress: allow write compress released file after truncate to zero f2fs: correct comment in segment.h f2fs: improve sbi status info in debugfs/f2fs/status f2fs: compress: avoid duplicate counting of valid blocks when read compressed file f2fs: fix to do sanity check for sb/cp fields correctly f2fs: avoid unneeded memory allocation in __add_ino_entry() f2fs: extent cache: support unaligned extent f2fs: Kconfig: clean up config options about compression f2fs: reduce the scope of setting fsck tag when de->name_len is zero f2fs: fix to stop filesystem update once CP failed f2fs: introduce discard_unit mount option f2fs: fix min_seq_blocks can not make sense in some scenes. 
f2fs: fix to force keeping write barrier for strict fsync mode f2fs: fix wrong checkpoint_changed value in f2fs_remount() f2fs: show sbi status in debugfs/f2fs/status f2fs: turn back remapped address in compressed page endio f2fs: change fiemap way in printing compression chunk f2fs: do not submit NEW_ADDR to read node block f2fs: compress: remove unneeded read when rewrite whole cluster f2fs: don't sleep while grabing nat_tree_lock f2fs: remove allow_outplace_dio() f2fs: make f2fs_write_failed() take struct inode f2fs: quota: fix potential deadlock f2fs: let's keep writing IOs on SBI_NEED_FSCK f2fs: Revert "f2fs: Fix indefinite loop in f2fs_gc() v1" f2fs: avoid to create an empty string as the extension_list f2fs: compress: fix to set zstd compress level correctly f2fs: add sysfs nodes to get GC info for each GC mode f2fs: drop dirty node pages when cp is in error status f2fs: initialize page->private when using for our internal use f2fs: compress: add nocompress extensions support Revert "f2fs: avoid attaching SB_ACTIVE flag during mount/remount" f2fs: remove false alarm on iget failure during GC f2fs: enable extent cache for compression files in read-only f2fs: fix to avoid adding tab before doc section f2fs: introduce f2fs_casefolded_name slab cache f2fs: swap: support migrating swapfile in aligned write mode f2fs: swap: remove dead codes f2fs: compress: add compress_inode to cache compressed blocks f2fs: clean up /sys/fs/f2fs/<disk>/features f2fs: add pin_file in feature list f2fs: Advertise encrypted casefolding in sysfs f2fs: Show casefolding support only when supported f2fs: support RO feature f2fs: logging neatening f2fs: restructure f2fs page.private layout f2fs: introduce FI_COMPRESS_RELEASED instead of using IMMUTABLE bit f2fs: compress: remove unneeded preallocation f2fs: avoid attaching SB_ACTIVE flag during mount/remount f2fs: atgc: export entries for better tunability via sysfs f2fs: compress: fix to disallow temp extension f2fs: let's allow 
compression for mmap files f2fs: add MODULE_SOFTDEP to ensure crc32 is included in the initramfs f2fs: return success if there is no work to do f2fs: compress: clean up parameter of __f2fs_cluster_blocks() f2fs: compress: remove unneeded f2fs_put_dnode() f2fs: atgc: fix to set default age threshold f2fs: Prevent swap file in LFS mode f2fs: fix to avoid racing on fsync_entry_slab by multi filesystem instances f2fs: add cp_error check in f2fs_write_compressed_pages f2fs: compress: rename __cluster_may_compress f2fs: return EINVAL for hole cases in swap file f2fs: avoid swapon failure by giving a warning first f2fs: compress: fix to assign cc.cluster_idx correctly f2fs: compress: fix race condition of overwrite vs truncate f2fs: compress: fix to free compress page correctly f2fs: support iflag change given the mask f2fs: avoid null pointer access when handling IPU error f2fs: drop inplace IO if fs status is abnormal f2fs: compress: remove unneed check condition f2fs: clean up left deprecated IO trace codes f2fs: avoid using native allocate_segment_by_default() f2fs: remove unnecessary struct declaration f2fs: fix to avoid NULL pointer dereference f2fs: avoid duplicated codes for cleanup f2fs: document: add description about compressed space handling f2fs: clean up build warnings f2fs: fix the periodic wakeups of discard thread f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block() f2fs: fix to avoid GC/mmap race with f2fs_truncate() f2fs: set checkpoint_merge by default f2fs: Fix a hungtask problem in atomic write f2fs: fix to restrict mount condition on readonly block device f2fs: introduce gc_merge mount option f2fs: fix to cover __allocate_new_section() with curseg_lock f2fs: fix wrong alloc_type in f2fs_do_replace_block f2fs: delete empty compress.h f2fs: fix a typo in inode.c f2fs: allow to change discard policy based on cached discard cmds f2fs: fix to avoid touching checkpointed data in get_victim() f2fs: fix to update last i_size if fallocate 
partially succeeds f2fs: fix error path of f2fs_remount() f2fs: fix wrong comment of nat_tree_lock f2fs: fix to avoid out-of-bounds memory access f2fs: don't start checkpoint thread in readonly mountpoint f2fs: do not use AT_SSR mode in FG_GC & high urgent BG_GC f2fs: add sysfs nodes to get runtime compression stat f2fs: fix to use per-inode maxbytes in f2fs_fiemap f2fs: fix to align to section for fallocate() on pinned file f2fs: expose # of overprivision segments f2fs: fix error handling in f2fs_end_enable_verity() f2fs: fix a redundant call to f2fs_balance_fs if an error occurs f2fs: remove unused file_clear_encrypt() f2fs: check if swapfile is section-alligned f2fs: fix last_lblock check in check_swap_activate_fast f2fs: remove unnecessary IS_SWAPFILE check f2fs: Replace one-element array with flexible-array member f2fs: compress: Allow modular (de)compression algorithms f2fs: check discard command number before traversing discard pending list f2fs: update comments for explicit memory barrier f2fs: remove unused FORCE_FG_GC macro f2fs: avoid unused f2fs_show_compress_options() f2fs: fix panic during f2fs_resize_fs() f2fs: fix to allow migrating fully valid segment f2fs: fix a spelling error f2fs: fix a spacing coding style fs: Enable bmap() function to properly return errors f2fs: remove obsolete f2fs.txt fs-verity: support reading signature with ioctl fs-verity: support reading descriptor with ioctl fs-verity: support reading Merkle tree with ioctl fs-verity: add FS_IOC_READ_VERITY_METADATA ioctl fs-verity: don't pass whole descriptor to fsverity_verify_signature() fs-verity: factor out fsverity_get_descriptor() fs-verity: move structs needed for file signing to UAPI header fs-verity: rename "file measurement" to "file digest" fs-verity: rename fsverity_signed_digest to fsverity_formatted_digest fs-verity: remove filenames from file comments fs-verity: use smp_load_acquire() for ->i_verity_info f2fs: remove FAULT_ALLOC_BIO f2fs: use blkdev_issue_flush in 
__submit_flush_wait f2fs: remove a few bd_part checks quota: Cleanup list iteration in dqcache_shrink_scan() quota: reclaim least recently used dquots fs: quota: Replace GFP_ATOMIC with GFP_KERNEL in dquot_init quota: Check for register_shrinker() failure. quota: propagate error from __dquot_initialize quota: be aware of error from dquot_initialize Documentation: f2fs: fix typo s/automaic/automatic f2fs: give a warning only for readonly partition f2fs: don't grab superblock freeze for flush/ckpt thread f2fs: add ckpt_thread_ioprio sysfs node f2fs: introduce checkpoint_merge mount option f2fs: relocate inline conversion from mmap() to mkwrite() f2fs: fix a wrong condition in __submit_bio f2fs: remove unnecessary initialization in xattr.c f2fs: fix to avoid inconsistent quota data f2fs: flush data when enabling checkpoint back f2fs: deprecate f2fs_trace_io f2fs: remove unused stat_{inc, dec}_atomic_write f2fs: introduce sb_status sysfs node f2fs: fix to use per-inode maxbytes f2fs: compress: fix potential deadlock libfs: unexport generic_ci_d_compare() and generic_ci_d_hash() f2fs: fix to set/clear I_LINKABLE under i_lock f2fs: fix null page reference in redirty_blocks f2fs: clean up post-read processing f2fs: trival cleanup in move_data_block() f2fs: fix out-of-repair __setattr_copy() f2fs: fix to tag FIEMAP_EXTENT_MERGED in f2fs_fiemap() f2fs: introduce a new per-sb directory in sysfs f2fs: compress: support compress level f2fs: compress: deny setting unsupported compress algorithm f2fs: relocate f2fs_precache_extents() f2fs: enforce the immutable flag on open files f2fs: enhance to update i_mode and acl atomically in f2fs_setattr() f2fs: fix to set inode->i_mode correctly for posix_acl_update_mode f2fs: Replace expression with offsetof() f2fs: handle unallocated section and zone on pinned/atgc f2fs: compress: fix compression chksum f2fs: fix shift-out-of-bounds in sanity_check_raw_super() f2fs: fix race of pending_pages in decompression f2fs: fix to account inline 
xattr correctly during recovery f2fs: inline: fix wrong inline inode stat f2fs: inline: correct comment in f2fs_recover_inline_data f2fs: don't check PAGE_SIZE again in sanity_check_raw_super() f2fs: convert to F2FS_*_INO macro f2fs: introduce max_io_bytes, a sysfs entry, to limit bio size f2fs: don't allow any writes on readonly mount f2fs: avoid race condition for shrinker count f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE f2fs: add compress_mode mount option f2fs: Remove unnecessary unlikely() f2fs: init dirty_secmap incorrectly f2fs: remove buffer_head which has 32bits limit f2fs: fix wrong block count instead of bytes f2fs: use new conversion functions between blks and bytes f2fs: rename logical_to_blk and blk_to_logical f2fs: fix kbytes written stat for multi-device case f2fs: compress: support chksum f2fs: fix to avoid REQ_TIME and CP_TIME collision f2fs: change to use rwsem for cp_mutex f2fs: Handle casefolding with Encryption fscrypt: Have filesystems handle their d_ops libfs: Add generic function for setting dentry_ops f2fs: Remove the redundancy initialization f2fs: remove writeback_inodes_sb in f2fs_remount f2fs: fix double free of unicode map f2fs: fix compat F2FS_IOC_{MOVE,GARBAGE_COLLECT}_RANGE f2fs: avoid unneeded data copy in f2fs_ioc_move_range() f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl f2fs: add F2FS_IOC_GET_COMPRESS_OPTION ioctl f2fs: move ioctl interface definitions to separated file f2fs: fix to seek incorrect data offset in inline data file f2fs: check fiemap parameters f2fs: call f2fs_get_meta_page_retry for nat page fscrypt: rename DCACHE_ENCRYPTED_NAME to DCACHE_NOKEY_NAME fscrypt: don't call no-key names "ciphertext names" fscrypt: export fscrypt_d_revalidate() f2fs: code cleanup by removing unnecessary check f2fs: wait for sysfs kobject removal before freeing f2fs_sb_info f2fs: fix writecount false positive in releasing compress blocks f2fs: introduce check_swap_activate_fast() f2fs: don't issue flush in 
f2fs_flush_device_cache() for nobarrier case f2fs: handle errors of f2fs_get_meta_page_nofail f2fs: fix to set SBI_NEED_FSCK flag for inconsistent inode f2fs: reject CASEFOLD inode flag without casefold feature f2fs: fix memory alignment to support 32bit f2fs: fix slab leak of rpages pointer f2fs: compress: fix to disallow enabling compress on non-empty file f2fs: compress: introduce cic/dic slab cache f2fs: compress: introduce page array slab cache f2fs: fix to do sanity check on segment/section count f2fs: fix to check segment boundary during SIT page readahead f2fs: fix uninit-value in f2fs_lookup fs/buffer.c: record blockdev write errors in super_block that it backs vfs: track per-sb writeback errors and report them to syncfs f2fs: remove unneeded parameter in find_in_block() f2fs: fix wrong total_sections check and fsmeta check f2fs: remove duplicated code in sanity_check_area_boundary f2fs: remove unused check on version_bitmap f2fs: relocate blkzoned feature check f2fs: do sanity check on zoned block device path f2fs: add trace exit in exception path f2fs: change return value of reserved_segments to unsigned int f2fs: clean up kvfree f2fs: change virtual mapping way for compression pages f2fs: change return value of f2fs_disable_compressed_file to bool f2fs: change i_compr_blocks of inode to atomic value f2fs: ignore compress mount option on image w/o compression feature f2fs: allocate proper size memory for zstd decompress f2fs: change compr_blocks of superblock info to 64bit f2fs: add block address limit check to compressed file f2fs: check position in move range ioctl f2fs: correct statistic of APP_DIRECT_IO/APP_DIRECT_READ_IO f2fs: support age threshold based garbage collection f2fs: Use generic casefolding support fs: Add standard casefolding support unicode: Add utf8_casefold_hash f2fs: compress: use more readable atomic_t type for {cic,dic}.ref f2fs: fix compile warning f2fs: support 64-bits key in f2fs rb-tree node entry f2fs: inherit mtime of 
original block during GC f2fs: record average update time of segment f2fs: introduce inmem curseg f2fs: compress: remove unneeded code f2fs: remove duplicated type casting f2fs: support zone capacity less than zone size f2fs: update changes in upstream on GC_URGENT_HIGH f2fs: Return EOF on unaligned end of file DIO read f2fs: fix indefinite loop scanning for free nid f2fs: Fix type of section block count variables f2fs: prepare a waiter before entering io_schedule f2fs: update_sit_entry: Make the judgment condition of f2fs_bug_on more intuitive f2fs: replace test_and_set/clear_bit() with set/clear_bit() f2fs: make file immutable even if releasing zero compression block f2fs: compress: disable compression mount option if compression is off f2fs: compress: add sanity check during compressed cluster read f2fs: use macro instead of f2fs verity version f2fs: fix deadlock between quota writes and checkpoint f2fs: correct comment of f2fs_exist_written_data f2fs: compress: delay temp page allocation f2fs: compress: fix to update isize when overwriting compressed file f2fs: space related cleanup f2fs: fix use-after-free issue f2fs: Change the type of f2fs_flush_inline_data() to void f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl f2fs: segment.h: delete a duplicated word f2fs: compress: fix to avoid memory leak on cc->cpages f2fs: use generic names for generic ioctls f2fs: don't keep meta inode pages used for compressed block migration f2fs: fix error path in do_recover_data() f2fs: fix to wait GCed compressed page writeback f2fs: remove write attribute of main_blkaddr sysfs node f2fs: add GC_URGENT_LOW mode in gc_urgent f2fs: avoid readahead race condition f2fs: fix return value of move_data_block() f2fs: add parameter op_flag in f2fs_submit_page_read() f2fs: split f2fs_allocate_new_segments() f2fs: lost matching-pair of trace in f2fs_truncate_inode_blocks f2fs: fix an oops in f2fs_is_compressed_page f2fs: make trace enter and end in pairs for unlink f2fs: fix to check page dirty 
status before writeback f2fs: remove the unused compr parameter f2fs: support to trace f2fs_fiemap() f2fs: support to trace f2fs_bmap() f2fs: fix wrong return value of f2fs_bmap_compress() f2fs: remove useless parameter of __insert_free_nid() f2fs: fix typo in comment of f2fs_do_add_link f2fs: fix to wait page writeback before update f2fs: show more debug info for per-temperature log f2fs: add f2fs_gc exception handle in f2fs_ioc_gc_range f2fs: clean up parameter of f2fs_allocate_data_block() f2fs: shrink node_write lock coverage f2fs: add prefix for exported symbols f2fs: use kfree() to free variables allocated by match_strdup() f2fs: get the right gc victim section when section has several segments f2fs: fix a race condition between f2fs_write_end_io and f2fs_del_fsync_node_entry f2fs: remove useless truncate in f2fs_collapse_range() f2fs: use kfree() instead of kvfree() to free superblock data f2fs: avoid checkpatch error f2fs: should avoid inode eviction in synchronous path
1262 lines
30 KiB
C
1262 lines
30 KiB
C
/*
|
|
* linux/fs/open.c
|
|
*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
*/
|
|
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fdtable.h>
|
|
#include <linux/fsnotify.h>
|
|
#include <linux/module.h>
|
|
#include <linux/tty.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/securebits.h>
|
|
#include <linux/security.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/falloc.h>
|
|
#include <linux/fs_struct.h>
|
|
#include <linux/ima.h>
|
|
#include <linux/dnotify.h>
|
|
#include <linux/compat.h>
|
|
|
|
#include "internal.h"
|
|
|
|
/*
 * do_truncate2 - change the size of @dentry's inode to @length.
 * @mnt:        mount handed through to notify_change2()
 * @dentry:     dentry whose inode is resized
 * @length:     new size; negative values are rejected with -EINVAL
 * @time_attrs: extra ATTR_* bits OR-ed into the request next to ATTR_SIZE
 * @filp:       optional open file; when set it is recorded in the request
 *              and ATTR_FILE is raised
 *
 * Returns 0 or a negative errno coming from dentry_needs_remove_privs()
 * or notify_change2().
 */
int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length,
		 unsigned int time_attrs, struct file *filp)
{
	struct iattr attrs;
	int kill_flags;
	int err;

	/* Not pretty: i_size is a signed type, so a negative length can arrive. */
	if (length < 0)
		return -EINVAL;

	attrs.ia_valid = ATTR_SIZE | time_attrs;
	attrs.ia_size = length;
	if (filp != NULL) {
		attrs.ia_valid |= ATTR_FILE;
		attrs.ia_file = filp;
	}

	/* Truncation also strips suid, sgid and file capabilities. */
	kill_flags = dentry_needs_remove_privs(dentry);
	if (kill_flags < 0)
		return kill_flags;
	if (kill_flags != 0)
		attrs.ia_valid |= kill_flags | ATTR_FORCE;

	inode_lock(dentry->d_inode);
	/* Note any delegations or leases have already been broken. */
	err = notify_change2(mnt, dentry, &attrs, NULL);
	inode_unlock(dentry->d_inode);

	return err;
}
|
|
/*
 * do_truncate - mount-less front end for do_truncate2().
 *
 * Forwards every argument unchanged and supplies a NULL vfsmount.
 */
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
		struct file *filp)
{
	struct vfsmount *no_mnt = NULL;

	return do_truncate2(no_mnt, dentry, length, time_attrs, filp);
}
|
|
|
|
/*
 * vfs_truncate - truncate the file named by @path to @length bytes.
 *
 * Returns -EISDIR for directories and -EINVAL for any other non-regular
 * file.  Otherwise it takes mount write access, checks MAY_WRITE, refuses
 * append-only inodes with -EPERM, takes write access on the d_real()
 * dentry, breaks leases, runs the lock and security checks and finally
 * calls do_truncate2().  Everything acquired is released before returning.
 */
long vfs_truncate(const struct path *path, loff_t length)
{
	struct dentry *target = path->dentry;
	struct inode *inode = target->d_inode;
	struct vfsmount *mnt = path->mnt;
	struct dentry *real;
	long err;

	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
	if (S_ISDIR(inode->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	err = inode_permission2(mnt, inode, MAY_WRITE);
	if (err)
		goto drop_mnt_write;

	if (IS_APPEND(inode)) {
		err = -EPERM;
		goto drop_mnt_write;
	}

	/*
	 * If this is an overlayfs then do as if opening the file so we get
	 * write access on the upper inode, not on the overlay inode.  For
	 * non-overlay filesystems d_real() is an identity function.
	 */
	real = d_real(target, NULL, O_WRONLY, 0);
	if (IS_ERR(real)) {
		err = PTR_ERR(real);
		goto drop_mnt_write;
	}

	err = get_write_access(real->d_inode);
	if (err)
		goto drop_mnt_write;

	/*
	 * Make sure that there are no leases.  get_write_access() protects
	 * against the truncate racing with a lease-granting setlease().
	 */
	err = break_lease(inode, O_WRONLY);
	if (err)
		goto drop_write_access;

	/* Each later step only runs when everything before it succeeded. */
	err = locks_verify_truncate(inode, NULL, length);
	if (err == 0)
		err = security_path_truncate(path);
	if (err == 0)
		err = do_truncate2(mnt, target, length, 0, NULL);

drop_write_access:
	put_write_access(real->d_inode);
drop_mnt_write:
	mnt_drop_write(mnt);
	return err;
}
EXPORT_SYMBOL_GPL(vfs_truncate);
|
|
|
|
/*
 * Common body of the path-based truncate syscalls: resolve @pathname
 * relative to AT_FDCWD (following symlinks) and hand the result to
 * vfs_truncate().  When retry_estale() asks for it, the lookup is
 * repeated with LOOKUP_REVAL added.
 */
static long do_sys_truncate(const char __user *pathname, loff_t length)
{
	unsigned int flags = LOOKUP_FOLLOW;
	struct path target;
	int err;

	if (length < 0)	/* sorry, but loff_t says... */
		return -EINVAL;

	for (;;) {
		err = user_path_at(AT_FDCWD, pathname, flags, &target);
		if (err == 0) {
			err = vfs_truncate(&target, length);
			path_put(&target);
		}

		if (!retry_estale(err, flags))
			break;
		flags |= LOOKUP_REVAL;
	}

	return err;
}
|
|
|
|
/* truncate(2): the 'long' length is widened to loff_t for do_sys_truncate(). */
SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
{
	loff_t new_size = length;

	return do_sys_truncate(path, new_size);
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
/* Compat truncate(2): converts the compat_off_t length to loff_t and forwards it. */
COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
{
	loff_t new_size = length;

	return do_sys_truncate(path, new_size);
}
|
|
#endif
|
|
|
|
/*
 * Common body of the fd-based truncate syscalls.
 * @fd:     descriptor of the file to resize
 * @length: new size; negative values yield -EINVAL
 * @small:  non-zero when the caller cannot express large offsets; it is
 *          cleared if the file was opened with O_LARGEFILE
 *
 * Returns -EBADF for an invalid descriptor, -EINVAL for a non-regular
 * file, a file not opened for writing or a too-large length in "small"
 * mode, -EPERM for append-only files, otherwise the result of the
 * lock/security checks and do_truncate2().
 */
static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
	struct fd f;
	struct file *file;
	struct dentry *dentry;
	struct inode *inode;
	int err;

	if (length < 0)
		return -EINVAL;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;
	file = f.file;

	/* explicitly opened as large or we are on 64-bit box */
	if (file->f_flags & O_LARGEFILE)
		small = 0;

	dentry = file->f_path.dentry;
	inode = dentry->d_inode;

	err = -EINVAL;
	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
		goto put_fd;
	/* Cannot ftruncate over 2^31 bytes without large file support */
	if (small && length > MAX_NON_LFS)
		goto put_fd;

	/* The append-only test uses file_inode(), not the dentry's inode. */
	err = -EPERM;
	if (IS_APPEND(file_inode(file)))
		goto put_fd;

	sb_start_write(inode->i_sb);
	err = locks_verify_truncate(inode, file, length);
	if (err == 0)
		err = security_path_truncate(&file->f_path);
	if (err == 0)
		err = do_truncate2(file->f_path.mnt, dentry, length,
				   ATTR_MTIME | ATTR_CTIME, file);
	sb_end_write(inode->i_sb);

put_fd:
	fdput(f);
	return err;
}
|
|
|
|
/* ftruncate(2): forwards to do_sys_ftruncate() with the "small" limit enabled. */
SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
{
	const int small = 1;

	return do_sys_ftruncate(fd, length, small);
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
/* Compat ftruncate(2): forwards to do_sys_ftruncate() with the "small" limit enabled. */
COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
{
	const int small = 1;

	return do_sys_ftruncate(fd, length, small);
}
|
|
#endif
|
|
|
|
/* LFS versions of truncate are only needed on 32 bit machines */
|
|
#if BITS_PER_LONG == 32
|
|
/* truncate64(2): the length already arrives as loff_t and is passed straight on. */
SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
{
	long ret = do_sys_truncate(path, length);

	return ret;
}
|
|
|
|
/* ftruncate64(2): forwards to do_sys_ftruncate() with the "small" limit disabled. */
SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
{
	const int small = 0;

	return do_sys_ftruncate(fd, length, small);
}
|
|
#endif /* BITS_PER_LONG == 32 */
|
|
|
|
|
|
/*
 * vfs_fallocate - validate a fallocate request and pass it to the file's
 *                 ->fallocate() method.
 * @file:   open file to operate on; must have FMODE_WRITE
 * @mode:   FALLOC_FL_* flags
 * @offset: start of the range, must be >= 0
 * @len:    length of the range, must be > 0
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL      bad offset/len, or COLLAPSE/INSERT/UNSHARE combined with
 *                flags they may not be combined with
 *   -EOPNOTSUPP  unsupported flag, PUNCH_HOLE together with ZERO_RANGE,
 *                PUNCH_HOLE without KEEP_SIZE, or no ->fallocate() method
 *   -EBADF       file not opened for writing
 *   -EPERM       immutable inode, or append-only inode with anything
 *                other than plain/KEEP_SIZE allocation
 *   -ETXTBSY     inode is an active swapfile
 *   -ESPIPE      FIFO, -EISDIR directory, -ENODEV neither regular file
 *                nor block device
 *   -EFBIG       range ends beyond the superblock's s_maxbytes
 * or whatever security_file_permission() / ->fallocate() return.
 */
int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	loff_t maxbytes;
	long ret;

	if (offset < 0 || len <= 0)
		return -EINVAL;

	/* Return error if mode is not supported */
	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
		return -EOPNOTSUPP;

	/* Punch hole and zero range are mutually exclusive */
	if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
	    (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	/* Punch hole must have keep size set */
	if ((mode & FALLOC_FL_PUNCH_HOLE) &&
	    !(mode & FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;

	/* Collapse range should only be used exclusively. */
	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
		return -EINVAL;

	/* Insert range should only be used exclusively. */
	if ((mode & FALLOC_FL_INSERT_RANGE) &&
	    (mode & ~FALLOC_FL_INSERT_RANGE))
		return -EINVAL;

	/* Unshare range should only be used with allocate mode. */
	if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
	    (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
		return -EINVAL;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	/*
	 * We can only allow pure fallocate on append only files
	 */
	if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
		return -EPERM;

	if (IS_IMMUTABLE(inode))
		return -EPERM;

	/*
	 * We cannot allow any fallocate operation on an active swapfile
	 */
	if (IS_SWAPFILE(inode))
		return -ETXTBSY;

	/*
	 * Revalidate the write permissions, in case security policy has
	 * changed since the files were opened.
	 */
	ret = security_file_permission(file, MAY_WRITE);
	if (ret)
		return ret;

	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	if (S_ISDIR(inode->i_mode))
		return -EISDIR;

	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
		return -ENODEV;

	/*
	 * Reject ranges whose end would lie beyond s_maxbytes.  offset >= 0
	 * and len > 0 were established above, so the test is written as a
	 * subtraction that cannot overflow instead of computing offset + len,
	 * which can exceed the range of loff_t.  The offset > maxbytes clause
	 * keeps the subtraction in range for any value of s_maxbytes.
	 */
	maxbytes = inode->i_sb->s_maxbytes;
	if (offset > maxbytes || len > maxbytes - offset)
		return -EFBIG;

	if (!file->f_op->fallocate)
		return -EOPNOTSUPP;

	file_start_write(file);
	ret = file->f_op->fallocate(file, mode, offset, len);

	/*
	 * Create inotify and fanotify events.
	 *
	 * To keep the logic simple always create events if fallocate succeeds.
	 * This implies that events are even created if the file size remains
	 * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
	 */
	if (ret == 0)
		fsnotify_modify(file);

	file_end_write(file);
	return ret;
}
EXPORT_SYMBOL_GPL(vfs_fallocate);
|
|
|
|
/*
 * fallocate(2): look up @fd and run vfs_fallocate() on the file behind it.
 * Returns -EBADF when the descriptor does not resolve to a file.
 */
SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
{
	struct fd f = fdget(fd);
	int ret;

	if (!f.file)
		return -EBADF;

	ret = vfs_fallocate(f.file, mode, offset, len);
	fdput(f);

	return ret;
}
|
|
|
|
/*
|
|
* access() needs to use the real uid/gid, not the effective uid/gid.
|
|
* We do this by temporarily clearing all FS-related capabilities and
|
|
* switching the fsuid/fsgid around to the real ones.
|
|
*/
|
|
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
|
|
{
|
|
const struct cred *old_cred;
|
|
struct cred *override_cred;
|
|
struct path path;
|
|
struct inode *inode;
|
|
struct vfsmount *mnt;
|
|
int res;
|
|
unsigned int lookup_flags = LOOKUP_FOLLOW;
|
|
|
|
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
|
|
return -EINVAL;
|
|
|
|
override_cred = prepare_creds();
|
|
if (!override_cred)
|
|
return -ENOMEM;
|
|
|
|
override_cred->fsuid = override_cred->uid;
|
|
override_cred->fsgid = override_cred->gid;
|
|
|
|
if (!issecure(SECURE_NO_SETUID_FIXUP)) {
|
|
/* Clear the capabilities if we switch to a non-root user */
|
|
kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
|
|
if (!uid_eq(override_cred->uid, root_uid))
|
|
cap_clear(override_cred->cap_effective);
|
|
else
|
|
override_cred->cap_effective =
|
|
override_cred->cap_permitted;
|
|
}
|
|
|
|
/*
|
|
* The new set of credentials can *only* be used in
|
|
* task-synchronous circumstances, and does not need
|
|
* RCU freeing, unless somebody then takes a separate
|
|
* reference to it.
|
|
*
|
|
* NOTE! This is _only_ true because this credential
|
|
* is used purely for override_creds() that installs
|
|
* it as the subjective cred. Other threads will be
|
|
* accessing ->real_cred, not the subjective cred.
|
|
*
|
|
* If somebody _does_ make a copy of this (using the
|
|
* 'get_current_cred()' function), that will clear the
|
|
* non_rcu field, because now that other user may be
|
|
* expecting RCU freeing. But normal thread-synchronous
|
|
* cred accesses will keep things non-RCY.
|
|
*/
|
|
override_cred->non_rcu = 1;
|
|
|
|
old_cred = override_creds(override_cred);
|
|
retry:
|
|
res = user_path_at(dfd, filename, lookup_flags, &path);
|
|
if (res)
|
|
goto out;
|
|
|
|
inode = d_backing_inode(path.dentry);
|
|
mnt = path.mnt;
|
|
|
|
if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
|
|
/*
|
|
* MAY_EXEC on regular files is denied if the fs is mounted
|
|
* with the "noexec" flag.
|
|
*/
|
|
res = -EACCES;
|
|
if (path_noexec(&path))
|
|
goto out_path_release;
|
|
}
|
|
|
|
res = inode_permission2(mnt, inode, mode | MAY_ACCESS);
|
|
/* SuS v2 requires we report a read only fs too */
|
|
if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
|
|
goto out_path_release;
|
|
/*
|
|
* This is a rare case where using __mnt_is_readonly()
|
|
* is OK without a mnt_want/drop_write() pair. Since
|
|
* no actual write to the fs is performed here, we do
|
|
* not need to telegraph to that to anyone.
|
|
*
|
|
* By doing this, we accept that this access is
|
|
* inherently racy and know that the fs may change
|
|
* state before we even see this result.
|
|
*/
|
|
if (__mnt_is_readonly(path.mnt))
|
|
res = -EROFS;
|
|
|
|
out_path_release:
|
|
path_put(&path);
|
|
if (retry_estale(res, lookup_flags)) {
|
|
lookup_flags |= LOOKUP_REVAL;
|
|
goto retry;
|
|
}
|
|
out:
|
|
revert_creds(old_cred);
|
|
put_cred(override_cred);
|
|
return res;
|
|
}
|
|
|
|
/* access(2): same as faccessat() with AT_FDCWD as the directory descriptor. */
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
	const int cwd_fd = AT_FDCWD;

	return sys_faccessat(cwd_fd, filename, mode);
}
|
|
|
|
/*
 * chdir(2): resolve @filename as a directory, require MAY_EXEC | MAY_CHDIR
 * on it, and make it the working directory of current->fs.  A permission
 * failure is offered to retry_estale(), which may trigger one more lookup
 * with LOOKUP_REVAL; a lookup failure is returned directly.
 */
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
	unsigned int flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
	struct path target;
	int err;

	for (;;) {
		err = user_path_at(AT_FDCWD, filename, flags, &target);
		if (err)
			break;

		err = inode_permission2(target.mnt, target.dentry->d_inode,
					MAY_EXEC | MAY_CHDIR);
		if (err == 0)
			set_fs_pwd(current->fs, &target);

		path_put(&target);
		if (!retry_estale(err, flags))
			break;
		flags |= LOOKUP_REVAL;
	}

	return err;
}
|
|
|
|
/*
 * fchdir(2): like chdir(2), but the target is named by the descriptor @fd.
 * Returns -EBADF when @fd has no file behind it, -ENOTDIR when its dentry
 * cannot be looked up in, otherwise the result of the permission check.
 */
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
	struct fd f = fdget_raw(fd);
	struct vfsmount *mnt;
	int error;

	if (!f.file)
		return -EBADF;

	mnt = f.file->f_path.mnt;

	if (!d_can_lookup(f.file->f_path.dentry)) {
		error = -ENOTDIR;
	} else {
		error = inode_permission2(mnt, file_inode(f.file),
					  MAY_EXEC | MAY_CHDIR);
		if (!error)
			set_fs_pwd(current->fs, &f.file->f_path);
	}

	fdput(f);
	return error;
}
|
|
|
|
/*
 * chroot(2): look up @filename as a directory and make it the root of the
 * calling task's fs_struct.
 *
 * Checks are applied in this order, the first failure wins:
 *   1. MAY_EXEC | MAY_CHDIR permission on the directory,
 *   2. CAP_SYS_CHROOT in the caller's user namespace (-EPERM otherwise),
 *   3. the security_path_chroot() hook.
 *
 * The lookup is repeated with LOOKUP_REVAL added whenever retry_estale()
 * asks for it.
 */
SYSCALL_DEFINE1(chroot, const char __user *, filename)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
	struct path path;
	int error;

	for (;;) {
		error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
		if (error)
			return error;

		error = inode_permission2(path.mnt, path.dentry->d_inode,
					  MAY_EXEC | MAY_CHDIR);
		/* Only consult the capability once the permission check passed. */
		if (!error && !ns_capable(current_user_ns(), CAP_SYS_CHROOT))
			error = -EPERM;
		if (!error)
			error = security_path_chroot(&path);
		if (!error)
			set_fs_root(current->fs, &path);

		path_put(&path);

		if (!retry_estale(error, lookup_flags))
			return error;
		lookup_flags |= LOOKUP_REVAL;
	}
}
|
|
|
|
/*
 * chmod_common - change the permission bits of the object at @path
 * @path: object whose mode is changed
 * @mode: new mode; only the S_IALLUGO bits are taken from it
 *
 * Takes write access to the mount for the whole operation.  Under the inode
 * lock the security hook is consulted and the change is handed to
 * notify_change2().  If that reports a delegated inode, the delegation is
 * waited for with break_deleg_wait() and the attempt is repeated.
 *
 * Returns 0 on success or a negative errno.
 */
static int chmod_common(const struct path *path, umode_t mode)
{
	struct inode *inode = path->dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr attrs;
	int error;

	error = mnt_want_write(path->mnt);
	if (error)
		return error;

	for (;;) {
		inode_lock(inode);
		error = security_path_chmod(path, mode);
		if (!error) {
			/* Keep the non-permission bits of the current mode. */
			attrs.ia_mode = (mode & S_IALLUGO) |
					(inode->i_mode & ~S_IALLUGO);
			attrs.ia_valid = ATTR_MODE | ATTR_CTIME;
			error = notify_change2(path->mnt, path->dentry,
					       &attrs, &delegated_inode);
		}
		inode_unlock(inode);

		if (!delegated_inode)
			break;
		error = break_deleg_wait(&delegated_inode);
		if (error)
			break;
	}

	mnt_drop_write(path->mnt);
	return error;
}
|
|
|
|
/*
 * fchmod(2): change the mode of the file behind descriptor @fd.
 * Returns -EBADF when @fd has no file behind it, otherwise whatever
 * chmod_common() returns.
 */
SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
{
	struct fd f = fdget(fd);
	int err;

	if (!f.file)
		return -EBADF;

	audit_file(f.file);
	err = chmod_common(&f.file->f_path, mode);
	fdput(f);

	return err;
}
|
|
|
|
/*
 * fchmodat(2): resolve @filename relative to @dfd (following symlinks) and
 * apply @mode through chmod_common().  The lookup is repeated with
 * LOOKUP_REVAL added whenever retry_estale() asks for it.
 */
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
{
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int error;

	for (;;) {
		error = user_path_at(dfd, filename, lookup_flags, &path);
		if (error)
			return error;

		error = chmod_common(&path, mode);
		path_put(&path);

		if (!retry_estale(error, lookup_flags))
			return error;
		lookup_flags |= LOOKUP_REVAL;
	}
}
|
|
|
|
/*
 * chmod(2) entry point: forwards to sys_fchmodat() with AT_FDCWD as the
 * directory descriptor.
 */
SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
{
	const int dfd = AT_FDCWD;

	return sys_fchmodat(dfd, filename, mode);
}
|
|
|
|
/*
 * chown_common - change owner and/or group of the object at @path
 * @path:  object to change
 * @user:  new owner id as seen by the caller, or (uid_t)-1 to leave it alone
 * @group: new group id as seen by the caller, or (gid_t)-1 to leave it alone
 *
 * The ids are mapped into kernel ids through the caller's user namespace;
 * an id that was requested but does not map yields -EINVAL.  For anything
 * that is not a directory the setuid/setgid/privilege kill flags are
 * requested as well.  The change itself is made under the inode lock via
 * the security hook and notify_change2(); if a delegated inode is reported,
 * the delegation is waited for and the attempt is repeated.
 *
 * This function does not take write access to the mount itself.
 */
static int chown_common(const struct path *path, uid_t user, gid_t group)
{
	struct inode *inode = path->dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr attrs;
	kuid_t uid = make_kuid(current_user_ns(), user);
	kgid_t gid = make_kgid(current_user_ns(), group);
	int error;

	for (;;) {
		/* Rebuilt from scratch on every attempt. */
		attrs.ia_valid = ATTR_CTIME;

		if (user != (uid_t) -1) {
			if (!uid_valid(uid))
				return -EINVAL;
			attrs.ia_uid = uid;
			attrs.ia_valid |= ATTR_UID;
		}
		if (group != (gid_t) -1) {
			if (!gid_valid(gid))
				return -EINVAL;
			attrs.ia_gid = gid;
			attrs.ia_valid |= ATTR_GID;
		}
		if (!S_ISDIR(inode->i_mode))
			attrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID |
					  ATTR_KILL_PRIV;

		inode_lock(inode);
		error = security_path_chown(path, uid, gid);
		if (!error)
			error = notify_change2(path->mnt, path->dentry,
					       &attrs, &delegated_inode);
		inode_unlock(inode);

		if (!delegated_inode)
			return error;

		error = break_deleg_wait(&delegated_inode);
		if (error)
			return error;
	}
}
|
|
|
|
/*
 * fchownat(2): resolve @filename relative to @dfd and change its ownership.
 *
 * Only AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH are accepted in @flag; any
 * other bit yields -EINVAL.  Symlinks are followed unless
 * AT_SYMLINK_NOFOLLOW is given.  Write access to the mount is held around
 * chown_common().  The lookup is repeated with LOOKUP_REVAL added whenever
 * retry_estale() asks for it.
 */
SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
		gid_t, group, int, flag)
{
	const int known_flags = AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH;
	int lookup_flags = 0;
	struct path path;
	int error;

	if (flag & ~known_flags)
		return -EINVAL;

	if (!(flag & AT_SYMLINK_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	if (flag & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

	for (;;) {
		error = user_path_at(dfd, filename, lookup_flags, &path);
		if (error)
			return error;

		error = mnt_want_write(path.mnt);
		if (!error) {
			error = chown_common(&path, user, group);
			mnt_drop_write(path.mnt);
		}
		path_put(&path);

		if (!retry_estale(error, lookup_flags))
			return error;
		lookup_flags |= LOOKUP_REVAL;
	}
}
|
|
|
|
/*
 * chown(2) entry point: forwards to sys_fchownat() with AT_FDCWD and no
 * flags.
 */
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
	const int no_flags = 0;

	return sys_fchownat(AT_FDCWD, filename, user, group, no_flags);
}
|
|
|
|
/*
 * lchown(2) entry point: forwards to sys_fchownat() with AT_FDCWD and
 * AT_SYMLINK_NOFOLLOW.
 */
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
	const int at_flags = AT_SYMLINK_NOFOLLOW;

	return sys_fchownat(AT_FDCWD, filename, user, group, at_flags);
}
|
|
|
|
/*
 * fchown(2): change ownership of the file behind descriptor @fd.
 * Returns -EBADF when @fd has no file behind it.  Write access to the
 * file's mount is held around chown_common().
 */
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
	struct fd f = fdget(fd);
	int error;

	if (!f.file)
		return -EBADF;

	error = mnt_want_write_file_path(f.file);
	if (!error) {
		audit_file(f.file);
		error = chown_common(&f.file->f_path, user, group);
		mnt_drop_write_file_path(f.file);
	}

	fdput(f);
	return error;
}
|
|
|
|
int open_check_o_direct(struct file *f)
|
|
{
|
|
/* NB: we're sure to have correct a_ops only after f_op->open */
|
|
if (f->f_flags & O_DIRECT) {
|
|
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
|
|
return -EINVAL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int do_dentry_open(struct file *f,
|
|
struct inode *inode,
|
|
int (*open)(struct inode *, struct file *),
|
|
const struct cred *cred)
|
|
{
|
|
static const struct file_operations empty_fops = {};
|
|
int error;
|
|
|
|
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
|
|
FMODE_PREAD | FMODE_PWRITE;
|
|
|
|
path_get(&f->f_path);
|
|
f->f_inode = inode;
|
|
f->f_mapping = inode->i_mapping;
|
|
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
|
|
f->f_sb_err = file_sample_sb_err(f);
|
|
|
|
if (unlikely(f->f_flags & O_PATH)) {
|
|
f->f_mode = FMODE_PATH;
|
|
f->f_op = &empty_fops;
|
|
return 0;
|
|
}
|
|
|
|
/* Any file opened for execve()/uselib() has to be a regular file. */
|
|
if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
|
|
error = -EACCES;
|
|
goto cleanup_file;
|
|
}
|
|
|
|
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
|
|
error = get_write_access(inode);
|
|
if (unlikely(error))
|
|
goto cleanup_file;
|
|
error = __mnt_want_write(f->f_path.mnt);
|
|
if (unlikely(error)) {
|
|
put_write_access(inode);
|
|
goto cleanup_file;
|
|
}
|
|
f->f_mode |= FMODE_WRITER;
|
|
}
|
|
|
|
/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
|
|
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
|
|
f->f_mode |= FMODE_ATOMIC_POS;
|
|
|
|
f->f_op = fops_get(inode->i_fop);
|
|
if (unlikely(WARN_ON(!f->f_op))) {
|
|
error = -ENODEV;
|
|
goto cleanup_all;
|
|
}
|
|
|
|
error = security_file_open(f, cred);
|
|
if (error)
|
|
goto cleanup_all;
|
|
|
|
error = break_lease(locks_inode(f), f->f_flags);
|
|
if (error)
|
|
goto cleanup_all;
|
|
|
|
if (!open)
|
|
open = f->f_op->open;
|
|
if (open) {
|
|
error = open(inode, f);
|
|
if (error)
|
|
goto cleanup_all;
|
|
}
|
|
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
|
|
i_readcount_inc(inode);
|
|
if ((f->f_mode & FMODE_READ) &&
|
|
likely(f->f_op->read || f->f_op->read_iter))
|
|
f->f_mode |= FMODE_CAN_READ;
|
|
if ((f->f_mode & FMODE_WRITE) &&
|
|
likely(f->f_op->write || f->f_op->write_iter))
|
|
f->f_mode |= FMODE_CAN_WRITE;
|
|
|
|
f->f_write_hint = WRITE_LIFE_NOT_SET;
|
|
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
|
|
|
|
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
|
|
|
|
return 0;
|
|
|
|
cleanup_all:
|
|
fops_put(f->f_op);
|
|
if (f->f_mode & FMODE_WRITER) {
|
|
put_write_access(inode);
|
|
__mnt_drop_write(f->f_path.mnt);
|
|
}
|
|
cleanup_file:
|
|
path_put(&f->f_path);
|
|
f->f_path.mnt = NULL;
|
|
f->f_path.dentry = NULL;
|
|
f->f_inode = NULL;
|
|
return error;
|
|
}
|
|
|
|
/**
|
|
* finish_open - finish opening a file
|
|
* @file: file pointer
|
|
* @dentry: pointer to dentry
|
|
* @open: open callback
|
|
* @opened: state of open
|
|
*
|
|
* This can be used to finish opening a file passed to i_op->atomic_open().
|
|
*
|
|
* If the open callback is set to NULL, then the standard f_op->open()
|
|
* filesystem callback is substituted.
|
|
*
|
|
* NB: the dentry reference is _not_ consumed. If, for example, the dentry is
|
|
* the return value of d_splice_alias(), then the caller needs to perform dput()
|
|
* on it after finish_open().
|
|
*
|
|
* Returns zero on success or -errno if the open failed.
|
|
*/
|
|
int finish_open(struct file *file, struct dentry *dentry,
|
|
int (*open)(struct inode *, struct file *),
|
|
int *opened)
|
|
{
|
|
int error;
|
|
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
|
|
|
|
file->f_path.dentry = dentry;
|
|
error = do_dentry_open(file, d_backing_inode(dentry), open,
|
|
current_cred());
|
|
if (!error)
|
|
*opened |= FILE_OPENED;
|
|
|
|
return error;
|
|
}
|
|
EXPORT_SYMBOL(finish_open);
|
|
|
|
/**
|
|
* finish_no_open - finish ->atomic_open() without opening the file
|
|
*
|
|
* @file: file pointer
|
|
* @dentry: dentry or NULL (as returned from ->lookup())
|
|
*
|
|
* This can be used to set the result of a successful lookup in ->atomic_open().
|
|
*
|
|
* NB: unlike finish_open() this function does consume the dentry reference and
|
|
* the caller need not dput() it.
|
|
*
|
|
* Returns "1" which must be the return value of ->atomic_open() after having
|
|
* called this function.
|
|
*/
|
|
int finish_no_open(struct file *file, struct dentry *dentry)
|
|
{
|
|
file->f_path.dentry = dentry;
|
|
return 1;
|
|
}
|
|
EXPORT_SYMBOL(finish_no_open);
|
|
|
|
char *file_path(struct file *filp, char *buf, int buflen)
|
|
{
|
|
return d_path(&filp->f_path, buf, buflen);
|
|
}
|
|
EXPORT_SYMBOL(file_path);
|
|
|
|
/**
|
|
* vfs_open - open the file at the given path
|
|
* @path: path to open
|
|
* @file: newly allocated file with f_flag initialized
|
|
* @cred: credentials to use
|
|
*/
|
|
int vfs_open(const struct path *path, struct file *file,
|
|
const struct cred *cred)
|
|
{
|
|
struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
|
|
|
|
if (IS_ERR(dentry))
|
|
return PTR_ERR(dentry);
|
|
|
|
file->f_path = *path;
|
|
return do_dentry_open(file, d_backing_inode(dentry), NULL, cred);
|
|
}
|
|
|
|
struct file *dentry_open(const struct path *path, int flags,
|
|
const struct cred *cred)
|
|
{
|
|
int error;
|
|
struct file *f;
|
|
|
|
validate_creds(cred);
|
|
|
|
/* We must always pass in a valid mount pointer. */
|
|
BUG_ON(!path->mnt);
|
|
|
|
f = get_empty_filp();
|
|
if (!IS_ERR(f)) {
|
|
f->f_flags = flags;
|
|
error = vfs_open(path, f, cred);
|
|
if (!error) {
|
|
/* from now on we need fput() to dispose of f */
|
|
error = open_check_o_direct(f);
|
|
if (error) {
|
|
fput(f);
|
|
f = ERR_PTR(error);
|
|
}
|
|
} else {
|
|
put_filp(f);
|
|
f = ERR_PTR(error);
|
|
}
|
|
}
|
|
return f;
|
|
}
|
|
EXPORT_SYMBOL(dentry_open);
|
|
|
|
/*
 * build_open_flags - turn open(2) arguments into a struct open_flags
 * @flags: flags as passed by the caller
 * @mode:  creation mode as passed by the caller
 * @op:    result; mode, open_flag, acc_mode, intent and lookup_flags are
 *         filled in
 *
 * Returns 0 on success, or -EINVAL when __O_TMPFILE is set but the flags
 * are not a complete O_TMPFILE or the access mode does not include write.
 */
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
	int lookup_flags = 0;
	/* Derived from the caller's flags before any of them are masked. */
	int acc_mode = ACC_MODE(flags);

	/*
	 * Drop every open flag we do not know about, so that none of them
	 * is later reported by fcntl(F_GETFD) or similar interfaces.
	 */
	flags &= VALID_OPEN_FLAGS;

	/* A mode is only meaningful when a file may be created. */
	op->mode = (flags & (O_CREAT | __O_TMPFILE)) ?
			((mode & S_IALLUGO) | S_IFREG) : 0;

	/* Must never be set by userspace */
	flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;

	/*
	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  Many places only check
	 * for O_DSYNC when they need any syncing at all, so make sure it is
	 * always set rather than having to deal with possibly weird
	 * behaviour for malicious applications setting only __O_SYNC.
	 */
	if (flags & __O_SYNC)
		flags |= O_DSYNC;

	if (flags & __O_TMPFILE) {
		if ((flags & O_TMPFILE_MASK) != O_TMPFILE ||
		    !(acc_mode & MAY_WRITE))
			return -EINVAL;
	} else if (flags & O_PATH) {
		/*
		 * With O_PATH in the open flags, nothing other than the
		 * flags below may remain set.
		 */
		flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
		acc_mode = 0;
	}

	op->open_flag = flags;

	/* O_TRUNC implies we need access checks for write permissions */
	if (flags & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/*
	 * Allow the LSM permission hook to distinguish append access from
	 * general write access.
	 */
	if (flags & O_APPEND)
		acc_mode |= MAY_APPEND;

	op->acc_mode = acc_mode;

	op->intent = (flags & O_PATH) ? 0 : LOOKUP_OPEN;
	if (flags & O_CREAT) {
		op->intent |= LOOKUP_CREATE;
		if (flags & O_EXCL)
			op->intent |= LOOKUP_EXCL;
	}

	if (flags & O_DIRECTORY)
		lookup_flags |= LOOKUP_DIRECTORY;
	if (!(flags & O_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	op->lookup_flags = lookup_flags;

	return 0;
}
|
|
|
|
/**
|
|
* file_open_name - open file and return file pointer
|
|
*
|
|
* @name: struct filename containing path to open
|
|
* @flags: open flags as per the open(2) second argument
|
|
* @mode: mode for the new file if O_CREAT is set, else ignored
|
|
*
|
|
* This is the helper to open a file from kernelspace if you really
|
|
* have to. But in generally you should not do this, so please move
|
|
* along, nothing to see here..
|
|
*/
|
|
struct file *file_open_name(struct filename *name, int flags, umode_t mode)
|
|
{
|
|
struct open_flags op;
|
|
int err = build_open_flags(flags, mode, &op);
|
|
return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op);
|
|
}
|
|
|
|
/**
|
|
* filp_open - open file and return file pointer
|
|
*
|
|
* @filename: path to open
|
|
* @flags: open flags as per the open(2) second argument
|
|
* @mode: mode for the new file if O_CREAT is set, else ignored
|
|
*
|
|
* This is the helper to open a file from kernelspace if you really
|
|
* have to. But in generally you should not do this, so please move
|
|
* along, nothing to see here..
|
|
*/
|
|
struct file *filp_open(const char *filename, int flags, umode_t mode)
|
|
{
|
|
struct filename *name = getname_kernel(filename);
|
|
struct file *file = ERR_CAST(name);
|
|
|
|
if (!IS_ERR(name)) {
|
|
file = file_open_name(name, flags, mode);
|
|
putname(name);
|
|
}
|
|
return file;
|
|
}
|
|
EXPORT_SYMBOL(filp_open);
|
|
|
|
/*
 * file_open_root - open @filename through do_file_open_root()
 * @dentry:   dentry handed to do_file_open_root()
 * @mnt:      mount handed to do_file_open_root()
 * @filename: path to open
 * @flags:    open flags as per the open(2) second argument
 * @mode:     mode for the new file if O_CREAT is set
 *
 * Returns the opened file, or an ERR_PTR() if the flags are rejected or
 * the open fails.
 */
struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
			    const char *filename, int flags, umode_t mode)
{
	struct open_flags op;
	int err;

	err = build_open_flags(flags, mode, &op);

	return err ? ERR_PTR(err)
		   : do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
|
|
|
|
struct file *filp_clone_open(struct file *oldfile)
|
|
{
|
|
struct file *file;
|
|
int retval;
|
|
|
|
file = get_empty_filp();
|
|
if (IS_ERR(file))
|
|
return file;
|
|
|
|
file->f_flags = oldfile->f_flags;
|
|
retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
|
|
if (retval) {
|
|
put_filp(file);
|
|
return ERR_PTR(retval);
|
|
}
|
|
|
|
return file;
|
|
}
|
|
EXPORT_SYMBOL(filp_clone_open);
|
|
|
|
/*
 * do_sys_open - common implementation behind the open family of syscalls
 * @dfd:      directory descriptor the lookup is relative to
 * @filename: user-space pointer to the path
 * @flags:    open flags
 * @mode:     creation mode
 *
 * Returns the new file descriptor on success or a negative errno.  On any
 * failure after a descriptor number was reserved, that number is given
 * back with put_unused_fd().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	struct filename *name;
	struct file *f;
	int fd;

	fd = build_open_flags(flags, mode, &op);
	if (fd)
		return fd;

	name = getname(filename);
	if (IS_ERR(name))
		return PTR_ERR(name);

	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out_putname;

	f = do_filp_open(dfd, name, &op);
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out_putname;
	}

	fsnotify_open(f);
	fd_install(fd, f);

out_putname:
	putname(name);
	return fd;
}
|
|
|
|
/*
 * open(2) entry point: adds O_LARGEFILE when force_o_largefile() says so
 * and forwards to do_sys_open() with AT_FDCWD.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
	const int lfs = force_o_largefile() ? O_LARGEFILE : 0;

	return do_sys_open(AT_FDCWD, filename, flags | lfs, mode);
}
|
|
|
|
/*
 * openat(2) entry point: adds O_LARGEFILE when force_o_largefile() says so
 * and forwards to do_sys_open() with the caller's @dfd.
 */
SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
		umode_t, mode)
{
	const int lfs = force_o_largefile() ? O_LARGEFILE : 0;

	return do_sys_open(dfd, filename, flags | lfs, mode);
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
/*
|
|
* Exactly like sys_open(), except that it doesn't set the
|
|
* O_LARGEFILE flag.
|
|
*/
|
|
COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
|
|
{
|
|
return do_sys_open(AT_FDCWD, filename, flags, mode);
|
|
}
|
|
|
|
/*
|
|
* Exactly like sys_openat(), except that it doesn't set the
|
|
* O_LARGEFILE flag.
|
|
*/
|
|
COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
|
|
{
|
|
return do_sys_open(dfd, filename, flags, mode);
|
|
}
|
|
#endif
|
|
|
|
#ifndef __alpha__
|
|
|
|
/*
|
|
* For backward compatibility? Maybe this should be moved
|
|
* into arch/i386 instead?
|
|
*/
|
|
SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
|
|
{
|
|
return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* "id" is the POSIX thread ID. We use the
|
|
* files pointer for this..
|
|
*/
|
|
int filp_close(struct file *filp, fl_owner_t id)
|
|
{
|
|
int retval = 0;
|
|
|
|
if (!file_count(filp)) {
|
|
printk(KERN_ERR "VFS: Close: file count is 0\n");
|
|
return 0;
|
|
}
|
|
|
|
if (filp->f_op->flush)
|
|
retval = filp->f_op->flush(filp, id);
|
|
|
|
if (likely(!(filp->f_mode & FMODE_PATH))) {
|
|
dnotify_flush(filp, id);
|
|
locks_remove_posix(filp, id);
|
|
}
|
|
fput(filp);
|
|
return retval;
|
|
}
|
|
|
|
EXPORT_SYMBOL(filp_close);
|
|
|
|
/*
|
|
* Careful here! We test whether the file pointer is NULL before
|
|
* releasing the fd. This ensures that one clone task can't release
|
|
* an fd while another clone is opening it.
|
|
*/
|
|
SYSCALL_DEFINE1(close, unsigned int, fd)
|
|
{
|
|
int retval = __close_fd(current->files, fd);
|
|
|
|
/* can't restart close syscall because file table entry was cleared */
|
|
if (unlikely(retval == -ERESTARTSYS ||
|
|
retval == -ERESTARTNOINTR ||
|
|
retval == -ERESTARTNOHAND ||
|
|
retval == -ERESTART_RESTARTBLOCK))
|
|
retval = -EINTR;
|
|
|
|
return retval;
|
|
}
|
|
EXPORT_SYMBOL(sys_close);
|
|
|
|
/*
|
|
* This routine simulates a hangup on the tty, to arrange that users
|
|
* are given clean terminals at login time.
|
|
*/
|
|
SYSCALL_DEFINE0(vhangup)
|
|
{
|
|
if (capable(CAP_SYS_TTY_CONFIG)) {
|
|
tty_vhangup_self();
|
|
return 0;
|
|
}
|
|
return -EPERM;
|
|
}
|
|
|
|
/*
|
|
* Called when an inode is about to be open.
|
|
* We use this to disallow opening large files on 32bit systems if
|
|
* the caller didn't specify O_LARGEFILE. On 64bit systems we force
|
|
* on this flag in sys_open.
|
|
*/
|
|
int generic_file_open(struct inode * inode, struct file * filp)
|
|
{
|
|
if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
|
|
return -EOVERFLOW;
|
|
return 0;
|
|
}
|
|
|
|
EXPORT_SYMBOL(generic_file_open);
|
|
|
|
/*
|
|
* This is used by subsystems that don't want seekable
|
|
* file descriptors. The function is not supposed to ever fail, the only
|
|
* reason it returns an 'int' and not 'void' is so that it can be plugged
|
|
* directly into file_operations structure.
|
|
*/
|
|
int nonseekable_open(struct inode *inode, struct file *filp)
|
|
{
|
|
filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
|
|
return 0;
|
|
}
|
|
|
|
EXPORT_SYMBOL(nonseekable_open);
|
|
|
|
/*
|
|
* stream_open is used by subsystems that want stream-like file descriptors.
|
|
* Such file descriptors are not seekable and don't have notion of position
|
|
* (file.f_pos is always 0). Contrary to file descriptors of other regular
|
|
* files, .read() and .write() can run simultaneously.
|
|
*
|
|
* stream_open never fails and is marked to return int so that it could be
|
|
* directly used as file_operations.open .
|
|
*/
|
|
int stream_open(struct inode *inode, struct file *filp)
|
|
{
|
|
filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
|
|
filp->f_mode |= FMODE_STREAM;
|
|
return 0;
|
|
}
|
|
|
|
EXPORT_SYMBOL(stream_open);
|