msm-4.14/fs/file_table.c
azrim e9e83ae080
Merge remote-tracking branch 'google/upstream-f2fs-stable-linux-4.14.y' into sheesh
* google/upstream-f2fs-stable-linux-4.14.y:
  f2fs: fix to do sanity check on .cp_pack_total_block_count
  f2fs: make gc_urgent and gc_segment_mode sysfs node readable
  f2fs: use aggressive GC policy during f2fs_disable_checkpoint()
  f2fs: fix compressed file start atomic write may cause data corruption
  f2fs: initialize sbi->gc_mode explicitly
  f2fs: introduce gc_urgent_mid mode
  f2fs: compress: fix to print raw data size in error path of lz4 decompression
  f2fs: remove redundant parameter judgment
  f2fs: use spin_lock to avoid hang
  f2fs: don't get FREEZE lock in f2fs_evict_inode in frozen fs
  f2fs: remove unnecessary read for F2FS_FITS_IN_INODE
  f2fs: fix to do sanity check on curseg->alloc_type
  f2fs: fix to avoid potential deadlock
  f2fs: quota: fix loop condition at f2fs_quota_sync()
  f2fs: Restore rwsem lockdep support
  f2fs: fix missing free nid in f2fs_handle_failed_inode
  f2fs: add a way to limit roll forward recovery time
  f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy
  f2fs: adjust readahead block number during recovery
  f2fs: fix to unlock page correctly in error path of is_alive()
  f2fs: expose discard related parameters in sysfs
  f2fs: move discard parameters into discard_cmd_control
  f2fs: fix to enable ATGC correctly via gc_idle sysfs interface
  f2fs: move f2fs to use reader-unfair rwsems
  f2fs: do not allow partial truncation on pinned file
  f2fs: remove redunant invalidate compress pages
  f2fs: Simplify bool conversion
  f2fs: don't drop compressed page cache in .{invalidate,release}page
  f2fs: fix to reserve space for IO align feature
  f2fs: fix to check available space of CP area correctly in update_ckpt_flags()
  f2fs: support fault injection to f2fs_trylock_op()
  f2fs: clean up __find_inline_xattr() with __find_xattr()
  f2fs: fix to do sanity check on last xattr entry in __f2fs_setxattr()
  f2fs: do not bother checkpoint by f2fs_get_node_info
  f2fs: avoid down_write on nat_tree_lock during checkpoint
  f2fs: compress: fix potential deadlock of compress file
  f2fs: avoid EINVAL by SBI_NEED_FSCK when pinning a file
  f2fs: add gc_urgent_high_remaining sysfs node
  f2fs: fix to do sanity check in is_alive()
  f2fs: fix to avoid panic in is_alive() if metadata is inconsistent
  f2fs: fix to do sanity check on inode type during garbage collection
  f2fs: avoid duplicate call of mark_inode_dirty
  f2fs: fix remove page failed in invalidate compress pages
  f2fs: fix the f2fs_file_write_iter tracepoint
  f2fs: do not expose unwritten blocks to user by DIO
  f2fs: reduce indentation in f2fs_file_write_iter()
  f2fs: rework write preallocations
  f2fs: compress: reduce one page array alloc and free when write compressed page
  f2fs: show number of pending discard commands
  f2fs: check nr_pages for readahead
  f2fs: fix UAF in f2fs_available_free_memory
  f2fs: invalidate META_MAPPING before IPU/DIO write
  f2fs: support fault injection for dquot_initialize()
  f2fs: fix incorrect return value in f2fs_sanity_check_ckpt()
  f2fs: compress: disallow disabling compress on non-empty compressed file
  f2fs: compress: fix overwrite may reduce compress ratio unproperly
  f2fs: multidevice: support direct IO
  f2fs: introduce fragment allocation mode mount option
  f2fs: include non-compressed blocks in compr_written_block
  f2fs: fix wrong condition to trigger background checkpoint correctly
  f2fs: fix to use WHINT_MODE
  f2fs: fix up f2fs_lookup tracepoints
  f2fs: set SBI_NEED_FSCK flag when inconsistent node block found
  f2fs: introduce excess_dirty_threshold()
  f2fs: avoid attaching SB_ACTIVE flag during mount
  f2fs: quota: fix potential deadlock
  f2fs: should use GFP_NOFS for directory inodes
  f2fs: should put a page beyond EOF when preparing a write
  f2fs: deallocate compressed pages when error happens
  f2fs: enable realtime discard iff device supports discard
  f2fs: guarantee to write dirty data when enabling checkpoint back
  f2fs: fix to unmap pages from userspace process in punch_hole()
  f2fs: fix unexpected ENOENT comes from f2fs_map_blocks()
  f2fs: fix to account missing .skipped_gc_rwsem
  f2fs: adjust unlock order for cleanup
  f2fs: Don't create discard thread when device doesn't support realtime discard
  f2fs: rebuild nat_bits during umount
  f2fs: introduce periodic iostat io latency traces
  f2fs: separate out iostat feature
  f2fs: compress: do sanity check on cluster
  f2fs: fix description about main_blkaddr node
  f2fs: convert S_IRUGO to 0444
  f2fs: fix to keep compatibility of fault injection interface
  f2fs: support fault injection for f2fs_kmem_cache_alloc()
  f2fs: compress: allow write compress released file after truncate to zero
  f2fs: correct comment in segment.h
  f2fs: improve sbi status info in debugfs/f2fs/status
  f2fs: compress: avoid duplicate counting of valid blocks when read compressed file
  f2fs: fix to do sanity check for sb/cp fields correctly
  f2fs: avoid unneeded memory allocation in __add_ino_entry()
  f2fs: extent cache: support unaligned extent
  f2fs: Kconfig: clean up config options about compression
  f2fs: reduce the scope of setting fsck tag when de->name_len is zero
  f2fs: fix to stop filesystem update once CP failed
  f2fs: introduce discard_unit mount option
  f2fs: fix min_seq_blocks can not make sense in some scenes.
  f2fs: fix to force keeping write barrier for strict fsync mode
  f2fs: fix wrong checkpoint_changed value in f2fs_remount()
  f2fs: show sbi status in debugfs/f2fs/status
  f2fs: turn back remapped address in compressed page endio
  f2fs: change fiemap way in printing compression chunk
  f2fs: do not submit NEW_ADDR to read node block
  f2fs: compress: remove unneeded read when rewrite whole cluster
  f2fs: don't sleep while grabing nat_tree_lock
  f2fs: remove allow_outplace_dio()
  f2fs: make f2fs_write_failed() take struct inode
  f2fs: quota: fix potential deadlock
  f2fs: let's keep writing IOs on SBI_NEED_FSCK
  f2fs: Revert "f2fs: Fix indefinite loop in f2fs_gc() v1"
  f2fs: avoid to create an empty string as the extension_list
  f2fs: compress: fix to set zstd compress level correctly
  f2fs: add sysfs nodes to get GC info for each GC mode
  f2fs: drop dirty node pages when cp is in error status
  f2fs: initialize page->private when using for our internal use
  f2fs: compress: add nocompress extensions support
  Revert "f2fs: avoid attaching SB_ACTIVE flag during mount/remount"
  f2fs: remove false alarm on iget failure during GC
  f2fs: enable extent cache for compression files in read-only
  f2fs: fix to avoid adding tab before doc section
  f2fs: introduce f2fs_casefolded_name slab cache
  f2fs: swap: support migrating swapfile in aligned write mode
  f2fs: swap: remove dead codes
  f2fs: compress: add compress_inode to cache compressed blocks
  f2fs: clean up /sys/fs/f2fs/<disk>/features
  f2fs: add pin_file in feature list
  f2fs: Advertise encrypted casefolding in sysfs
  f2fs: Show casefolding support only when supported
  f2fs: support RO feature
  f2fs: logging neatening
  f2fs: restructure f2fs page.private layout
  f2fs: introduce FI_COMPRESS_RELEASED instead of using IMMUTABLE bit
  f2fs: compress: remove unneeded preallocation
  f2fs: avoid attaching SB_ACTIVE flag during mount/remount
  f2fs: atgc: export entries for better tunability via sysfs
  f2fs: compress: fix to disallow temp extension
  f2fs: let's allow compression for mmap files
  f2fs: add MODULE_SOFTDEP to ensure crc32 is included in the initramfs
  f2fs: return success if there is no work to do
  f2fs: compress: clean up parameter of __f2fs_cluster_blocks()
  f2fs: compress: remove unneeded f2fs_put_dnode()
  f2fs: atgc: fix to set default age threshold
  f2fs: Prevent swap file in LFS mode
  f2fs: fix to avoid racing on fsync_entry_slab by multi filesystem instances
  f2fs: add cp_error check in f2fs_write_compressed_pages
  f2fs: compress: rename __cluster_may_compress
  f2fs: return EINVAL for hole cases in swap file
  f2fs: avoid swapon failure by giving a warning first
  f2fs: compress: fix to assign cc.cluster_idx correctly
  f2fs: compress: fix race condition of overwrite vs truncate
  f2fs: compress: fix to free compress page correctly
  f2fs: support iflag change given the mask
  f2fs: avoid null pointer access when handling IPU error
  f2fs: drop inplace IO if fs status is abnormal
  f2fs: compress: remove unneed check condition
  f2fs: clean up left deprecated IO trace codes
  f2fs: avoid using native allocate_segment_by_default()
  f2fs: remove unnecessary struct declaration
  f2fs: fix to avoid NULL pointer dereference
  f2fs: avoid duplicated codes for cleanup
  f2fs: document: add description about compressed space handling
  f2fs: clean up build warnings
  f2fs: fix the periodic wakeups of discard thread
  f2fs: fix to avoid accessing invalid fio in f2fs_allocate_data_block()
  f2fs: fix to avoid GC/mmap race with f2fs_truncate()
  f2fs: set checkpoint_merge by default
  f2fs: Fix a hungtask problem in atomic write
  f2fs: fix to restrict mount condition on readonly block device
  f2fs: introduce gc_merge mount option
  f2fs: fix to cover __allocate_new_section() with curseg_lock
  f2fs: fix wrong alloc_type in f2fs_do_replace_block
  f2fs: delete empty compress.h
  f2fs: fix a typo in inode.c
  f2fs: allow to change discard policy based on cached discard cmds
  f2fs: fix to avoid touching checkpointed data in get_victim()
  f2fs: fix to update last i_size if fallocate partially succeeds
  f2fs: fix error path of f2fs_remount()
  f2fs: fix wrong comment of nat_tree_lock
  f2fs: fix to avoid out-of-bounds memory access
  f2fs: don't start checkpoint thread in readonly mountpoint
  f2fs: do not use AT_SSR mode in FG_GC & high urgent BG_GC
  f2fs: add sysfs nodes to get runtime compression stat
  f2fs: fix to use per-inode maxbytes in f2fs_fiemap
  f2fs: fix to align to section for fallocate() on pinned file
  f2fs: expose # of overprivision segments
  f2fs: fix error handling in f2fs_end_enable_verity()
  f2fs: fix a redundant call to f2fs_balance_fs if an error occurs
  f2fs: remove unused file_clear_encrypt()
  f2fs: check if swapfile is section-alligned
  f2fs: fix last_lblock check in check_swap_activate_fast
  f2fs: remove unnecessary IS_SWAPFILE check
  f2fs: Replace one-element array with flexible-array member
  f2fs: compress: Allow modular (de)compression algorithms
  f2fs: check discard command number before traversing discard pending list
  f2fs: update comments for explicit memory barrier
  f2fs: remove unused FORCE_FG_GC macro
  f2fs: avoid unused f2fs_show_compress_options()
  f2fs: fix panic during f2fs_resize_fs()
  f2fs: fix to allow migrating fully valid segment
  f2fs: fix a spelling error
  f2fs: fix a spacing coding style
  fs: Enable bmap() function to properly return errors
  f2fs: remove obsolete f2fs.txt
  fs-verity: support reading signature with ioctl
  fs-verity: support reading descriptor with ioctl
  fs-verity: support reading Merkle tree with ioctl
  fs-verity: add FS_IOC_READ_VERITY_METADATA ioctl
  fs-verity: don't pass whole descriptor to fsverity_verify_signature()
  fs-verity: factor out fsverity_get_descriptor()
  fs-verity: move structs needed for file signing to UAPI header
  fs-verity: rename "file measurement" to "file digest"
  fs-verity: rename fsverity_signed_digest to fsverity_formatted_digest
  fs-verity: remove filenames from file comments
  fs-verity: use smp_load_acquire() for ->i_verity_info
  f2fs: remove FAULT_ALLOC_BIO
  f2fs: use blkdev_issue_flush in __submit_flush_wait
  f2fs: remove a few bd_part checks
  quota: Cleanup list iteration in dqcache_shrink_scan()
  quota: reclaim least recently used dquots
  fs: quota: Replace GFP_ATOMIC with GFP_KERNEL in dquot_init
  quota: Check for register_shrinker() failure.
  quota: propagate error from __dquot_initialize
  quota: be aware of error from dquot_initialize
  Documentation: f2fs: fix typo s/automaic/automatic
  f2fs: give a warning only for readonly partition
  f2fs: don't grab superblock freeze for flush/ckpt thread
  f2fs: add ckpt_thread_ioprio sysfs node
  f2fs: introduce checkpoint_merge mount option
  f2fs: relocate inline conversion from mmap() to mkwrite()
  f2fs: fix a wrong condition in __submit_bio
  f2fs: remove unnecessary initialization in xattr.c
  f2fs: fix to avoid inconsistent quota data
  f2fs: flush data when enabling checkpoint back
  f2fs: deprecate f2fs_trace_io
  f2fs: remove unused stat_{inc, dec}_atomic_write
  f2fs: introduce sb_status sysfs node
  f2fs: fix to use per-inode maxbytes
  f2fs: compress: fix potential deadlock
  libfs: unexport generic_ci_d_compare() and generic_ci_d_hash()
  f2fs: fix to set/clear I_LINKABLE under i_lock
  f2fs: fix null page reference in redirty_blocks
  f2fs: clean up post-read processing
  f2fs: trival cleanup in move_data_block()
  f2fs: fix out-of-repair __setattr_copy()
  f2fs: fix to tag FIEMAP_EXTENT_MERGED in f2fs_fiemap()
  f2fs: introduce a new per-sb directory in sysfs
  f2fs: compress: support compress level
  f2fs: compress: deny setting unsupported compress algorithm
  f2fs: relocate f2fs_precache_extents()
  f2fs: enforce the immutable flag on open files
  f2fs: enhance to update i_mode and acl atomically in f2fs_setattr()
  f2fs: fix to set inode->i_mode correctly for posix_acl_update_mode
  f2fs: Replace expression with offsetof()
  f2fs: handle unallocated section and zone on pinned/atgc
  f2fs: compress: fix compression chksum
  f2fs: fix shift-out-of-bounds in sanity_check_raw_super()
  f2fs: fix race of pending_pages in decompression
  f2fs: fix to account inline xattr correctly during recovery
  f2fs: inline: fix wrong inline inode stat
  f2fs: inline: correct comment in f2fs_recover_inline_data
  f2fs: don't check PAGE_SIZE again in sanity_check_raw_super()
  f2fs: convert to F2FS_*_INO macro
  f2fs: introduce max_io_bytes, a sysfs entry, to limit bio size
  f2fs: don't allow any writes on readonly mount
  f2fs: avoid race condition for shrinker count
  f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE
  f2fs: add compress_mode mount option
  f2fs: Remove unnecessary unlikely()
  f2fs: init dirty_secmap incorrectly
  f2fs: remove buffer_head which has 32bits limit
  f2fs: fix wrong block count instead of bytes
  f2fs: use new conversion functions between blks and bytes
  f2fs: rename logical_to_blk and blk_to_logical
  f2fs: fix kbytes written stat for multi-device case
  f2fs: compress: support chksum
  f2fs: fix to avoid REQ_TIME and CP_TIME collision
  f2fs: change to use rwsem for cp_mutex
  f2fs: Handle casefolding with Encryption
  fscrypt: Have filesystems handle their d_ops
  libfs: Add generic function for setting dentry_ops
  f2fs: Remove the redundancy initialization
  f2fs: remove writeback_inodes_sb in f2fs_remount
  f2fs: fix double free of unicode map
  f2fs: fix compat F2FS_IOC_{MOVE,GARBAGE_COLLECT}_RANGE
  f2fs: avoid unneeded data copy in f2fs_ioc_move_range()
  f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl
  f2fs: add F2FS_IOC_GET_COMPRESS_OPTION ioctl
  f2fs: move ioctl interface definitions to separated file
  f2fs: fix to seek incorrect data offset in inline data file
  f2fs: check fiemap parameters
  f2fs: call f2fs_get_meta_page_retry for nat page
  fscrypt: rename DCACHE_ENCRYPTED_NAME to DCACHE_NOKEY_NAME
  fscrypt: don't call no-key names "ciphertext names"
  fscrypt: export fscrypt_d_revalidate()
  f2fs: code cleanup by removing unnecessary check
  f2fs: wait for sysfs kobject removal before freeing f2fs_sb_info
  f2fs: fix writecount false positive in releasing compress blocks
  f2fs: introduce check_swap_activate_fast()
  f2fs: don't issue flush in f2fs_flush_device_cache() for nobarrier case
  f2fs: handle errors of f2fs_get_meta_page_nofail
  f2fs: fix to set SBI_NEED_FSCK flag for inconsistent inode
  f2fs: reject CASEFOLD inode flag without casefold feature
  f2fs: fix memory alignment to support 32bit
  f2fs: fix slab leak of rpages pointer
  f2fs: compress: fix to disallow enabling compress on non-empty file
  f2fs: compress: introduce cic/dic slab cache
  f2fs: compress: introduce page array slab cache
  f2fs: fix to do sanity check on segment/section count
  f2fs: fix to check segment boundary during SIT page readahead
  f2fs: fix uninit-value in f2fs_lookup
  fs/buffer.c: record blockdev write errors in super_block that it backs
  vfs: track per-sb writeback errors and report them to syncfs
  f2fs: remove unneeded parameter in find_in_block()
  f2fs: fix wrong total_sections check and fsmeta check
  f2fs: remove duplicated code in sanity_check_area_boundary
  f2fs: remove unused check on version_bitmap
  f2fs: relocate blkzoned feature check
  f2fs: do sanity check on zoned block device path
  f2fs: add trace exit in exception path
  f2fs: change return value of reserved_segments to unsigned int
  f2fs: clean up kvfree
  f2fs: change virtual mapping way for compression pages
  f2fs: change return value of f2fs_disable_compressed_file to bool
  f2fs: change i_compr_blocks of inode to atomic value
  f2fs: ignore compress mount option on image w/o compression feature
  f2fs: allocate proper size memory for zstd decompress
  f2fs: change compr_blocks of superblock info to 64bit
  f2fs: add block address limit check to compressed file
  f2fs: check position in move range ioctl
  f2fs: correct statistic of APP_DIRECT_IO/APP_DIRECT_READ_IO
  f2fs: support age threshold based garbage collection
  f2fs: Use generic casefolding support
  fs: Add standard casefolding support
  unicode: Add utf8_casefold_hash
  f2fs: compress: use more readable atomic_t type for {cic,dic}.ref
  f2fs: fix compile warning
  f2fs: support 64-bits key in f2fs rb-tree node entry
  f2fs: inherit mtime of original block during GC
  f2fs: record average update time of segment
  f2fs: introduce inmem curseg
  f2fs: compress: remove unneeded code
  f2fs: remove duplicated type casting
  f2fs: support zone capacity less than zone size
  f2fs: update changes in upstream on GC_URGENT_HIGH
  f2fs: Return EOF on unaligned end of file DIO read
  f2fs: fix indefinite loop scanning for free nid
  f2fs: Fix type of section block count variables
  f2fs: prepare a waiter before entering io_schedule
  f2fs: update_sit_entry: Make the judgment condition of f2fs_bug_on more intuitive
  f2fs: replace test_and_set/clear_bit() with set/clear_bit()
  f2fs: make file immutable even if releasing zero compression block
  f2fs: compress: disable compression mount option if compression is off
  f2fs: compress: add sanity check during compressed cluster read
  f2fs: use macro instead of f2fs verity version
  f2fs: fix deadlock between quota writes and checkpoint
  f2fs: correct comment of f2fs_exist_written_data
  f2fs: compress: delay temp page allocation
  f2fs: compress: fix to update isize when overwriting compressed file
  f2fs: space related cleanup
  f2fs: fix use-after-free issue
  f2fs: Change the type of f2fs_flush_inline_data() to void
  f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  f2fs: segment.h: delete a duplicated word
  f2fs: compress: fix to avoid memory leak on cc->cpages
  f2fs: use generic names for generic ioctls
  f2fs: don't keep meta inode pages used for compressed block migration
  f2fs: fix error path in do_recover_data()
  f2fs: fix to wait GCed compressed page writeback
  f2fs: remove write attribute of main_blkaddr sysfs node
  f2fs: add GC_URGENT_LOW mode in gc_urgent
  f2fs: avoid readahead race condition
  f2fs: fix return value of move_data_block()
  f2fs: add parameter op_flag in f2fs_submit_page_read()
  f2fs: split f2fs_allocate_new_segments()
  f2fs: lost matching-pair of trace in f2fs_truncate_inode_blocks
  f2fs: fix an oops in f2fs_is_compressed_page
  f2fs: make trace enter and end in pairs for unlink
  f2fs: fix to check page dirty status before writeback
  f2fs: remove the unused compr parameter
  f2fs: support to trace f2fs_fiemap()
  f2fs: support to trace f2fs_bmap()
  f2fs: fix wrong return value of f2fs_bmap_compress()
  f2fs: remove useless parameter of __insert_free_nid()
  f2fs: fix typo in comment of f2fs_do_add_link
  f2fs: fix to wait page writeback before update
  f2fs: show more debug info for per-temperature log
  f2fs: add f2fs_gc exception handle in f2fs_ioc_gc_range
  f2fs: clean up parameter of f2fs_allocate_data_block()
  f2fs: shrink node_write lock coverage
  f2fs: add prefix for exported symbols
  f2fs: use kfree() to free variables allocated by match_strdup()
  f2fs: get the right gc victim section when section has several segments
  f2fs: fix a race condition between f2fs_write_end_io and f2fs_del_fsync_node_entry
  f2fs: remove useless truncate in f2fs_collapse_range()
  f2fs: use kfree() instead of kvfree() to free superblock data
  f2fs: avoid checkpatch error
  f2fs: should avoid inode eviction in synchronous path
2022-05-20 14:51:11 +09:00

482 lines
13 KiB
C

/*
* linux/fs/file_table.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*/
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/eventpoll.h>
#include <linux/rcupdate.h>
#include <linux/mount.h>
#include <linux/capability.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/task_work.h>
#include <linux/ima.h>
#include <linux/swap.h>
#include <linux/atomic.h>
#include "internal.h"
/* sysctl tunables... */
struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
#ifdef CONFIG_FILE_TABLE_DEBUG
#include <linux/hashtable.h>
#include <mount.h>
static DEFINE_MUTEX(global_files_lock);
static DEFINE_HASHTABLE(global_files_hashtable, 10);
struct global_filetable_lookup_key {
struct work_struct work;
uintptr_t value;
};
void global_filetable_print_warning_once(void)
{
pr_err_once("\n**********************************************************\n");
pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_err_once("** **\n");
pr_err_once("** VFS FILE TABLE DEBUG is enabled . **\n");
pr_err_once("** Allocating extra memory and slowing access to files **\n");
pr_err_once("** **\n");
pr_err_once("** This means that this is a DEBUG kernel and it is **\n");
pr_err_once("** unsafe for production use. **\n");
pr_err_once("** **\n");
pr_err_once("** If you see this message and you are not debugging **\n");
pr_err_once("** the kernel, report this immediately to your vendor! **\n");
pr_err_once("** **\n");
pr_err_once("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_err_once("**********************************************************\n");
}
void global_filetable_add(struct file *filp)
{
struct mount *mnt;
if (filp->f_path.dentry->d_iname == NULL ||
strlen(filp->f_path.dentry->d_iname) == 0)
return;
mnt = real_mount(filp->f_path.mnt);
mutex_lock(&global_files_lock);
hash_add(global_files_hashtable, &filp->f_hash, (uintptr_t)mnt);
mutex_unlock(&global_files_lock);
}
void global_filetable_del(struct file *filp)
{
mutex_lock(&global_files_lock);
hash_del(&filp->f_hash);
mutex_unlock(&global_files_lock);
}
static void global_print_file(struct file *filp, char *path_buffer, int *count)
{
char *pathname;
pathname = d_path(&filp->f_path, path_buffer, PAGE_SIZE);
if (IS_ERR(pathname))
pr_err("VFS: File %d Address : %pa partial filename: %s ref_count=%ld\n",
++(*count), &filp, filp->f_path.dentry->d_iname,
atomic_long_read(&filp->f_count));
else
pr_err("VFS: File %d Address : %pa full filepath: %s ref_count=%ld\n",
++(*count), &filp, pathname,
atomic_long_read(&filp->f_count));
}
static void global_filetable_print(uintptr_t lookup_mnt)
{
struct hlist_node *tmp;
struct file *filp;
struct mount *mnt;
int index;
int count = 0;
char *path_buffer = (char *)__get_free_page(GFP_KERNEL);
mutex_lock(&global_files_lock);
pr_err("\n**********************************************************\n");
pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_err("\n");
pr_err("VFS: The following files hold a reference to the mount\n");
pr_err("\n");
hash_for_each_possible_safe(global_files_hashtable, filp, tmp, f_hash,
lookup_mnt) {
mnt = real_mount(filp->f_path.mnt);
if ((uintptr_t)mnt == lookup_mnt)
global_print_file(filp, path_buffer, &count);
}
pr_err("\n");
pr_err("VFS: Found total of %d open files\n", count);
pr_err("\n");
count = 0;
pr_err("\n");
pr_err("VFS: The following files need to cleaned up\n");
pr_err("\n");
hash_for_each_safe(global_files_hashtable, index, tmp, filp, f_hash) {
if (atomic_long_read(&filp->f_count) == 0)
global_print_file(filp, path_buffer, &count);
}
pr_err("\n");
pr_err("VFS: Found total of %d files awaiting clean-up\n", count);
pr_err("\n");
pr_err("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_err("\n**********************************************************\n");
mutex_unlock(&global_files_lock);
free_page((unsigned long)path_buffer);
}
static void global_filetable_print_work_fn(struct work_struct *work)
{
struct global_filetable_lookup_key *key;
uintptr_t lookup_mnt;
key = container_of(work, struct global_filetable_lookup_key, work);
lookup_mnt = key->value;
kfree(key);
global_filetable_print(lookup_mnt);
}
void global_filetable_delayed_print(struct mount *mnt)
{
struct global_filetable_lookup_key *key;
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (key == NULL)
return;
key->value = (uintptr_t)mnt;
INIT_WORK(&key->work, global_filetable_print_work_fn);
schedule_work(&key->work);
}
#endif /* CONFIG_FILE_TABLE_DEBUG */
static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
put_cred(f->f_cred);
kmem_cache_free(filp_cachep, f);
}
static inline void file_free(struct file *f)
{
percpu_counter_dec(&nr_files);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
/*
* Return the total number of open files in the system
*/
static long get_nr_files(void)
{
return percpu_counter_read_positive(&nr_files);
}
/*
* Return the maximum number of open files in the system
*/
unsigned long get_max_files(void)
{
return files_stat.max_files;
}
EXPORT_SYMBOL_GPL(get_max_files);
/*
* Handle nr_files sysctl
*/
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#else
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
#endif
/* Find an unused file structure and return a pointer to it.
* Returns an error pointer if some error happend e.g. we over file
* structures limit, run out of memory or operation is not permitted.
*
* Be very careful using this. You are responsible for
* getting write access to any mount that you might assign
* to this filp, if it is opened for write. If this is not
* done, you will imbalance int the mount's writer count
* and a warning at __fput() time.
*/
struct file *get_empty_filp(void)
{
const struct cred *cred = current_cred();
static long old_max;
struct file *f;
int error;
/*
* Privileged users can go above max_files
*/
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
/*
* percpu_counters are inaccurate. Do an expensive check before
* we go and fail.
*/
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
goto over;
}
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
if (unlikely(!f))
return ERR_PTR(-ENOMEM);
percpu_counter_inc(&nr_files);
f->f_cred = get_cred(cred);
error = security_file_alloc(f);
if (unlikely(error)) {
file_free(f);
return ERR_PTR(error);
}
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock);
mutex_init(&f->f_pos_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;
over:
/* Ran out of filps - report that */
if (get_nr_files() > old_max) {
pr_info("VFS: file-max limit %lu reached\n", get_max_files());
old_max = get_nr_files();
}
return ERR_PTR(-ENFILE);
}
/**
* alloc_file - allocate and initialize a 'struct file'
*
* @path: the (dentry, vfsmount) pair for the new file
* @mode: the mode with which the new file will be opened
* @fop: the 'struct file_operations' for the new file
*/
struct file *alloc_file(const struct path *path, fmode_t mode,
const struct file_operations *fop)
{
struct file *file;
file = get_empty_filp();
if (IS_ERR(file))
return file;
file->f_path = *path;
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
file->f_sb_err = file_sample_sb_err(file);
if ((mode & FMODE_READ) &&
likely(fop->read || fop->read_iter))
mode |= FMODE_CAN_READ;
if ((mode & FMODE_WRITE) &&
likely(fop->write || fop->write_iter))
mode |= FMODE_CAN_WRITE;
file->f_mode = mode;
file->f_op = fop;
if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(path->dentry->d_inode);
return file;
}
EXPORT_SYMBOL(alloc_file);
/* the real guts of fput() - releasing the last reference to file
*/
static void __fput(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
struct inode *inode = file->f_inode;
might_sleep();
fsnotify_close(file);
/*
* The function eventpoll_release() should be the first called
* in the file cleanup chain.
*/
eventpoll_release(file);
locks_remove_file(file);
if (unlikely(file->f_flags & FASYNC)) {
if (file->f_op->fasync)
file->f_op->fasync(-1, file, 0);
}
ima_file_free(file);
if (file->f_op->release)
file->f_op->release(inode, file);
security_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
!(file->f_mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
}
fops_put(file->f_op);
put_pid(file->f_owner.pid);
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_dec(inode);
if (file->f_mode & FMODE_WRITER) {
put_write_access(inode);
__mnt_drop_write(mnt);
}
global_filetable_del(file);
file->f_path.dentry = NULL;
file->f_path.mnt = NULL;
file->f_inode = NULL;
file_free(file);
dput(dentry);
mntput(mnt);
}
static LLIST_HEAD(delayed_fput_list);
static void delayed_fput(struct work_struct *unused)
{
struct llist_node *node = llist_del_all(&delayed_fput_list);
struct file *f, *t;
llist_for_each_entry_safe(f, t, node, f_u.fu_llist)
__fput(f);
}
static void ____fput(struct callback_head *work)
{
__fput(container_of(work, struct file, f_u.fu_rcuhead));
}
/*
* If kernel thread really needs to have the final fput() it has done
* to complete, call this. The only user right now is the boot - we
* *do* need to make sure our writes to binaries on initramfs has
* not left us with opened struct file waiting for __fput() - execve()
* won't work without that. Please, don't add more callers without
* very good reasons; in particular, never call that with locks
* held and never call that from a thread that might need to do
* some work on any kind of umount.
*/
void flush_delayed_fput(void)
{
delayed_fput(NULL);
}
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
void flush_delayed_fput_wait(void)
{
delayed_fput(NULL);
flush_delayed_work(&delayed_fput_work);
}
void fput_many(struct file *file, unsigned int refs)
{
if (atomic_long_sub_and_test(refs, &file->f_count)) {
struct task_struct *task = current;
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
init_task_work(&file->f_u.fu_rcuhead, ____fput);
if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
return;
/*
* After this task has run exit_task_work(),
* task_work_add() will fail. Fall through to delayed
* fput to avoid leaking *file.
*/
}
if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
schedule_delayed_work(&delayed_fput_work, 1);
}
}
void fput(struct file *file)
{
fput_many(file, 1);
}
/*
* synchronous analog of fput(); for kernel threads that might be needed
* in some umount() (and thus can't use flush_delayed_fput() without
* risking deadlocks), need to wait for completion of __fput() and know
* for this specific struct file it won't involve anything that would
* need them. Use only if you really need it - at the very least,
* don't blindly convert fput() by kernel thread to that.
*/
void __fput_sync(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
struct task_struct *task = current;
BUG_ON(!(task->flags & PF_KTHREAD));
__fput(file);
}
}
EXPORT_SYMBOL(fput);
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
file_free(file);
}
}
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
global_filetable_print_warning_once();
}
/*
* One file with associated inode and dcache is very roughly 1K. Per default
* do not use more than 10% of our memory for files.
*/
void __init files_maxfiles_init(void)
{
unsigned long n;
unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
memreserve = min(memreserve, totalram_pages - 1);
n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
}