msm-4.14/fs/buffer.c
Srinivasarao P 90bb7a2b24 Merge android-4.14-stable.180 (816f245) into msm-4.14
* refs/heads/tmp-816f245:
  Revert "clk: qcom: rcg2: Don't crash if our parent can't be found; return an error"
  Reverting crypto patches
  Reverting incremental fs changes
  Linux 4.14.180
  cgroup, netclassid: remove double cond_resched
  mac80211: add ieee80211_is_any_nullfunc()
  ALSA: hda: Match both PCI ID and SSID for driver blacklist
  tracing: Reverse the order of trace_types_lock and event_mutex
  sctp: Fix SHUTDOWN CTSN Ack in the peer restart case
  net: systemport: suppress warnings on failed Rx SKB allocations
  net: bcmgenet: suppress warnings on failed Rx SKB allocations
  lib/mpi: Fix building for powerpc with clang
  net: dsa: b53: Rework ARL bin logic
  scripts/config: allow colons in option strings for sed
  s390/ftrace: fix potential crashes when switching tracers
  cifs: protect updating server->dstaddr with a spinlock
  net: stmmac: Fix sub-second increment
  net: stmmac: fix enabling socfpga's ptp_ref_clock
  wimax/i2400m: Fix potential urb refcnt leak
  ASoC: codecs: hdac_hdmi: Fix incorrect use of list_for_each_entry
  ASoC: rsnd: Fix HDMI channel mapping for multi-SSI mode
  ASoC: sgtl5000: Fix VAG power-on handling
  selftests/ipc: Fix test failure seen after initial test run
  ASoC: topology: Check return value of pcm_new_ver
  powerpc/pci/of: Parse unassigned resources
  vhost: vsock: kick send_pkt worker once device is started
  ANDROID: arm64: fix a mismerge in proc.S
  Linux 4.14.179
  selinux: properly handle multiple messages in selinux_netlink_send()
  dmaengine: dmatest: Fix iteration non-stop logic
  nfs: Fix potential posix_acl refcnt leak in nfs3_set_acl
  ALSA: opti9xx: shut up gcc-10 range warning
  iommu/amd: Fix legacy interrupt remapping for x2APIC-enabled system
  scsi: target/iblock: fix WRITE SAME zeroing
  iommu/qcom: Fix local_base status check
  vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()
  vfio: avoid possible overflow in vfio_iommu_type1_pin_pages
  RDMA/mlx4: Initialize ib_spec on the stack
  RDMA/mlx5: Set GRH fields in query QP on RoCE
  dm verity fec: fix hash block number in verity_fec_decode
  PM: hibernate: Freeze kernel threads in software_resume()
  PM: ACPI: Output correct message on target power state
  ALSA: pcm: oss: Place the plugin buffer overflow checks correctly
  ALSA: hda/hdmi: fix without unlocked before return
  ALSA: hda/realtek - Two front mics on a Lenovo ThinkCenter
  mmc: sdhci-pci: Fix eMMC driver strength for BYT-based controllers
  mmc: sdhci-xenon: fix annoying 1.8V regulator warning
  btrfs: fix partial loss of prealloc extent past i_size after fsync
  btrfs: fix block group leak when removing fails
  drm/qxl: qxl_release use after free
  drm/qxl: qxl_release leak in qxl_hw_surface_alloc()
  drm/qxl: qxl_release leak in qxl_draw_dirty_fb()
  drm/edid: Fix off-by-one in DispID DTD pixel clock
  ext4: fix special inode number checks in __ext4_iget()
  ANDROID: Incremental fs: Fix issues with very large files
  Linux 4.14.178
  propagate_one(): mnt_set_mountpoint() needs mount_lock
  ext4: check for non-zero journal inum in ext4_calculate_overhead
  qed: Fix use after free in qed_chain_free
  ext4: unsigned int compared against zero
  ext4: fix block validity checks for journal inodes using indirect blocks
  ext4: don't perform block validity checks on the journal inode
  ext4: protect journal inode's blocks using block_validity
  ext4: avoid declaring fs inconsistent due to invalid file handles
  hwmon: (jc42) Fix name to have no illegal characters
  ext4: convert BUG_ON's to WARN_ON's in mballoc.c
  ext4: increase wait time needed before reuse of deleted inode numbers
  ext4: use matching invalidatepage in ext4_writepage
  arm64: Delete the space separator in __emit_inst
  xen/xenbus: ensure xenbus_map_ring_valloc() returns proper grant status
  objtool: Support Clang non-section symbols in ORC dump
  objtool: Fix CONFIG_UBSAN_TRAP unreachable warnings
  scsi: target: fix PR IN / READ FULL STATUS for FC
  xfs: fix partially uninitialized structure in xfs_reflink_remap_extent
  x86: hyperv: report value of misc_features
  bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX BPF_B
  mm: shmem: disable interrupt when acquiring info->lock in userfaultfd_copy path
  perf/core: fix parent pid/tid in task exit events
  ARM: dts: bcm283x: Disable dsi0 node
  net/cxgb4: Check the return from t4_query_params properly
  i2c: altera: use proper variable to hold errno
  nfsd: memory corruption in nfsd4_lock()
  iio:ad7797: Use correct attribute_group
  usb: gadget: udc: bdc: Remove unnecessary NULL checks in bdc_req_complete
  usb: dwc3: gadget: Do link recovery for SS and SSP
  binder: take read mode of mmap_sem in binder_alloc_free_page()
  include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap
  mtd: cfi: fix deadloop in cfi_cmdset_0002.c do_write_buffer
  remoteproc: Fix wrong rvring index computation
  xfs: Fix deadlock between AGI and AGF with RENAME_WHITEOUT
  xfs: validate sb_logsunit is a multiple of the fs blocksize
  serial: sh-sci: Make sure status register SCxSR is read in correct sequence
  usb: f_fs: Clear OS Extended descriptor counts to zero in ffs_data_reset()
  UAS: fix deadlock in error handling and PM flushing work
  UAS: no use logging any details in case of ENODEV
  cdc-acm: introduce a cool down
  cdc-acm: close race betrween suspend() and acm_softint
  staging: vt6656: Power save stop wake_up_count wrap around.
  staging: vt6656: Fix pairwise key entry save.
  staging: vt6656: Fix drivers TBTT timing counter.
  staging: vt6656: Fix calling conditions of vnt_set_bss_mode
  staging: vt6656: Don't set RCR_MULTICAST or RCR_BROADCAST by default.
  vt: don't hardcode the mem allocation upper bound
  staging: comedi: Fix comedi_device refcnt leak in comedi_open
  staging: comedi: dt2815: fix writing hi byte of analog output
  powerpc/setup_64: Set cache-line-size based on cache-block-size
  ARM: imx: provide v7_cpu_resume() only on ARM_CPU_SUSPEND=y
  iwlwifi: pcie: actually release queue memory in TVQM
  ASoC: dapm: fixup dapm kcontrol widget
  audit: check the length of userspace generated audit records
  usb-storage: Add unusual_devs entry for JMicron JMS566
  tty: rocket, avoid OOB access
  tty: hvc: fix buffer overflow during hvc_alloc().
  KVM: VMX: Enable machine check support for 32bit targets
  KVM: Check validity of resolved slot when searching memslots
  tpm: ibmvtpm: retry on H_CLOSED in tpm_ibmvtpm_send()
  tpm/tpm_tis: Free IRQ if probing fails
  ALSA: usb-audio: Filter out unsupported sample rates on Focusrite devices
  ALSA: usb-audio: Fix usb audio refcnt leak when getting spdif
  ALSA: hda/realtek - Add new codec supported for ALC245
  ALSA: usx2y: Fix potential NULL dereference
  tools/vm: fix cross-compile build
  mm/ksm: fix NULL pointer dereference when KSM zero page is enabled
  mm/hugetlb: fix a addressing exception caused by huge_pte_offset
  vmalloc: fix remap_vmalloc_range() bounds checks
  overflow.h: Add arithmetic shift helper
  USB: hub: Fix handling of connect changes during sleep
  USB: core: Fix free-while-in-use bug in the USB S-Glibrary
  USB: early: Handle AMD's spec-compliant identifiers, too
  USB: Add USB_QUIRK_DELAY_CTRL_MSG and USB_QUIRK_DELAY_INIT for Corsair K70 RGB RAPIDFIRE
  USB: sisusbvga: Change port variable from signed to unsigned
  fs/namespace.c: fix mountpoint reference counter race
  iio: xilinx-xadc: Fix sequencer configuration for aux channels in simultaneous mode
  iio: xilinx-xadc: Fix clearing interrupt when enabling trigger
  iio: xilinx-xadc: Fix ADC-B powerdown
  iio: adc: stm32-adc: fix sleep in atomic context
  ALSA: hda: Remove ASUS ROG Zenith from the blacklist
  KEYS: Avoid false positive ENOMEM error on key read
  vrf: Check skb for XFRM_TRANSFORMED flag
  xfrm: Always set XFRM_TRANSFORMED in xfrm{4,6}_output_finish
  net: dsa: b53: Fix ARL register definitions
  team: fix hang in team_mode_get()
  tcp: cache line align MAX_TCP_HEADER
  net/x25: Fix x25_neigh refcnt leak when receiving frame
  net: netrom: Fix potential nr_neigh refcnt leak in nr_add_node
  net: bcmgenet: correct per TX/RX ring statistics
  macvlan: fix null dereference in macvlan_device_event()
  macsec: avoid to set wrong mtu
  ipv6: fix restrict IPV6_ADDRFORM operation
  cxgb4: fix large delays in PTP synchronization
  mm, slub: restore the original intention of prefetch_freepointer()
  PCI/ASPM: Allow re-enabling Clock PM
  perf/core: Disable page faults when getting phys address
  pwm: bcm2835: Dynamically allocate base
  pwm: renesas-tpu: Fix late Runtime PM enablement
  s390/cio: avoid duplicated 'ADD' uevents
  ipc/util.c: sysvipc_find_ipc() should increase position index
  selftests: kmod: fix handling test numbers above 9
  kernel/gcov/fs.c: gcov_seq_next() should increase position index
  ASoC: Intel: atom: Take the drv->lock mutex before calling sst_send_slot_map()
  scsi: iscsi: Report unbind session event when the target has been removed
  pwm: rcar: Fix late Runtime PM enablement
  ceph: don't skip updating wanted caps when cap is stale
  ceph: return ceph_mdsc_do_request() errors from __get_parent()
  scsi: lpfc: Fix kasan slab-out-of-bounds error in lpfc_unreg_login
  watchdog: reset last_hw_keepalive time at start
  vti4: removed duplicate log message.
  crypto: mxs-dcp - make symbols 'sha1_null_hash' and 'sha256_null_hash' static
  drm/msm: Use the correct dma_sync calls harder
  keys: Fix the use of the C++ keyword "private" in uapi/linux/keyctl.h
  net: ipv4: avoid unused variable warning for sysctl
  net: ipv4: emulate READ_ONCE() on ->hdrincl bit-field in raw_sendmsg()
  ext4: fix extent_status fragmentation for plain files
  FROMGIT: f2fs: fix missing check for f2fs_unlock_op
  ANDROID: Fix kernel build regressions from virtio-gpu-next patches
  ANDROID: Incremental fs: Add setattr call
  ANDROID: cuttlefish_defconfig: enable LTO and CFI
  ANDROID: x86: map CFI jump tables in pti_clone_entry_text
  ANDROID: crypto: aesni: fix function types for aesni_(enc|dec)
  ANDROID: x86: disable CFI for do_syscall_*
  ANDROID: BACKPORT: x86, module: Ignore __typeid__ relocations
  ANDROID: BACKPORT: x86, relocs: Ignore __typeid__ relocations
  ANDROID: BACKPORT: x86/extable: Do not mark exception callback as CFI
  FROMLIST: crypto, x86/sha: Eliminate casts on asm implementations
  UPSTREAM: crypto: x86 - Rename functions to avoid conflict with crypto/sha256.h
  BACKPORT: x86/vmlinux: Actually use _etext for the end of the text segment
  ANDROID: x86: disable STACK_VALIDATION with LTO_CLANG
  ANDROID: x86: add support for CONFIG_LTO_CLANG
  ANDROID: x86/vdso: disable LTO only for VDSO
  ANDROID: x86/cpu/vmware: use the full form of inl in VMWARE_PORT
  UPSTREAM: x86/build/lto: Fix truncated .bss with -fdata-sections
  ANDROID: kbuild: don't select LD_DEAD_CODE_DATA_ELIMINATION with LTO
  ANDROID: kbuild: export LTO and CFI flags
  ANDROID: cfi: remove unnecessary <asm/memory.h> include
  ANDROID: drm/virtio: rebase to latest virgl/drm-misc-next (take 2)
  UPSTREAM: sysrq: Use panic() to force a crash
  ANDROID: Incremental fs: Use simple compression in log buffer
  ANDROID: dm-bow: Fix not to skip trim at framented range
  ANDROID: Remove VLA from uid_sys_stats.c
  ANDROID: cuttlefish_defconfig: enable CONFIG_DEBUG_LIST
  Linux 4.14.177
  KEYS: Don't write out to userspace while holding key semaphore
  KEYS: Use individual pages in big_key for crypto buffers
  mtd: phram: fix a double free issue in error path
  mtd: lpddr: Fix a double free in probe()
  locktorture: Print ratio of acquisitions, not failures
  tty: evh_bytechan: Fix out of bounds accesses
  fbdev: potential information leak in do_fb_ioctl()
  net: dsa: bcm_sf2: Fix overflow checks
  iommu/amd: Fix the configuration of GCR3 table root pointer
  libnvdimm: Out of bounds read in __nd_ioctl()
  ext2: fix debug reference to ext2_xattr_cache
  ext2: fix empty body warnings when -Wextra is used
  iommu/vt-d: Fix mm reference leak
  NFS: Fix memory leaks in nfs_pageio_stop_mirroring()
  drm/amdkfd: kfree the wrong pointer
  x86: ACPI: fix CPU hotplug deadlock
  KVM: s390: vsie: Fix possible race when shadowing region 3 tables
  compiler.h: fix error in BUILD_BUG_ON() reporting
  percpu_counter: fix a data race at vm_committed_as
  include/linux/swapops.h: correct guards for non_swap_entry()
  ext4: do not commit super on read-only bdev
  powerpc/maple: Fix declaration made after definition
  s390/cpuinfo: fix wrong output when CPU0 is offline
  NFS: direct.c: Fix memory leak of dreq when nfs_get_lock_context fails
  NFSv4/pnfs: Return valid stateids in nfs_layout_find_inode_by_stateid()
  rtc: 88pm860x: fix possible race condition
  soc: imx: gpc: fix power up sequencing
  clk: tegra: Fix Tegra PMC clock out parents
  power: supply: bq27xxx_battery: Silence deferred-probe error
  clk: at91: usb: continue if clk_hw_round_rate() return zero
  of: unittest: kmemleak in of_unittest_platform_populate()
  rbd: call rbd_dev_unprobe() after unwatching and flushing notifies
  rbd: avoid a deadlock on header_rwsem when flushing notifies
  of: fix missing kobject init for !SYSFS && OF_DYNAMIC config
  soc: qcom: smem: Use le32_to_cpu for comparison
  wil6210: abort properly in cfg suspend
  wil6210: fix length check in __wmi_send
  wil6210: add block size checks during FW load
  wil6210: fix PCIe bus mastering in case of interface down
  rpmsg: glink: smem: Ensure ordering during tx
  rpmsg: glink: Fix missing mutex_init() in qcom_glink_alloc_channel()
  rtc: pm8xxx: Fix issue in RTC write path
  rpmsg: glink: use put_device() if device_register fail
  wil6210: rate limit wil_rx_refill error
  scsi: ufs: ufs-qcom: remove broken hci version quirk
  scsi: ufs: make sure all interrupts are processed
  wil6210: fix temperature debugfs
  wil6210: increase firmware ready timeout
  arch_topology: Fix section miss match warning due to free_raw_capacity()
  arm64: traps: Don't print stack or raw PC/LR values in backtraces
  arm64: perf: remove unsupported events for Cortex-A73
  Revert "gpio: set up initial state from .get_direction()"
  clk: Fix debugfs_create_*() usage
  drm: NULL pointer dereference [null-pointer-deref] (CWE 476) problem
  video: fbdev: sis: Remove unnecessary parentheses and commented code
  lib/raid6: use vdupq_n_u8 to avoid endianness warnings
  ALSA: hda: Don't release card at firmware loading error
  irqchip/mbigen: Free msi_desc on device teardown
  netfilter: nf_tables: report EOPNOTSUPP on unsupported flags/object type
  arm, bpf: Fix bugs with ALU64 {RSH, ARSH} BPF_K shift by 0
  ext4: use non-movable memory for superblock readahead
  scsi: sg: add sg_remove_request in sg_common_write
  objtool: Fix switch table detection in .text.unlikely
  mm/vmalloc.c: move 'area->pages' after if statement
  x86/resctrl: Fix invalid attempt at removing the default resource group
  x86/resctrl: Preserve CDP enable over CPU hotplug
  x86/intel_rdt: Enable L2 CDP in MSR IA32_L2_QOS_CFG
  x86/intel_rdt: Add two new resources for L2 Code and Data Prioritization (CDP)
  x86/intel_rdt: Enumerate L2 Code and Data Prioritization (CDP) feature
  x86/microcode/AMD: Increase microcode PATCH_MAX_SIZE
  scsi: target: fix hang when multiple threads try to destroy the same iscsi session
  scsi: target: remove boilerplate code
  kvm: x86: Host feature SSBD doesn't imply guest feature SPEC_CTRL_SSBD
  dm flakey: check for null arg_name in parse_features()
  ext4: do not zeroout extents beyond i_disksize
  mac80211_hwsim: Use kstrndup() in place of kasprintf()
  btrfs: check commit root generation in should_ignore_root
  tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation
  ALSA: usb-audio: Don't override ignore_ctl_error value from the map
  ASoC: Intel: mrfld: return error codes when an error occurs
  ASoC: Intel: mrfld: fix incorrect check on p->sink
  ext4: fix incorrect inodes per group in error message
  ext4: fix incorrect group count in ext4_fill_super error message
  pwm: pca9685: Fix PWM/GPIO inter-operation
  jbd2: improve comments about freeing data buffers whose page mapping is NULL
  scsi: ufs: Fix ufshcd_hold() caused scheduling while atomic
  net: stmmac: dwmac-sunxi: Provide TX and RX fifo sizes
  net: revert default NAPI poll timeout to 2 jiffies
  net: qrtr: send msgs from local of same id as broadcast
  net: ipv6: do not consider routes via gateways for anycast address check
  net: ipv4: devinet: Fix crash when add/del multicast IP with autojoin
  hsr: check protocol version in hsr_newlink()
  amd-xgbe: Use __napi_schedule() in BH context
  mfd: dln2: Fix sanity checking for endpoints
  misc: echo: Remove unnecessary parentheses and simplify check for zero
  powerpc/fsl_booke: Avoid creating duplicate tlb1 entry
  ipmi: fix hung processes in __get_guid()
  ftrace/kprobe: Show the maxactive number on kprobe_events
  drm: Remove PageReserved manipulation from drm_pci_alloc
  drm/dp_mst: Fix clearing payload state on topology disable
  crypto: caam - update xts sector size for large input length
  dm zoned: remove duplicate nr_rnd_zones increase in dmz_init_zone()
  btrfs: use nofs allocations for running delayed items
  Btrfs: fix crash during unmount due to race with delayed inode workers
  powerpc: Make setjmp/longjmp signature standard
  powerpc: Add attributes for setjmp/longjmp
  scsi: mpt3sas: Fix kernel panic observed on soft HBA unplug
  powerpc/kprobes: Ignore traps that happened in real mode
  powerpc/xive: Use XIVE_BAD_IRQ instead of zero to catch non configured IPIs
  powerpc/hash64/devmap: Use H_PAGE_THP_HUGE when setting up huge devmap PTE entries
  powerpc/64/tm: Don't let userspace set regs->trap via sigreturn
  powerpc/powernv/idle: Restore AMR/UAMOR/AMOR after idle
  libata: Return correct status in sata_pmp_eh_recover_pm() when ATA_DFLAG_DETACH is set
  hfsplus: fix crash and filesystem corruption when deleting files
  cpufreq: powernv: Fix use-after-free
  kmod: make request_module() return an error when autoloading is disabled
  Input: i8042 - add Acer Aspire 5738z to nomux list
  s390/diag: fix display of diagnose call statistics
  perf tools: Support Python 3.8+ in Makefile
  ocfs2: no need try to truncate file beyond i_size
  fs/filesystems.c: downgrade user-reachable WARN_ONCE() to pr_warn_once()
  ext4: fix a data race at inode->i_blocks
  NFS: Fix a page leak in nfs_destroy_unlinked_subrequests()
  rtc: omap: Use define directive for PIN_CONFIG_ACTIVE_HIGH
  arm64: armv8_deprecated: Fix undef_hook mask for thumb setend
  scsi: zfcp: fix missing erp_lock in port recovery trigger for point-to-point
  dm verity fec: fix memory leak in verity_fec_dtr
  mm: Use fixed constant in page_frag_alloc instead of size + 1
  tools: gpio: Fix out-of-tree build regression
  x86/speculation: Remove redundant arch_smt_update() invocation
  powerpc/pseries: Drop pointless static qualifier in vpa_debugfs_init()
  net: rtnl_configure_link: fix dev flags changes arg to __dev_notify_flags
  ALSA: hda: Initialize power_state field properly
  crypto: mxs-dcp - fix scatterlist linearization for hash
  btrfs: drop block from cache on error in relocation
  CIFS: Fix bug which the return value by asynchronous read is error
  KVM: VMX: fix crash cleanup when KVM wasn't used
  KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support
  KVM: x86: Allocate new rmap and large page tracking when moving memslot
  KVM: s390: vsie: Fix delivery of addressing exceptions
  KVM: s390: vsie: Fix region 1 ASCE sanity shadow address checks
  KVM: nVMX: Properly handle userspace interrupt window request
  x86/entry/32: Add missing ASM_CLAC to general_protection entry
  signal: Extend exec_id to 64bits
  ath9k: Handle txpower changes even when TPC is disabled
  MIPS: OCTEON: irq: Fix potential NULL pointer dereference
  irqchip/versatile-fpga: Apply clear-mask earlier
  KEYS: reaching the keys quotas correctly
  PCI: endpoint: Fix for concurrent memory allocation in OB address region
  PCI/ASPM: Clear the correct bits when enabling L1 substates
  nvme-fc: Revert "add module to ops template to allow module references"
  thermal: devfreq_cooling: inline all stubs for CONFIG_DEVFREQ_THERMAL=n
  acpi/x86: ignore unspecified bit positions in the ACPI global lock field
  media: ti-vpe: cal: fix disable_irqs to only the intended target
  ALSA: hda/realtek - Set principled PC Beep configuration for ALC256
  ALSA: doc: Document PC Beep Hidden Register on Realtek ALC256
  ALSA: pcm: oss: Fix regression by buffer overflow fix
  ALSA: ice1724: Fix invalid access for enumerated ctl items
  ALSA: hda: Fix potential access overflow in beep helper
  ALSA: hda: Add driver blacklist
  ALSA: usb-audio: Add mixer workaround for TRX40 and co
  usb: gadget: composite: Inform controller driver of self-powered
  usb: gadget: f_fs: Fix use after free issue as part of queue failure
  ASoC: topology: use name_prefix for new kcontrol
  ASoC: dpcm: allow start or stop during pause for backend
  ASoC: dapm: connect virtual mux with default value
  ASoC: fix regwmask
  slub: improve bit diffusion for freelist ptr obfuscation
  misc: rtsx: set correct pcr_ops for rts522A
  uapi: rename ext2_swab() to swab() and share globally in swab.h
  btrfs: track reloc roots based on their commit root bytenr
  btrfs: remove a BUG_ON() from merge_reloc_roots()
  block, bfq: fix use-after-free in bfq_idle_slice_timer_body
  locking/lockdep: Avoid recursion in lockdep_count_{for,back}ward_deps()
  irqchip/gic-v4: Provide irq_retrigger to avoid circular locking dependency
  usb: dwc3: core: add support for disabling SS instances in park mode
  block: Fix use-after-free issue accessing struct io_cq
  genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()
  efi/x86: Ignore the memory attributes table on i386
  x86/boot: Use unsigned comparison for addresses
  gfs2: Don't demote a glock until its revokes are written
  libata: Remove extra scsi_host_put() in ata_scsi_add_hosts()
  PCI/switchtec: Fix init_completion race condition with poll_wait()
  selftests/x86/ptrace_syscall_32: Fix no-vDSO segfault
  sched: Avoid scale real weight down to zero
  irqchip/versatile-fpga: Handle chained IRQs properly
  block: keep bdi->io_pages in sync with max_sectors_kb for stacked devices
  x86: Don't let pgprot_modify() change the page encryption bit
  null_blk: fix spurious IO errors after failed past-wp access
  null_blk: Handle null_add_dev() failures properly
  null_blk: Fix the null_add_dev() error path
  i2c: st: fix missing struct parameter description
  qlcnic: Fix bad kzalloc null test
  cxgb4/ptp: pass the sign of offset delta in FW CMD
  hinic: fix wrong para of wait_for_completion_timeout
  hinic: fix a bug of waitting for IO stopped
  net: vxge: fix wrong __VA_ARGS__ usage
  bus: sunxi-rsb: Return correct data when mixing 16-bit and 8-bit reads
  ANDROID: fix wakeup reason findings
  UPSTREAM: gpu/trace: add a gpu total memory usage tracepoint
  CHROMIUM: drm/virtio: rebase zero-copy patches to virgl/drm-misc-next
  CHROMIUM: virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
  CHROMIUM: drm/virtgpu: add legacy VIRTIO_GPU_* values for non-upstream variants
  CHROMIUM: drm/virtgpu: fix various warnings
  CHROMIUM: drm/virtgpu: implement metadata allocation ioctl
  CHROMIUM: drm/virtgpu: introduce request IDRs
  CHROMIUM: drm/virtgpu: implement DRM_VIRTGPU_RESOURCE_CREATE_V2
  CHROMIUM: drm/virtgpu: add stub ioctl implementation
  CHROMIUM: drm/virtgpu: check for revelant capabilites
  CHROMIUM: drm/virtgpu: add memory type to virtio_gpu_object_params
  CHROMIUM: drm/virtgpu: make memory and resource creation opaque
  CHROMIUM: virtio-gpu api: VIRTIO_GPU_F_MEMORY
  CHROMIUM: virtwl: store plane info per virtio_gpu_object
  CHROMIUM: drm/virtgpu: expose new ioctls to userspace
  BACKPORT: drm/virtio: move virtio_gpu_object_{attach, detach} calls.
  ANDROID: drm: ttm: Add ttm_tt_create2 driver hook
  UPSTREAM: virtio-gpu api: comment feature flags
  UPSTREAM: drm/virtio: module_param_named() requires linux/moduleparam.h
  BACKPORT: drm/virtio: fix resource id creation race
  BACKPORT: drm/virtio: make resource id workaround runtime switchable.
  BACKPORT: drm/virtio: do NOT reuse resource ids
  BACKPORT: drm/virtio: Drop deprecated load/unload initialization
  f2fs: fix quota_sync failure due to f2fs_lock_op
  f2fs: support read iostat
  f2fs: Fix the accounting of dcc->undiscard_blks
  f2fs: fix to handle error path of f2fs_ra_meta_pages()
  f2fs: report the discard cmd errors properly
  f2fs: fix long latency due to discard during umount
  f2fs: add tracepoint for f2fs iostat
  f2fs: introduce sysfs/data_io_flag to attach REQ_META/FUA
  UPSTREAM: kheaders: include only headers into kheaders_data.tar.xz
  UPSTREAM: kheaders: remove meaningless -R option of 'ls'
  ANDROID: Incremental fs: Fix create_file performance
  ANDROID: Incremental fs: Fix compound page usercopy crash
  ANDROID: Incremental fs: Clean up incfs_test build process
  ANDROID: Incremental fs: make remount log buffer change atomic
  ANDROID: Incremental fs: Optimize get_filled_block
  ANDROID: Incremental fs: Fix mislabeled __user ptrs
  ANDROID: Incremental fs: Use 64-bit int for file_size when writing hash blocks
  Revert "ANDROID: Incremental fs: Fix initialization, use of bitfields"
  Linux 4.14.176
  drm/msm: Use the correct dma_sync calls in msm_gem
  rpmsg: glink: smem: Support rx peak for size less than 4 bytes
  drm_dp_mst_topology: fix broken drm_dp_sideband_parse_remote_dpcd_read()
  usb: dwc3: don't set gadget->is_otg flag
  rpmsg: glink: Remove chunk size word align warning
  arm64: Fix size of __early_cpu_boot_status
  drm/msm: stop abusing dma_map/unmap for cache
  clk: qcom: rcg: Return failure for RCG update
  acpi/nfit: Fix bus command validation
  fbcon: fix null-ptr-deref in fbcon_switch
  RDMA/cm: Update num_paths in cma_resolve_iboe_route error flow
  Bluetooth: RFCOMM: fix ODEBUG bug in rfcomm_dev_ioctl
  ceph: canonicalize server path in place
  ceph: remove the extra slashes in the server path
  IB/hfi1: Fix memory leaks in sysfs registration and unregistration
  IB/hfi1: Call kobject_put() when kobject_init_and_add() fails
  ASoC: jz4740-i2s: Fix divider written at incorrect offset in register
  hwrng: imx-rngc - fix an error path
  tools/accounting/getdelays.c: fix netlink attribute length
  random: always use batched entropy for get_random_u{32,64}
  mlxsw: spectrum_flower: Do not stop at FLOW_ACTION_VLAN_MANGLE
  slcan: Don't transmit uninitialized stack data in padding
  net: stmmac: dwmac1000: fix out-of-bounds mac address reg setting
  net: phy: micrel: kszphy_resume(): add delay after genphy_resume() before accessing PHY registers
  net: dsa: bcm_sf2: Ensure correct sub-node is parsed
  ipv6: don't auto-add link-local address to lag ports
  mm: mempolicy: require at least one nodeid for MPOL_PREFERRED
  padata: always acquire cpu_hotplug_lock before pinst->lock
  coresight: do not use the BIT() macro in the UAPI header
  misc: pci_endpoint_test: Fix to support > 10 pci-endpoint-test devices
  blk-mq: Allow blocking queue tag iter callbacks
  blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter
  drm/etnaviv: replace MMU flush marker with flush sequence
  tools/power turbostat: Fix gcc build warnings
  initramfs: restore default compression behavior
  drm/bochs: downgrade pci_request_region failure from error to warning
  sctp: fix possibly using a bad saddr with a given dst
  sctp: fix refcount bug in sctp_wfree
  net, ip_tunnel: fix interface lookup with no key
  ipv4: fix a RCU-list lock in fib_triestat_seq_show
  ANDROID: power: wakeup_reason: wake reason enhancements
  ubifs: wire up FS_IOC_GET_ENCRYPTION_NONCE
  f2fs: wire up FS_IOC_GET_ENCRYPTION_NONCE
  ext4: wire up FS_IOC_GET_ENCRYPTION_NONCE
  fscrypt: add FS_IOC_GET_ENCRYPTION_NONCE ioctl
  FROMLIST: power_supply: Add additional health properties to the header
  UPSTREAM: power: supply: core: Update sysfs-class-power ABI document
  BACKPORT: FROMGIT: kbuild: mkcompile_h: Include $LD version in /proc/version
  ANDROID: fscrypt: fall back to filesystem-layer crypto when needed
  ANDROID: block: require drivers to declare supported crypto key type(s)
  ANDROID: block: make blk_crypto_start_using_mode() properly check for support
  f2fs: keep inline_data when compression conversion
  f2fs: fix to disable compression on directory
  f2fs: add missing CONFIG_F2FS_FS_COMPRESSION
  f2fs: switch discard_policy.timeout to bool type
  f2fs: fix to verify tpage before releasing in f2fs_free_dic()
  f2fs: show compression in statx
  f2fs: clean up dic->tpages assignment
  f2fs: compress: support zstd compress algorithm
  f2fs: compress: add .{init,destroy}_decompress_ctx callback
  f2fs: compress: fix to call missing destroy_compress_ctx()
  f2fs: change default compression algorithm
  f2fs: clean up {cic,dic}.ref handling
  f2fs: fix to use f2fs_readpage_limit() in f2fs_read_multi_pages()
  f2fs: xattr.h: Make stub helpers inline
  f2fs: fix to avoid double unlock
  f2fs: fix potential .flags overflow on 32bit architecture
  f2fs: fix NULL pointer dereference in f2fs_verity_work()
  f2fs: fix to clear PG_error if fsverity failed
  f2fs: don't call fscrypt_get_encryption_info() explicitly in f2fs_tmpfile()
  f2fs: don't trigger data flush in foreground operation
  f2fs: fix NULL pointer dereference in f2fs_write_begin()
  f2fs: clean up f2fs_may_encrypt()
  f2fs: fix to avoid potential deadlock
  f2fs: don't change inode status under page lock
  f2fs: fix potential deadlock on compressed quota file
  f2fs: delete DIO read lock
  f2fs: don't mark compressed inode dirty during f2fs_iget()
  f2fs: fix to account compressed blocks in f2fs_compressed_blocks()
  f2fs: xattr.h: Replace zero-length array with flexible-array member
  f2fs: fix to update f2fs_super_block fields under sb_lock
  f2fs: Add a new CP flag to help fsck fix resize SPO issues
  f2fs: Fix mount failure due to SPO after a successful online resize FS
  f2fs: use kmem_cache pool during inline xattr lookups
  f2fs: skip migration only when BG_GC is called
  f2fs: fix to show tracepoint correctly
  f2fs: avoid __GFP_NOFAIL in f2fs_bio_alloc
  f2fs: introduce F2FS_IOC_GET_COMPRESS_BLOCKS
  f2fs: fix to avoid triggering IO in write path
  f2fs: add prefix for f2fs slab cache name
  f2fs: introduce DEFAULT_IO_TIMEOUT
  f2fs: skip GC when section is full
  f2fs: add migration count iff migration happens
  f2fs: clean up bggc mount option
  f2fs: clean up lfs/adaptive mount option
  f2fs: fix to show norecovery mount option
  f2fs: clean up parameter of macro XATTR_SIZE()
  f2fs: clean up codes with {f2fs_,}data_blkaddr()
  f2fs: show mounted time
  f2fs: Use scnprintf() for avoiding potential buffer overflow
  f2fs: allow to clear F2FS_COMPR_FL flag
  f2fs: fix to check dirty pages during compressed inode conversion
  f2fs: fix to account compressed inode correctly
  f2fs: fix wrong check on F2FS_IOC_FSSETXATTR
  f2fs: fix to avoid use-after-free in f2fs_write_multi_pages()
  f2fs: fix to avoid using uninitialized variable
  f2fs: fix inconsistent comments
  f2fs: remove i_sem lock coverage in f2fs_setxattr()
  f2fs: cover last_disk_size update with spinlock
  f2fs: fix to check i_compr_blocks correctly
  FROMLIST: kmod: make request_module() return an error when autoloading is disabled
  UPSTREAM: loop: Only freeze block queue when needed.
  UPSTREAM: loop: Only change blocksize when needed.
  ANDROID: Incremental fs: Fix remount
  ANDROID: Incremental fs: Protect get_fill_block, and add a field
  ANDROID: Incremental fs: Fix crash polling 0 size read_log
  ANDROID: Incremental fs: get_filled_blocks: better index_out
  ANDROID: Fix wq fp check for CFI builds
  ANDROID: Incremental fs: Fix four resource bugs
  ANDROID: kbuild: ensure __cfi_check is correctly aligned
  ANDROID: kbuild: fix module linker script flags for LTO
  Linux 4.14.175
  arm64: dts: ls1046ardb: set RGMII interfaces to RGMII_ID mode
  arm64: dts: ls1043a-rdb: correct RGMII delay mode to rgmii-id
  ARM: bcm2835-rpi-zero-w: Add missing pinctrl name
  ARM: dts: oxnas: Fix clear-mask property
  perf map: Fix off by one in strncpy() size argument
  arm64: alternative: fix build with clang integrated assembler
  net: ks8851-ml: Fix IO operations, again
  gpiolib: acpi: Add quirk to ignore EC wakeups on HP x2 10 CHT + AXP288 model
  bpf: Explicitly memset some bpf info structures declared on the stack
  bpf: Explicitly memset the bpf_attr structure
  platform/x86: pmc_atom: Add Lex 2I385SW to critclk_systems DMI table
  vt: vt_ioctl: fix use-after-free in vt_in_use()
  vt: vt_ioctl: fix VT_DISALLOCATE freeing in-use virtual console
  vt: vt_ioctl: remove unnecessary console allocation checks
  vt: switch vt_dont_switch to bool
  vt: ioctl, switch VT_IS_IN_USE and VT_BUSY to inlines
  vt: selection, introduce vc_is_sel
  mac80211: fix authentication with iwlwifi/mvm
  mac80211: Check port authorization in the ieee80211_tx_dequeue() case
  media: xirlink_cit: add missing descriptor sanity checks
  media: stv06xx: add missing descriptor sanity checks
  media: dib0700: fix rc endpoint lookup
  media: ov519: add missing endpoint sanity checks
  libfs: fix infoleak in simple_attr_read()
  staging: wlan-ng: fix use-after-free Read in hfa384x_usbin_callback
  staging: wlan-ng: fix ODEBUG bug in prism2sta_disconnect_usb
  staging: rtl8188eu: Add ASUS USB-N10 Nano B1 to device table
  media: usbtv: fix control-message timeouts
  media: flexcop-usb: fix endpoint sanity check
  usb: musb: fix crash with highmen PIO and usbmon
  USB: serial: io_edgeport: fix slab-out-of-bounds read in edge_interrupt_callback
  USB: cdc-acm: restore capability check order
  USB: serial: option: add Wistron Neweb D19Q1
  USB: serial: option: add BroadMobi BM806U
  USB: serial: option: add support for ASKEY WWHC050
  afs: Fix some tracing details
  Input: raydium_i2c_ts - fix error codes in raydium_i2c_boot_trigger()
  Input: raydium_i2c_ts - use true and false for boolean values
  vti6: Fix memory leak of skb if input policy check fails
  netfilter: nft_fwd_netdev: validate family and chain type
  xfrm: policy: Fix doulbe free in xfrm_policy_timer
  xfrm: add the missing verify_sec_ctx_len check in xfrm_add_acquire
  xfrm: fix uctx len check in verify_sec_ctx_len
  RDMA/mlx5: Block delay drop to unprivileged users
  vti[6]: fix packet tx through bpf_redirect() in XinY cases
  xfrm: handle NETDEV_UNREGISTER for xfrm device
  genirq: Fix reference leaks on irq affinity notifiers
  RDMA/core: Ensure security pkey modify is not lost
  gpiolib: acpi: Add quirk to ignore EC wakeups on HP x2 10 BYT + AXP288 model
  gpiolib: acpi: Rework honor_wakeup option into an ignore_wake option
  gpiolib: acpi: Correct comment for HP x2 10 honor_wakeup quirk
  mac80211: mark station unauthorized before key removal
  scsi: sd: Fix optimal I/O size for devices that change reported values
  scripts/dtc: Remove redundant YYLOC global declaration
  tools: Let O= makes handle a relative path with -C option
  perf probe: Do not depend on dwfl_module_addrsym()
  ARM: dts: omap5: Add bus_dma_limit for L3 bus
  ARM: dts: dra7: Add bus_dma_limit for L3 bus
  Input: avoid BIT() macro usage in the serio.h UAPI header
  Input: synaptics - enable RMI on HP Envy 13-ad105ng
  i2c: hix5hd2: add missed clk_disable_unprepare in remove
  ftrace/x86: Anotate text_mutex split between ftrace_arch_code_modify_post_process() and ftrace_arch_code_modify_prepare()
  arm64: compat: map SPSR_ELx<->PSR for signals
  arm64: ptrace: map SPSR_ELx<->PSR for compat tasks
  sxgbe: Fix off by one in samsung driver strncpy size arg
  dpaa_eth: Remove unnecessary boolean expression in dpaa_get_headroom
  mac80211: Do not send mesh HWMP PREQ if HWMP is disabled
  scsi: ipr: Fix softlockup when rescanning devices in petitboot
  fsl/fman: detect FMan erratum A050385
  arm64: dts: ls1043a: FMan erratum A050385
  dt-bindings: net: FMan erratum A050385
  cgroup1: don't call release_agent when it is ""
  drivers/of/of_mdio.c:fix of_mdiobus_register()
  cpupower: avoid multiple definition with gcc -fno-common
  cgroup-v1: cgroup_pidlist_next should update position index
  net: ipv4: don't let PMTU updates increase route MTU
  hsr: set .netnsok flag
  hsr: add restart routine into hsr_get_node_list()
  hsr: use rcu_read_lock() in hsr_get_node_{list/status}()
  vxlan: check return value of gro_cells_init()
  net: dsa: mt7530: Change the LINK bit to reflect the link status
  bnxt_en: fix memory leaks in bnxt_dcbnl_ieee_getets()
  slcan: not call free_netdev before rtnl_unlock in slcan_open
  NFC: fdp: Fix a signedness bug in fdp_nci_send_patch()
  net: stmmac: dwmac-rk: fix error path in rk_gmac_probe
  net_sched: keep alloc_hash updated after hash allocation
  net_sched: cls_route: remove the right filter from hashtable
  net: qmi_wwan: add support for ASKEY WWHC050
  net/packet: tpacket_rcv: avoid a producer race condition
  net: mvneta: Fix the case where the last poll did not process all rx
  net: dsa: Fix duplicate frames flooded by learning
  macsec: restrict to ethernet devices
  hsr: fix general protection fault in hsr_addr_is_self()
  Revert "drm/dp_mst: Skip validating ports during destruction, just ref"
  staging: greybus: loopback_test: fix potential path truncations
  staging: greybus: loopback_test: fix potential path truncation
  drm/bridge: dw-hdmi: fix AVI frame colorimetry
  arm64: smp: fix crash_smp_send_stop() behaviour
  arm64: smp: fix smp_send_stop() behaviour
  ALSA: hda/realtek: Fix pop noise on ALC225
  Revert "ipv6: Fix handling of LLA with VRF and sockets bound to VRF"
  Revert "vrf: mark skb for multicast or link-local as enslaved to VRF"
  futex: Unbreak futex hashing
  futex: Fix inode life-time issue
  kbuild: Disable -Wpointer-to-enum-cast
  iio: adc: at91-sama5d2_adc: fix differential channels in triggered mode
  iio: adc: at91-sama5d2_adc: fix channel configuration for differential channels
  USB: cdc-acm: fix rounding error in TIOCSSERIAL
  USB: cdc-acm: fix close_delay and closing_wait units in TIOCSSERIAL
  x86/mm: split vmalloc_sync_all()
  page-flags: fix a crash at SetPageError(THP_SWAP)
  mm, slub: prevent kmalloc_node crashes and memory leaks
  mm: slub: be more careful about the double cmpxchg of freelist
  memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event
  xhci: Do not open code __print_symbolic() in xhci trace events
  rtc: max8907: add missing select REGMAP_IRQ
  intel_th: pci: Add Elkhart Lake CPU support
  intel_th: Fix user-visible error codes
  staging/speakup: fix get_word non-space look-ahead
  staging: rtl8188eu: Add device id for MERCUSYS MW150US v2
  mmc: sdhci-of-at91: fix cd-gpios for SAMA5D2
  iio: magnetometer: ak8974: Fix negative raw values in sysfs
  iio: trigger: stm32-timer: disable master mode when stopping
  ALSA: pcm: oss: Remove WARNING from snd_pcm_plug_alloc() checks
  ALSA: pcm: oss: Avoid plugin buffer overflow
  ALSA: seq: oss: Fix running status after receiving sysex
  ALSA: seq: virmidi: Fix running status after receiving sysex
  ALSA: line6: Fix endless MIDI read loop
  usb: xhci: apply XHCI_SUSPEND_DELAY to AMD XHCI controller 1022:145c
  USB: serial: pl2303: add device-id for HP LD381
  usb: host: xhci-plat: add a shutdown
  USB: serial: option: add ME910G1 ECM composition 0x110b
  usb: quirks: add NO_LPM quirk for RTL8153 based ethernet adapters
  USB: Disable LPM on WD19's Realtek Hub
  parse-maintainers: Mark as executable
  block, bfq: fix overwrite of bfq_group pointer in bfq_find_set_group()
  xenbus: req->err should be updated before req->state
  xenbus: req->body should be updated before req->state
  dm bio record: save/restore bi_end_io and bi_integrity
  altera-stapl: altera_get_note: prevent write beyond end of 'key'
  drivers/perf: arm_pmu_acpi: Fix incorrect checking of gicc pointer
  drm/exynos: dsi: fix workaround for the legacy clock name
  drm/exynos: dsi: propagate error value and silence meaningless warning
  spi/zynqmp: remove entry that causes a cs glitch
  spi: pxa2xx: Add CS control clock quirk
  ARM: dts: dra7: Add "dma-ranges" property to PCIe RC DT nodes
  powerpc: Include .BTF section
  spi: qup: call spi_qup_pm_resume_runtime before suspending
  UPSTREAM: ubifs: wire up FS_IOC_GET_ENCRYPTION_NONCE
  UPSTREAM: f2fs: wire up FS_IOC_GET_ENCRYPTION_NONCE
  UPSTREAM: ext4: wire up FS_IOC_GET_ENCRYPTION_NONCE
  UPSTREAM: fscrypt: add FS_IOC_GET_ENCRYPTION_NONCE ioctl
  UPSTREAM: usb: raw_gadget: fix compilation warnings in uapi headers
  BACKPORT: usb: gadget: add raw-gadget interface
  UPSTREAM: usb: gadget: move choice ... endchoice to legacy/Kconfig
  ANDROID: clang: update to 10.0.5
  FROMLIST: arm64: define __alloc_zeroed_user_highpage
  ANDROID: Incremental fs: Add INCFS_IOC_GET_FILLED_BLOCKS
  ANDROID: Incremental fs: Fix two typos
  f2fs: fix to avoid potential deadlock
  f2fs: add missing function name in kernel message
  f2fs: recycle unused compress_data.chksum feild
  f2fs: fix to avoid NULL pointer dereference
  f2fs: fix leaking uninitialized memory in compressed clusters
  f2fs: fix the panic in do_checkpoint()
  f2fs: fix to wait all node page writeback
  mm/swapfile.c: move inode_lock out of claim_swapfile
  UPSTREAM: ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier
  UPSTREAM: ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier
  ANDROID: dm-bow: Fix free_show value is incorrect
  UPSTREAM: coresight: Potential uninitialized variable in probe()
  ANDROID: kbuild: do not merge .section..* into .section in modules
  ANDROID: scsi: ufs: add ->map_sg_crypto() variant op
  UPSTREAM: bpf: Explicitly memset some bpf info structures declared on the stack
  UPSTREAM: bpf: Explicitly memset the bpf_attr structure
  Linux 4.14.174
  ipv4: ensure rcu_read_lock() in cipso_v4_error()
  mm: slub: add missing TID bump in kmem_cache_alloc_bulk()
  ARM: 8958/1: rename missed uaccess .fixup section
  ARM: 8957/1: VDSO: Match ARMv8 timer in cntvct_functional()
  jbd2: fix data races at struct journal_head
  net: rmnet: fix NULL pointer dereference in rmnet_newlink()
  hinic: fix a bug of setting hw_ioctxt
  slip: not call free_netdev before rtnl_unlock in slip_open
  signal: avoid double atomic counter increments for user accounting
  mac80211: rx: avoid RCU list traversal under mutex
  net: ks8851-ml: Fix IRQ handling and locking
  net: usb: qmi_wwan: restore mtu min/max values after raw_ip switch
  scsi: libfc: free response frame from GPN_ID
  cfg80211: check reg_rule for NULL in handle_channel_custom()
  HID: i2c-hid: add Trekstor Surfbook E11B to descriptor override
  HID: apple: Add support for recent firmware on Magic Keyboards
  ACPI: watchdog: Allow disabling WDAT at boot
  perf/amd/uncore: Replace manual sampling check with CAP_NO_INTERRUPT flag
  batman-adv: Don't schedule OGM for disabled interface
  batman-adv: Avoid free/alloc race when handling OGM buffer
  batman-adv: Avoid free/alloc race when handling OGM2 buffer
  batman-adv: Fix duplicated OGMs on NETDEV_UP
  batman-adv: Fix debugfs path for renamed softif
  batman-adv: Fix debugfs path for renamed hardif
  batman-adv: prevent TT request storms by not sending inconsistent TT TLVLs
  batman-adv: Fix TT sync flags for intermediate TT responses
  batman-adv: Avoid race in TT TVLV allocator helper
  batman-adv: update data pointers after skb_cow()
  batman-adv: Fix internal interface indices types
  batman-adv: Fix lock for ogm cnt access in batadv_iv_ogm_calc_tq
  batman-adv: Fix check of retrieved orig_gw in batadv_v_gw_is_eligible
  batman-adv: Always initialize fragment header priority
  batman-adv: Avoid spurious warnings from bat_v neigh_cmp implementation
  efi: Add a sanity check to efivar_store_raw()
  net/smc: check for valid ib_client_data
  ipv6: restrict IPV6_ADDRFORM operation
  i2c: acpi: put device when verifying client fails
  iommu/vt-d: Ignore devices with out-of-spec domain number
  iommu/vt-d: Fix the wrong printing in RHSA parsing
  netfilter: nft_payload: add missing attribute validation for payload csum flags
  netfilter: cthelper: add missing attribute validation for cthelper
  nl80211: add missing attribute validation for channel switch
  nl80211: add missing attribute validation for beacon report scanning
  nl80211: add missing attribute validation for critical protocol indication
  pinctrl: core: Remove extra kref_get which blocks hogs being freed
  pinctrl: meson-gxl: fix GPIOX sdio pins
  iommu/vt-d: Fix a bug in intel_iommu_iova_to_phys() for huge page
  iommu/vt-d: dmar: replace WARN_TAINT with pr_warn + add_taint
  iommu/dma: Fix MSI reservation allocation
  x86/mce: Fix logic and comments around MSR_PPIN_CTL
  efi: Fix a race and a buffer overflow while reading efivars via sysfs
  ARC: define __ALIGN_STR and __ALIGN symbols for ARC
  KVM: x86: clear stale x86_emulate_ctxt->intercept value
  gfs2_atomic_open(): fix O_EXCL|O_CREAT handling on cold dcache
  cifs_atomic_open(): fix double-put on late allocation failure
  ktest: Add timeout for ssh sync testing
  drm/amd/display: remove duplicated assignment to grph_obj_type
  workqueue: don't use wq_select_unbound_cpu() for bound works
  iommu/vt-d: quirk_ioat_snb_local_iommu: replace WARN_TAINT with pr_warn + add_taint
  virtio-blk: fix hw_queue stopped on arbitrary error
  iwlwifi: mvm: Do not require PHY_SKU NVM section for 3168 devices
  cgroup: Iterate tasks that did not finish do_exit()
  cgroup: cgroup_procs_next should increase position index
  ipvlan: don't deref eth hdr before checking it's set
  ipvlan: egress mcast packets are not exceptional
  ipvlan: do not add hardware address of master to its unicast filter list
  inet_diag: return classid for all socket types
  macvlan: add cond_resched() during multicast processing
  net: fec: validate the new settings in fec_enet_set_coalesce()
  slip: make slhc_compress() more robust against malicious packets
  bonding/alb: make sure arp header is pulled before accessing it
  net: phy: fix MDIO bus PM PHY resuming
  nfc: add missing attribute validation for vendor subcommand
  nfc: add missing attribute validation for SE API
  team: add missing attribute validation for array index
  team: add missing attribute validation for port ifindex
  net: fq: add missing attribute validation for orphan mask
  macsec: add missing attribute validation for port
  can: add missing attribute validation for termination
  nl802154: add missing attribute validation for dev_type
  nl802154: add missing attribute validation
  fib: add missing attribute validation for tun_id
  net: memcg: fix lockdep splat in inet_csk_accept()
  net: memcg: late association of sock to memcg
  cgroup: memcg: net: do not associate sock with unrelated cgroup
  bnxt_en: reinitialize IRQs when MTU is modified
  sfc: detach from cb_page in efx_copy_channel()
  r8152: check disconnect status after long sleep
  net/packet: tpacket_rcv: do not increment ring index on drop
  net: nfc: fix bounds checking bugs on "pipe"
  net: macsec: update SCI upon MAC address change.
  netlink: Use netlink header as base to calculate bad attribute offset
  ipvlan: do not use cond_resched_rcu() in ipvlan_process_multicast()
  ipvlan: add cond_resched_rcu() while processing muticast backlog
  ipv6/addrconf: call ipv6_mc_up() for non-Ethernet interface
  gre: fix uninit-value in __iptunnel_pull_header
  cgroup, netclassid: periodically release file_lock on classid updating
  net: phy: Avoid multiple suspends
  phy: Revert toggling reset changes.
  ANDROID: Incremental fs: Add INCFS_IOC_PERMIT_FILL
  ANDROID: Incremental fs: Remove signature checks from kernel
  ANDROID: Incremental fs: Pad hash blocks
  ANDROID: Incremental fs: Make fill block an ioctl
  ANDROID: Incremental fs: Remove all access_ok checks
  UPSTREAM: cgroup: Iterate tasks that did not finish do_exit()
  UPSTREAM: arm64: memory: Add missing brackets to untagged_addr() macro
  UPSTREAM: mm: Avoid creating virtual address aliases in brk()/mmap()/mremap()
  ANDROID: Add TPM support and the vTPM proxy to Cuttlefish.
  ANDROID: serdev: restrict claim of platform devices
  UPSTREAM: fscrypt: don't evict dirty inodes after removing key
  fscrypt: don't evict dirty inodes after removing key
  Linux 4.14.173
  ASoC: topology: Fix memleak in soc_tplg_manifest_load()
  xhci: handle port status events for removed USB3 hcd
  dm integrity: fix a deadlock due to offloading to an incorrect workqueue
  powerpc: fix hardware PMU exception bug on PowerVM compatibility mode systems
  dmaengine: coh901318: Fix a double lock bug in dma_tc_handle()
  hwmon: (adt7462) Fix an error return in ADT7462_REG_VOLT()
  ARM: imx: build v7_cpu_resume() unconditionally
  IB/hfi1, qib: Ensure RCU is locked when accessing list
  RMDA/cm: Fix missing ib_cm_destroy_id() in ib_cm_insert_listen()
  RDMA/iwcm: Fix iwcm work deallocation
  ASoC: dapm: Correct DAPM handling of active widgets during shutdown
  ASoC: pcm512x: Fix unbalanced regulator enable call in probe error path
  ASoC: pcm: Fix possible buffer overflow in dpcm state sysfs output
  ASoC: intel: skl: Fix possible buffer overflow in debug outputs
  ASoC: intel: skl: Fix pin debug prints
  ASoC: topology: Fix memleak in soc_tplg_link_elems_load()
  ARM: dts: ls1021a: Restore MDIO compatible to gianfar
  dm cache: fix a crash due to incorrect work item cancelling
  dmaengine: tegra-apb: Prevent race conditions of tasklet vs free list
  dmaengine: tegra-apb: Fix use-after-free
  x86/pkeys: Manually set X86_FEATURE_OSPKE to preserve existing changes
  vt: selection, push sel_lock up
  vt: selection, push console lock down
  vt: selection, close sel_buffer race
  serial: 8250_exar: add support for ACCES cards
  tty:serial:mvebu-uart:fix a wrong return
  arm: dts: dra76x: Fix mmc3 max-frequency
  fat: fix uninit-memory access for partial initialized inode
  mm, numa: fix bad pmd by atomically check for pmd_trans_huge when marking page tables prot_numa
  vgacon: Fix a UAF in vgacon_invert_region
  usb: core: port: do error out if usb_autopm_get_interface() fails
  usb: core: hub: do error out if usb_autopm_get_interface() fails
  usb: core: hub: fix unhandled return by employing a void function
  usb: quirks: add NO_LPM quirk for Logitech Screen Share
  usb: storage: Add quirk for Samsung Fit flash
  cifs: don't leak -EAGAIN for stat() during reconnect
  net: thunderx: workaround BGX TX Underflow issue
  x86/xen: Distribute switch variables for initialization
  nvme: Fix uninitialized-variable warning
  x86/boot/compressed: Don't declare __force_order in kaslr_64.c
  s390/cio: cio_ignore_proc_seq_next should increase position index
  watchdog: da9062: do not ping the hw during stop()
  net: ks8851-ml: Fix 16-bit IO operation
  net: ks8851-ml: Fix 16-bit data access
  net: ks8851-ml: Remove 8-bit bus accessors
  drm/msm/dsi: save pll state before dsi host is powered off
  drm: msm: Fix return type of dsi_mgr_connector_mode_valid for kCFI
  drm/msm/mdp5: rate limit pp done timeout warnings
  usb: gadget: serial: fix Tx stall after buffer overflow
  usb: gadget: ffs: ffs_aio_cancel(): Save/restore IRQ flags
  usb: gadget: composite: Support more than 500mA MaxPower
  selftests: fix too long argument
  serial: ar933x_uart: set UART_CS_{RX,TX}_READY_ORIDE
  kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic
  RDMA/core: Fix use of logical OR in get_new_pps
  RDMA/core: Fix pkey and port assignment in get_new_pps
  net: dsa: bcm_sf2: Forcibly configure IMP port for 1Gb/sec
  EDAC/amd64: Set grain per DIMM
  x86/mce: Handle varying MCA bank counts
  vhost: Check docket sk_family instead of call getname
  audit: always check the netlink payload length in audit_receive_msg()
  Revert "char/random: silence a lockdep splat with printk()"
  mm, thp: fix defrag setting if newline is not used
  mm/huge_memory.c: use head to check huge zero page
  perf hists browser: Restore ESC as "Zoom out" of DSO/thread/etc
  kprobes: Set unoptimized flag after unoptimizing code
  drivers: net: xgene: Fix the order of the arguments of 'alloc_etherdev_mqs()'
  tuntap: correctly set SOCKWQ_ASYNC_NOSPACE
  KVM: Check for a bad hva before dropping into the ghc slow path
  KVM: SVM: Override default MMIO mask if memory encryption is enabled
  mwifiex: drop most magic numbers from mwifiex_process_tdls_action_frame()
  namei: only return -ECHILD from follow_dotdot_rcu()
  net: ena: make ena rxfh support ETH_RSS_HASH_NO_CHANGE
  net: atlantic: fix potential error handling
  net: netlink: cap max groups which will be considered in netlink_bind()
  include/linux/bitops.h: introduce BITS_PER_TYPE
  ecryptfs: Fix up bad backport of fe2e082f5da5b4a0a92ae32978f81507ef37ec66
  usb: charger: assign specific number for enum value
  drm/i915/gvt: Separate display reset from ALL_ENGINES reset
  i2c: jz4780: silence log flood on txabrt
  i2c: altera: Fix potential integer overflow
  MIPS: VPE: Fix a double free and a memory leak in 'release_vpe()'
  HID: hiddev: Fix race in in hiddev_disconnect()
  Revert "PM / devfreq: Modify the device name as devfreq(X) for sysfs"
  tracing: Disable trace_printk() on post poned tests
  HID: core: increase HID report buffer size to 8KiB
  HID: core: fix off-by-one memset in hid_report_raw_event()
  HID: ite: Only bind to keyboard USB interface on Acer SW5-012 keyboard dock
  KVM: VMX: check descriptor table exits on instruction emulation
  ACPI: watchdog: Fix gas->access_width usage
  ACPICA: Introduce ACPI_ACCESS_BYTE_WIDTH() macro
  audit: fix error handling in audit_data_to_entry()
  ext4: potential crash on allocation error in ext4_alloc_flex_bg_array()
  net: sched: correct flower port blocking
  qede: Fix race between rdma destroy workqueue and link change event
  ipv6: Fix route replacement with dev-only route
  ipv6: Fix nlmsg_flags when splitting a multipath route
  sctp: move the format error check out of __sctp_sf_do_9_1_abort
  nfc: pn544: Fix occasional HW initialization failure
  net: phy: restore mdio regs in the iproc mdio driver
  net: fib_rules: Correctly set table field when table number exceeds 8 bits
  sysrq: Remove duplicated sysrq message
  sysrq: Restore original console_loglevel when sysrq disabled
  cfg80211: add missing policy for NL80211_ATTR_STATUS_CODE
  cifs: Fix mode output in debugging statements
  net: ena: ena-com.c: prevent NULL pointer dereference
  net: ena: ethtool: use correct value for crc32 hash
  net: ena: fix incorrectly saving queue numbers when setting RSS indirection table
  net: ena: rss: store hash function as values and not bits
  net: ena: rss: fix failure to get indirection table
  net: ena: fix incorrect default RSS key
  net: ena: add missing ethtool TX timestamping indication
  net: ena: fix uses of round_jiffies()
  net: ena: fix potential crash when rxfh key is NULL
  qmi_wwan: unconditionally reject 2 ep interfaces
  qmi_wwan: re-add DW5821e pre-production variant
  cfg80211: check wiphy driver existence for drvinfo report
  mac80211: consider more elements in parsing CRC
  dax: pass NOWAIT flag to iomap_apply
  drm/msm: Set dma maximum segment size for mdss
  ipmi:ssif: Handle a possible NULL pointer reference
  ext4: fix potential race between s_group_info online resizing and access
  ext4: fix potential race between s_flex_groups online resizing and access
  ext4: fix potential race between online resizing and write operations
  netfilter: nf_conntrack: resolve clash for matching conntracks
  iwlwifi: pcie: fix rb_allocator workqueue allocation
  FROMLIST: f2fs: fix wrong check on F2FS_IOC_FSSETXATTR
  UPSTREAM: binder: prevent UAF for binderfs devices II
  UPSTREAM: binder: prevent UAF for binderfs devices
  FROMLIST: lib: test_stackinit.c: XFAIL switch variable init tests
  ANDROID: cuttlefish: disable KPROBES
  ANDROID: scsi: ufs: allow ufs variants to override sg entry size
  FROMLIST: ufs: fix a bug on printing PRDT
  BACKPORT: loop: Add LOOP_SET_BLOCK_SIZE in compat ioctl
  ANDROID: fix build issue in security/selinux/avc.c
  ANDROID: cuttlefish_defconfig: Disable CONFIG_RT_GROUP_SCHED
  ANDROID: Enable HID_NINTENDO as y
  FROMLIST: HID: nintendo: add nintendo switch controller driver
  Linux 4.14.172
  s390/mm: Explicitly compare PAGE_DEFAULT_KEY against zero in storage_key_init_range
  xen: Enable interrupts when calling _cond_resched()
  ata: ahci: Add shutdown to freeze hardware resources of ahci
  netfilter: xt_hashlimit: limit the max size of hashtable
  ALSA: seq: Fix concurrent access to queue current tick/time
  ALSA: seq: Avoid concurrent access to queue flags
  ALSA: rawmidi: Avoid bit fields for state flags
  genirq/proc: Reject invalid affinity masks (again)
  iommu/vt-d: Fix compile warning from intel-svm.h
  ecryptfs: replace BUG_ON with error handling code
  staging: greybus: use after free in gb_audio_manager_remove_all()
  staging: rtl8723bs: fix copy of overlapping memory
  usb: gadget: composite: Fix bMaxPower for SuperSpeedPlus
  scsi: Revert "target: iscsi: Wait for all commands to finish before freeing a session"
  scsi: Revert "RDMA/isert: Fix a recently introduced regression related to logout"
  Btrfs: fix btrfs_wait_ordered_range() so that it waits for all ordered extents
  btrfs: do not check delayed items are empty for single transaction cleanup
  btrfs: fix bytes_may_use underflow in prealloc error condtition
  KVM: apic: avoid calculating pending eoi from an uninitialized val
  KVM: nVMX: handle nested posted interrupts when apicv is disabled for L1
  KVM: nVMX: Check IO instruction VM-exit conditions
  KVM: nVMX: Refactor IO bitmap checks into helper function
  ext4: fix race between writepages and enabling EXT4_EXTENTS_FL
  ext4: rename s_journal_flag_rwsem to s_writepages_rwsem
  ext4: fix mount failure with quota configured as module
  ext4: add cond_resched() to __ext4_find_entry()
  ext4: fix a data race in EXT4_I(inode)->i_disksize
  KVM: nVMX: Don't emulate instructions in guest mode
  lib/stackdepot.c: fix global out-of-bounds in stack_slabs
  serial: 8250: Check UPF_IRQ_SHARED in advance
  vt: vt_ioctl: fix race in VT_RESIZEX
  VT_RESIZEX: get rid of field-by-field copyin
  xhci: apply XHCI_PME_STUCK_QUIRK to Intel Comet Lake platforms
  KVM: x86: don't notify userspace IOAPIC on edge-triggered interrupt EOI
  drm/amdgpu/soc15: fix xclk for raven
  mm/vmscan.c: don't round up scan size for online memory cgroup
  Revert "ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()"
  MAINTAINERS: Update drm/i915 bug filing URL
  serdev: ttyport: restore client ops on deregistration
  tty: serial: imx: setup the correct sg entry for tx dma
  tty/serial: atmel: manage shutdown in case of RS485 or ISO7816 mode
  x86/mce/amd: Fix kobject lifetime
  x86/mce/amd: Publish the bank pointer only after setup has succeeded
  staging: rtl8723bs: Fix potential overuse of kernel memory
  staging: rtl8723bs: Fix potential security hole
  staging: rtl8188eu: Fix potential overuse of kernel memory
  staging: rtl8188eu: Fix potential security hole
  USB: hub: Fix the broken detection of USB3 device in SMSC hub
  USB: hub: Don't record a connect-change event during reset-resume
  USB: Fix novation SourceControl XL after suspend
  usb: uas: fix a plug & unplug racing
  usb: host: xhci: update event ring dequeue pointer on purpose
  xhci: fix runtime pm enabling for quirky Intel hosts
  xhci: Force Maximum Packet size for Full-speed bulk devices to valid range.
  staging: vt6656: fix sign of rx_dbm to bb_pre_ed_rssi.
  staging: android: ashmem: Disallow ashmem memory from being remapped
  vt: selection, handle pending signals in paste_selection
  floppy: check FDC index for errors before assigning it
  USB: misc: iowarrior: add support for the 100 device
  USB: misc: iowarrior: add support for the 28 and 28L devices
  USB: misc: iowarrior: add support for 2 OEMed devices
  thunderbolt: Prevent crash if non-active NVMem file is read
  net/smc: fix leak of kernel memory to user space
  net/sched: flower: add missing validation of TCA_FLOWER_FLAGS
  net/sched: matchall: add missing validation of TCA_MATCHALL_FLAGS
  net: dsa: tag_qca: Make sure there is headroom for tag
  enic: prevent waking up stopped tx queues over watchdog reset
  selinux: ensure we cleanup the internal AVC counters on error in avc_update()
  mlxsw: spectrum_dpipe: Add missing error path
  virtio_balloon: prevent pfn array overflow
  help_next should increase position index
  brd: check and limit max_part par
  microblaze: Prevent the overflow of the start
  iwlwifi: mvm: Fix thermal zone registration
  irqchip/gic-v3-its: Reference to its_invall_cmd descriptor when building INVALL
  bcache: explicity type cast in bset_bkey_last()
  reiserfs: prevent NULL pointer dereference in reiserfs_insert_item()
  lib/scatterlist.c: adjust indentation in __sg_alloc_table
  ocfs2: fix a NULL pointer dereference when call ocfs2_update_inode_fsync_trans()
  radeon: insert 10ms sleep in dce5_crtc_load_lut
  trigger_next should increase position index
  ftrace: fpid_next() should increase position index
  drm/nouveau/disp/nv50-: prevent oops when no channel method map provided
  irqchip/gic-v3: Only provision redistributors that are enabled in ACPI
  ceph: check availability of mds cluster on mount after wait timeout
  cifs: fix NULL dereference in match_prepath
  iwlegacy: ensure loop counter addr does not wrap and cause an infinite loop
  hostap: Adjust indentation in prism2_hostapd_add_sta
  ARM: 8951/1: Fix Kexec compilation issue.
  jbd2: make sure ESHUTDOWN to be recorded in the journal superblock
  jbd2: switch to use jbd2_journal_abort() when failed to submit the commit record
  powerpc/sriov: Remove VF eeh_dev state when disabling SR-IOV
  ALSA: hda - Add docking station support for Lenovo Thinkpad T420s
  driver core: platform: fix u32 greater or equal to zero comparison
  s390/ftrace: generate traced function stack frame
  x86/decoder: Add TEST opcode to Group3-2
  ALSA: hda/hdmi - add retry logic to parse_intel_hdmi()
  irqchip/mbigen: Set driver .suppress_bind_attrs to avoid remove problems
  remoteproc: Initialize rproc_class before use
  btrfs: device stats, log when stats are zeroed
  btrfs: safely advance counter when looking up bio csums
  btrfs: fix possible NULL-pointer dereference in integrity checks
  pwm: Remove set but not set variable 'pwm'
  ide: serverworks: potential overflow in svwks_set_pio_mode()
  cmd64x: potential buffer overflow in cmd64x_program_timings()
  pwm: omap-dmtimer: Remove PWM chip in .remove before making it unfunctional
  x86/mm: Fix NX bit clearing issue in kernel_map_pages_in_pgd
  f2fs: fix memleak of kobject
  watchdog/softlockup: Enforce that timestamp is valid on boot
  arm64: fix alternatives with LLVM's integrated assembler
  scsi: iscsi: Don't destroy session if there are outstanding connections
  f2fs: free sysfs kobject
  iommu/arm-smmu-v3: Use WRITE_ONCE() when changing validity of an STE
  usb: musb: omap2430: Get rid of musb .set_vbus for omap2430 glue
  drm/vmwgfx: prevent memory leak in vmw_cmdbuf_res_add
  drm/nouveau: Fix copy-paste error in nouveau_fence_wait_uevent_handler
  drm/nouveau/gr/gk20a,gm200-: add terminators to method lists read from fw
  drm/nouveau/secboot/gm20b: initialize pointer in gm20b_secboot_new()
  vme: bridges: reduce stack usage
  driver core: Print device when resources present in really_probe()
  driver core: platform: Prevent resouce overflow from causing infinite loops
  tty: synclink_gt: Adjust indentation in several functions
  tty: synclinkmp: Adjust indentation in several functions
  ASoC: atmel: fix build error with CONFIG_SND_ATMEL_SOC_DMA=m
  wan: ixp4xx_hss: fix compile-testing on 64-bit
  Input: edt-ft5x06 - work around first register access error
  rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls
  efi/x86: Don't panic or BUG() on non-critical error conditions
  soc/tegra: fuse: Correct straps' address for older Tegra124 device trees
  IB/hfi1: Add software counter for ctxt0 seq drop
  udf: Fix free space reporting for metadata and virtual partitions
  usbip: Fix unsafe unaligned pointer usage
  drm: remove the newline for CRC source name.
  tools lib api fs: Fix gcc9 stringop-truncation compilation error
  ALSA: sh: Fix compile warning wrt const
  ALSA: sh: Fix unused variable warnings
  clk: sunxi-ng: add mux and pll notifiers for A64 CPU clock
  RDMA/rxe: Fix error type of mmap_offset
  pinctrl: sh-pfc: sh7269: Fix CAN function GPIOs
  PM / devfreq: rk3399_dmc: Add COMPILE_TEST and HAVE_ARM_SMCCC dependency
  x86/vdso: Provide missing include file
  dmaengine: Store module owner in dma_device struct
  ARM: dts: r8a7779: Add device node for ARM global timer
  drm/mediatek: handle events when enabling/disabling crtc
  scsi: aic7xxx: Adjust indentation in ahc_find_syncrate
  scsi: ufs: Complete pending requests in host reset and restore path
  ACPICA: Disassembler: create buffer fields in ACPI_PARSE_LOAD_PASS1
  orinoco: avoid assertion in case of NULL pointer
  rtlwifi: rtl_pci: Fix -Wcast-function-type
  iwlegacy: Fix -Wcast-function-type
  ipw2x00: Fix -Wcast-function-type
  b43legacy: Fix -Wcast-function-type
  ALSA: usx2y: Adjust indentation in snd_usX2Y_hwdep_dsp_status
  fore200e: Fix incorrect checks of NULL pointer dereference
  reiserfs: Fix spurious unlock in reiserfs_fill_super() error handling
  media: v4l2-device.h: Explicitly compare grp{id,mask} to zero in v4l2_device macros
  ARM: dts: imx6: rdu2: Disable WP for USDHC2 and USDHC3
  arm64: dts: qcom: msm8996: Disable USB2 PHY suspend by core
  NFC: port100: Convert cpu_to_le16(le16_to_cpu(E1) + E2) to use le16_add_cpu().
  PCI/IOV: Fix memory leak in pci_iov_add_virtfn()
  net/wan/fsl_ucc_hdlc: reject muram offsets above 64K
  regulator: rk808: Lower log level on optional GPIOs being not available
  drm/amdgpu: remove 4 set but not used variable in amdgpu_atombios_get_connector_info_from_object_table
  clk: qcom: rcg2: Don't crash if our parent can't be found; return an error
  kconfig: fix broken dependency in randconfig-generated .config
  KVM: s390: ENOTSUPP -> EOPNOTSUPP fixups
  nbd: add a flush_workqueue in nbd_start_device
  ext4, jbd2: ensure panic when aborting with zero errno
  tracing: Fix very unlikely race of registering two stat tracers
  tracing: Fix tracing_stat return values in error handling paths
  x86/sysfb: Fix check for bad VRAM size
  jbd2: clear JBD2_ABORT flag before journal_reset to update log tail info when load journal
  kselftest: Minimise dependency of get_size on C library interfaces
  clocksource/drivers/bcm2835_timer: Fix memory leak of timer
  usb: dwc2: Fix IN FIFO allocation
  usb: gadget: udc: fix possible sleep-in-atomic-context bugs in gr_probe()
  uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()
  sparc: Add .exit.data section.
  MIPS: Loongson: Fix potential NULL dereference in loongson3_platform_init()
  efi/x86: Map the entire EFI vendor string before copying it
  pinctrl: baytrail: Do not clear IRQ flags on direct-irq enabled pins
  media: sti: bdisp: fix a possible sleep-in-atomic-context bug in bdisp_device_run()
  char/random: silence a lockdep splat with printk()
  gpio: gpio-grgpio: fix possible sleep-in-atomic-context bugs in grgpio_irq_map/unmap()
  powerpc/powernv/iov: Ensure the pdn for VFs always contains a valid PE number
  media: i2c: mt9v032: fix enum mbus codes and frame sizes
  pxa168fb: Fix the function used to release some memory in an error handling path
  pinctrl: sh-pfc: sh7264: Fix CAN function GPIOs
  gianfar: Fix TX timestamping with a stacked DSA driver
  ALSA: ctl: allow TLV read operation for callback type of element in locked case
  ext4: fix ext4_dax_read/write inode locking sequence for IOCB_NOWAIT
  leds: pca963x: Fix open-drain initialization
  brcmfmac: Fix use after free in brcmf_sdio_readframes()
  cpu/hotplug, stop_machine: Fix stop_machine vs hotplug order
  drm/gma500: Fixup fbdev stolen size usage evaluation
  KVM: nVMX: Use correct root level for nested EPT shadow page tables
  Revert "KVM: VMX: Add non-canonical check on writes to RTIT address MSRs"
  Revert "KVM: nVMX: Use correct root level for nested EPT shadow page tables"
  scsi: qla2xxx: fix a potential NULL pointer dereference
  jbd2: do not clear the BH_Mapped flag when forgetting a metadata buffer
  jbd2: move the clearing of b_modified flag to the journal_unmap_buffer()
  hwmon: (pmbus/ltc2978) Fix PMBus polling of MFR_COMMON definitions.
  perf/x86/intel: Fix inaccurate period in context switch for auto-reload
  s390/time: Fix clk type in get_tod_clock
  RDMA/core: Fix protection fault in get_pkey_idx_qp_list
  IB/hfi1: Close window for pq and request coliding
  serial: imx: Only handle irqs that are actually enabled
  serial: imx: ensure that RX irqs are off if RX is off
  padata: Remove broken queue flushing
  perf/x86/amd: Add missing L2 misses event spec to AMD Family 17h's event map
  KVM: nVMX: Use correct root level for nested EPT shadow page tables
  arm64: ssbs: Fix context-switch when SSBS is present on all CPUs
  btrfs: log message when rw remount is attempted with unclean tree-log
  btrfs: print message when tree-log replay starts
  Btrfs: fix race between using extent maps and merging them
  ext4: improve explanation of a mount failure caused by a misconfigured kernel
  ext4: fix checksum errors with indexed dirs
  ext4: fix support for inode sizes > 1024 bytes
  ext4: don't assume that mmp_nodename/bdevname have NUL
  ARM: 8723/2: always assume the "unified" syntax for assembly code
  arm64: nofpsimd: Handle TIF_FOREIGN_FPSTATE flag cleanly
  arm64: ptrace: nofpsimd: Fail FP/SIMD regset operations
  arm64: cpufeature: Set the FP/SIMD compat HWCAP bits properly
  ALSA: usb-audio: Apply sample rate quirk for Audioengine D1
  Input: synaptics - remove the LEN0049 dmi id from topbuttonpad list
  Input: synaptics - enable SMBus on ThinkPad L470
  Input: synaptics - switch T470s to RMI4 by default
  ecryptfs: fix a memory leak bug in ecryptfs_init_messaging()
  ecryptfs: fix a memory leak bug in parse_tag_1_packet()
  ASoC: sun8i-codec: Fix setting DAI data format
  ALSA: hda: Use scnprintf() for printing texts for sysfs/procfs
  iommu/qcom: Fix bogus detach logic
  KVM: x86: emulate RDPID
  UPSTREAM: sched/psi: Fix OOB write when writing 0 bytes to PSI files
  UPSTREAM: psi: Fix a division error in psi poll()
  UPSTREAM: sched/psi: Fix sampling error and rare div0 crashes with cgroups and high uptime
  UPSTREAM: sched/psi: Correct overly pessimistic size calculation
  FROMLIST: f2fs: Handle casefolding with Encryption
  FROMLIST: fscrypt: Have filesystems handle their d_ops
  FROMLIST: ext4: Use generic casefolding support
  FROMLIST: f2fs: Use generic casefolding support
  FROMLIST: Add standard casefolding support
  FROMLIST: unicode: Add utf8_casefold_hash
  ANDROID: cuttlefish_defconfig: Add CONFIG_UNICODE
  ANDROID: sdcardfs: fix -ENOENT lookup race issue
  ANDROID: gki_defconfig: Enable CONFIG_RD_LZ4
  ANDROID: dm: Add wrapped key support in dm-default-key
  ANDROID: dm: add support for passing through derive_raw_secret
  ANDROID: block: Prevent crypto fallback for wrapped keys
  ANDROID: Disable wq fp check in CFI builds
  ANDROID: increase limit on sched-tune boost groups
  ANDROID: ufs, block: fix crypto power management and move into block layer
  ANDROID: Incremental fs: Support xattrs
  ANDROID: test_stackinit: work around LLVM PR44916
  ANDROID: clang: update to 10.0.4
  fs-verity: use u64_to_user_ptr()
  fs-verity: use mempool for hash requests
  fs-verity: implement readahead of Merkle tree pages
  ext4: readpages() should submit IO as read-ahead
  fs-verity: implement readahead for FS_IOC_ENABLE_VERITY
  fscrypt: improve format of no-key names
  ubifs: allow both hash and disk name to be provided in no-key names
  ubifs: don't trigger assertion on invalid no-key filename
  fscrypt: clarify what is meant by a per-file key
  fscrypt: derive dirhash key for casefolded directories
  fscrypt: don't allow v1 policies with casefolding
  fscrypt: add "fscrypt_" prefix to fname_encrypt()
  fscrypt: don't print name of busy file when removing key
  fscrypt: document gfp_flags for bounce page allocation
  fscrypt: optimize fscrypt_zeroout_range()
  fscrypt: remove redundant bi_status check
  fscrypt: Allow modular crypto algorithms
  fscrypt: include <linux/ioctl.h> in UAPI header
  fscrypt: don't check for ENOKEY from fscrypt_get_encryption_info()
  fscrypt: remove fscrypt_is_direct_key_policy()
  fscrypt: move fscrypt_valid_enc_modes() to policy.c
  fscrypt: check for appropriate use of DIRECT_KEY flag earlier
  fscrypt: split up fscrypt_supported_policy() by policy version
  fscrypt: introduce fscrypt_needs_contents_encryption()
  fscrypt: move fscrypt_d_revalidate() to fname.c
  fscrypt: constify inode parameter to filename encryption functions
  fscrypt: constify struct fscrypt_hkdf parameter to fscrypt_hkdf_expand()
  fscrypt: verify that the crypto_skcipher has the correct ivsize
  fscrypt: use crypto_skcipher_driver_name()
  fscrypt: support passing a keyring key to FS_IOC_ADD_ENCRYPTION_KEY
  keys: Export lookup_user_key to external users
  f2fs: fix build error on PAGE_KERNEL_RO

 Conflicts:
	arch/arm64/kernel/smp.c
	arch/arm64/kernel/traps.c
	block/blk-crypto-fallback.c
	block/keyslot-manager.c
	drivers/base/power/wakeup.c
	drivers/clk/clk.c
	drivers/clk/qcom/clk-rcg2.c
	drivers/gpu/Makefile
	drivers/gpu/drm/msm/msm_drv.c
	drivers/gpu/drm/msm/msm_gem.c
	drivers/hwtracing/coresight/coresight-funnel.c
	drivers/irqchip/irq-gic-v3.c
	drivers/md/dm.c
	drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
	drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
	drivers/net/macsec.c
	drivers/net/phy/micrel.c
	drivers/net/wireless/ath/wil6210/cfg80211.c
	drivers/net/wireless/ath/wil6210/fw_inc.c
	drivers/net/wireless/ath/wil6210/pcie_bus.c
	drivers/net/wireless/ath/wil6210/pm.c
	drivers/net/wireless/ath/wil6210/wil6210.h
	drivers/of/base.c
	drivers/power/supply/power_supply_sysfs.c
	drivers/rpmsg/qcom_glink_smem.c
	drivers/scsi/sd.c
	drivers/scsi/ufs/ufshcd-crypto.c
	drivers/scsi/ufs/ufshcd.c
	drivers/scsi/ufs/ufshcd.h
	drivers/scsi/ufs/ufshci.h
	drivers/usb/dwc3/core.c
	drivers/usb/dwc3/gadget.c
	drivers/usb/gadget/Kconfig
	drivers/usb/gadget/composite.c
	drivers/usb/gadget/function/f_fs.c
	drivers/usb/gadget/legacy/Makefile
	drivers/usb/host/xhci-mem.c
	fs/ext4/readpage.c
	fs/sdcardfs/lookup.c
	include/linux/key.h
	include/linux/keyslot-manager.h
	include/linux/power_supply.h
	include/uapi/linux/coresight-stm.h
	net/qrtr/qrtr.c

Change-Id: Iaa9fcbe987e721f02596e167249a519781ed3888
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
2020-08-05 11:41:56 +05:30

3694 lines
96 KiB
C

/*
* linux/fs/buffer.c
*
* Copyright (C) 1991, 1992, 2002 Linus Torvalds
*/
/*
* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
*
* Removed a lot of unnecessary code and simplified things now that
* the buffer cache isn't our primary cache - Andrew Tridgell 12/96
*
* Speed up hash, lru, and free list operations. Use gfp() for allocating
* hash table, use SLAB cache for buffer heads. SMP threading. -DaveM
*
* Added 32k buffer block sizes - these are required older ARM systems. - RMK
*
* async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
*/
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/syscalls.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/file.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/hash.h>
#include <linux/suspend.h>
#include <linux/buffer_head.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <trace/events/block.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
bh->b_end_io = handler;
bh->b_private = private;
}
EXPORT_SYMBOL(init_buffer);
inline void touch_buffer(struct buffer_head *bh)
{
trace_block_touch_buffer(bh);
mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);
void __lock_buffer(struct buffer_head *bh)
{
wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);
void unlock_buffer(struct buffer_head *bh)
{
clear_bit_unlock(BH_Lock, &bh->b_state);
smp_mb__after_atomic();
wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
/*
* Returns if the page has dirty or writeback buffers. If all the buffers
* are unlocked and clean then the PageDirty information is stale. If
* any of the pages are locked, it is assumed they are locked for IO.
*/
void buffer_check_dirty_writeback(struct page *page,
bool *dirty, bool *writeback)
{
struct buffer_head *head, *bh;
*dirty = false;
*writeback = false;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
return;
if (PageWriteback(page))
*writeback = true;
head = page_buffers(page);
bh = head;
do {
if (buffer_locked(bh))
*writeback = true;
if (buffer_dirty(bh))
*dirty = true;
bh = bh->b_this_page;
} while (bh != head);
}
EXPORT_SYMBOL(buffer_check_dirty_writeback);
/*
* Block until a buffer comes unlocked. This doesn't stop it
* from becoming locked again - you have to lock it yourself
* if you want to preserve its state.
*/
void __wait_on_buffer(struct buffer_head * bh)
{
wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);
static void
__clear_page_buffers(struct page *page)
{
ClearPagePrivate(page);
set_page_private(page, 0);
put_page(page);
}
static void buffer_io_error(struct buffer_head *bh, char *msg)
{
if (!test_bit(BH_Quiet, &bh->b_state))
printk_ratelimited(KERN_ERR
"Buffer I/O error on dev %pg, logical block %llu%s\n",
bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
}
/*
* End-of-IO handler helper function which does not touch the bh after
* unlocking it.
* Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
* a race there is benign: unlock_buffer() only use the bh's address for
* hashing after unlocking the buffer, so it doesn't actually touch the bh
* itself.
*/
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
if (uptodate) {
set_buffer_uptodate(bh);
} else {
/* This happens, due to failed read-ahead attempts. */
clear_buffer_uptodate(bh);
}
unlock_buffer(bh);
}
/*
* Default synchronous end-of-IO handler.. Just mark it up-to-date and
* unlock the buffer. This is what ll_rw_block uses too.
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
__end_buffer_read_notouch(bh, uptodate);
put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
if (uptodate) {
set_buffer_uptodate(bh);
} else {
buffer_io_error(bh, ", lost sync page write");
mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
}
unlock_buffer(bh);
put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);
/*
* Various filesystems appear to want __find_get_block to be non-blocking.
* But it's the page lock which protects the buffers. To get around this,
* we get exclusion from try_to_free_buffers with the blockdev mapping's
* private_lock.
*
* Hack idea: for the blockdev mapping, i_bufferlist_lock contention
* may be quite high. This code could TryLock the page, and if that
* succeeds, there is no need to take private_lock. (But if
* private_lock is contended then so is mapping->tree_lock).
*/
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
struct inode *bd_inode = bdev->bd_inode;
struct address_space *bd_mapping = bd_inode->i_mapping;
struct buffer_head *ret = NULL;
pgoff_t index;
struct buffer_head *bh;
struct buffer_head *head;
struct page *page;
int all_mapped = 1;
static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
if (!page)
goto out;
spin_lock(&bd_mapping->private_lock);
if (!page_has_buffers(page))
goto out_unlock;
head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh))
all_mapped = 0;
else if (bh->b_blocknr == block) {
ret = bh;
get_bh(bh);
goto out_unlock;
}
bh = bh->b_this_page;
} while (bh != head);
/* we might be here because some of the buffers on this page are
* not mapped. This is due to various races between
* file io on the block device and getblk. It gets dealt with
* elsewhere, don't buffer_error if we had some unmapped buffers
*/
ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
if (all_mapped && __ratelimit(&last_warned)) {
printk("__find_get_block_slow() failed. block=%llu, "
"b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
"device %pg blocksize: %d\n",
(unsigned long long)block,
(unsigned long long)bh->b_blocknr,
bh->b_state, bh->b_size, bdev,
1 << bd_inode->i_blkbits);
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
put_page(page);
out:
return ret;
}
/*
* Kick the writeback threads then try to free up some ZONE_NORMAL memory.
*/
static void free_more_memory(void)
{
struct zoneref *z;
int nid;
wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
yield();
for_each_online_node(nid) {
z = first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
gfp_zone(GFP_NOFS), NULL);
if (z->zone)
try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
GFP_NOFS, NULL);
}
}
/*
* I/O completion handler for block_read_full_page() - pages
* which come unlocked at the end of I/O.
*/
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
struct page *page;
int page_uptodate = 1;
BUG_ON(!buffer_async_read(bh));
page = bh->b_page;
if (uptodate) {
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
buffer_io_error(bh, ", async page read");
SetPageError(page);
}
/*
* Be _very_ careful from here on. Bad things can happen if
* two buffer heads end IO at almost the same time and both
* decide that the page is now completely done.
*/
first = page_buffers(page);
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
do {
if (!buffer_uptodate(tmp))
page_uptodate = 0;
if (buffer_async_read(tmp)) {
BUG_ON(!buffer_locked(tmp));
goto still_busy;
}
tmp = tmp->b_this_page;
} while (tmp != bh);
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
/*
* If none of the buffers had errors and they are all
* uptodate then we can set the page uptodate.
*/
if (page_uptodate && !PageError(page))
SetPageUptodate(page);
unlock_page(page);
return;
still_busy:
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
return;
}
/*
* Completion handler for block_write_full_page() - pages which are unlocked
* during I/O, and which have PageWriteback cleared upon I/O completion.
*/
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
struct page *page;
BUG_ON(!buffer_async_write(bh));
page = bh->b_page;
if (uptodate) {
set_buffer_uptodate(bh);
} else {
buffer_io_error(bh, ", lost async page write");
mark_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
SetPageError(page);
}
first = page_buffers(page);
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
clear_buffer_async_write(bh);
unlock_buffer(bh);
tmp = bh->b_this_page;
while (tmp != bh) {
if (buffer_async_write(tmp)) {
BUG_ON(!buffer_locked(tmp));
goto still_busy;
}
tmp = tmp->b_this_page;
}
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
end_page_writeback(page);
return;
still_busy:
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
return;
}
EXPORT_SYMBOL(end_buffer_async_write);
/*
* If a page's buffers are under async readin (end_buffer_async_read
* completion) then there is a possibility that another thread of
* control could lock one of the buffers after it has completed
* but while some of the other buffers have not completed. This
* locked buffer would confuse end_buffer_async_read() into not unlocking
* the page. So the absence of BH_Async_Read tells end_buffer_async_read()
* that this buffer is not under async I/O.
*
* The page comes unlocked when it has no locked buffer_async buffers
* left.
*
* PageLocked prevents anyone starting new async I/O reads any of
* the buffers.
*
* PageWriteback is used to prevent simultaneous writeout of the same
* page.
*
* PageLocked prevents anyone from starting writeback of a page which is
* under read I/O (PageWriteback is only ever set against a locked page).
*/
static void mark_buffer_async_read(struct buffer_head *bh)
{
bh->b_end_io = end_buffer_async_read;
set_buffer_async_read(bh);
}
static void mark_buffer_async_write_endio(struct buffer_head *bh,
bh_end_io_t *handler)
{
bh->b_end_io = handler;
set_buffer_async_write(bh);
}
void mark_buffer_async_write(struct buffer_head *bh)
{
mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);
/*
* fs/buffer.c contains helper functions for buffer-backed address space's
* fsync functions. A common requirement for buffer-based filesystems is
* that certain data from the backing blockdev needs to be written out for
* a successful fsync(). For example, ext2 indirect blocks need to be
* written back and waited upon before fsync() returns.
*
* The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
* inode_has_buffers() and invalidate_inode_buffers() are provided for the
* management of a list of dependent buffers at ->i_mapping->private_list.
*
* Locking is a little subtle: try_to_free_buffers() will remove buffers
* from their controlling inode's queue when they are being freed. But
* try_to_free_buffers() will be operating against the *blockdev* mapping
* at the time, not against the S_ISREG file which depends on those buffers.
* So the locking for private_list is via the private_lock in the address_space
* which backs the buffers. Which is different from the address_space
* against which the buffers are listed. So for a particular address_space,
* mapping->private_lock does *not* protect mapping->private_list! In fact,
* mapping->private_list will always be protected by the backing blockdev's
* ->private_lock.
*
* Which introduces a requirement: all buffers on an address_space's
* ->private_list must be from the same address_space: the blockdev's.
*
* address_spaces which do not place buffers at ->private_list via these
* utility functions are free to use private_lock and private_list for
* whatever they want. The only requirement is that list_empty(private_list)
* be true at clear_inode() time.
*
* FIXME: clear_inode should not call invalidate_inode_buffers(). The
* filesystems should do that. invalidate_inode_buffers() should just go
* BUG_ON(!list_empty).
*
* FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should
* take an address_space, not an inode. And it should be called
* mark_buffer_dirty_fsync() to clearly define why those buffers are being
* queued up.
*
* FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
* list if it is already on a list. Because if the buffer is on a list,
* it *must* already be on the right one. If not, the filesystem is being
* silly. This will save a ton of locking. But first we have to ensure
* that buffers are taken *off* the old inode's list when they are freed
* (presumably in truncate). That requires careful auditing of all
* filesystems (do it inside bforget()). It could also be done by bringing
* b_inode back.
*/
/*
* The buffer's backing address_space's private_lock must be held
*/
static void __remove_assoc_queue(struct buffer_head *bh)
{
list_del_init(&bh->b_assoc_buffers);
WARN_ON(!bh->b_assoc_map);
bh->b_assoc_map = NULL;
}
int inode_has_buffers(struct inode *inode)
{
return !list_empty(&inode->i_data.private_list);
}
/*
* osync is designed to support O_SYNC io. It waits synchronously for
* all already-submitted IO to complete, but does not queue any new
* writes to the disk.
*
* To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
* you dirty the buffers, and then use osync_inode_buffers to wait for
* completion. Any other dirty buffers which are not yet queued for
* write will not be flushed to disk by the osync.
*/
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
struct buffer_head *bh;
struct list_head *p;
int err = 0;
spin_lock(lock);
repeat:
list_for_each_prev(p, list) {
bh = BH_ENTRY(p);
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(lock);
goto repeat;
}
}
spin_unlock(lock);
return err;
}
static void do_thaw_one(struct super_block *sb, void *unused)
{
while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}
static void do_thaw_all(struct work_struct *work)
{
iterate_supers(do_thaw_one, NULL);
kfree(work);
printk(KERN_WARNING "Emergency Thaw complete\n");
}
/**
* emergency_thaw_all -- forcibly thaw every frozen filesystem
*
* Used for emergency unfreeze of all filesystems via SysRq
*/
void emergency_thaw_all(void)
{
struct work_struct *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work) {
INIT_WORK(work, do_thaw_all);
schedule_work(work);
}
}
/**
* sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
* @mapping: the mapping which wants those buffers written
*
* Starts I/O against the buffers at mapping->private_list, and waits upon
* that I/O.
*
* Basically, this is a convenience function for fsync().
* @mapping is a file or directory which needs those buffers to be written for
* a successful fsync().
*/
int sync_mapping_buffers(struct address_space *mapping)
{
struct address_space *buffer_mapping = mapping->private_data;
if (buffer_mapping == NULL || list_empty(&mapping->private_list))
return 0;
return fsync_buffers_list(&buffer_mapping->private_lock,
&mapping->private_list);
}
EXPORT_SYMBOL(sync_mapping_buffers);
/*
* Called when we've recently written block `bblock', and it is known that
* `bblock' was for a buffer_boundary() buffer. This means that the block at
* `bblock + 1' is probably a dirty indirect block. Hunt it down and, if it's
* dirty, schedule it for IO. So that indirects merge nicely with their data.
*/
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize)
{
struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
put_bh(bh);
}
}
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
struct address_space *mapping = inode->i_mapping;
struct address_space *buffer_mapping = bh->b_page->mapping;
mark_buffer_dirty(bh);
if (!mapping->private_data) {
mapping->private_data = buffer_mapping;
} else {
BUG_ON(mapping->private_data != buffer_mapping);
}
if (!bh->b_assoc_map) {
spin_lock(&buffer_mapping->private_lock);
list_move_tail(&bh->b_assoc_buffers,
&mapping->private_list);
bh->b_assoc_map = mapping;
spin_unlock(&buffer_mapping->private_lock);
}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
/*
* Mark the page dirty, and set it dirty in the radix tree, and mark the inode
* dirty.
*
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
*
* The caller must hold lock_page_memcg().
*/
static void __set_page_dirty(struct page *page, struct address_space *mapping,
int warn)
{
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
}
/*
* Add a page to the dirty page list.
*
* It is a sad fact of life that this function is called from several places
* deeply under spinlocking. It may not sleep.
*
* If the page has buffers, the uptodate buffers are set dirty, to preserve
* dirty-state coherency between the page and the buffers. It the page does
* not have buffers then when they are later attached they will all be set
* dirty.
*
* The buffers are dirtied before the page is dirtied. There's a small race
* window in which a writepage caller may see the page cleanness but not the
* buffer dirtiness. That's fine. If this code were to set the page dirty
* before the buffers, a concurrent writepage caller could clear the page dirty
* bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
* page on the dirty page list.
*
* We use private_lock to lock against try_to_free_buffers while using the
* page's buffer list. Also use this to protect against clean buffers being
* added to the page after it was set dirty.
*
* FIXME: may need to call ->reservepage here as well. That's rather up to the
* address_space though.
*/
int __set_page_dirty_buffers(struct page *page)
{
int newly_dirty;
struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
return !TestSetPageDirty(page);
spin_lock(&mapping->private_lock);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
do {
set_buffer_dirty(bh);
bh = bh->b_this_page;
} while (bh != head);
}
/*
* Lock out page->mem_cgroup migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
lock_page_memcg(page);
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
if (newly_dirty)
__set_page_dirty(page, mapping, 1);
unlock_page_memcg(page);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
/*
* Write out and wait upon a list of buffers.
*
* We have conflicting pressures: we want to make sure that all
* initially dirty buffers get waited on, but that any subsequently
* dirtied buffers don't. After all, we don't want fsync to last
* forever if somebody is actively writing to the file.
*
* Do this in two main stages: first we copy dirty buffers to a
* temporary inode list, queueing the writes as we go. Then we clean
* up, waiting for those writes to complete.
*
* During this second stage, any subsequent updates to the file may end
* up refiling the buffer on the original inode's dirty list again, so
* there is a chance we will end up with a buffer queued for write but
* not yet completed on that list. So, as a final cleanup we go through
* the osync code to catch these locked, dirty buffers without requeuing
* any newly dirty buffers for write.
*/
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
struct buffer_head *bh;
struct list_head tmp;
struct address_space *mapping;
int err = 0, err2;
struct blk_plug plug;
INIT_LIST_HEAD(&tmp);
blk_start_plug(&plug);
spin_lock(lock);
while (!list_empty(list)) {
bh = BH_ENTRY(list->next);
mapping = bh->b_assoc_map;
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh) || buffer_locked(bh)) {
list_add(&bh->b_assoc_buffers, &tmp);
bh->b_assoc_map = mapping;
if (buffer_dirty(bh)) {
get_bh(bh);
spin_unlock(lock);
/*
* Ensure any pending I/O completes so that
* write_dirty_buffer() actually writes the
* current contents - it is a noop if I/O is
* still in flight on potentially older
* contents.
*/
write_dirty_buffer(bh, REQ_SYNC);
/*
* Kick off IO for the previous mapping. Note
* that we will not run the very last mapping,
* wait_on_buffer() will do that for us
* through sync_buffer().
*/
brelse(bh);
spin_lock(lock);
}
}
}
spin_unlock(lock);
blk_finish_plug(&plug);
spin_lock(lock);
while (!list_empty(&tmp)) {
bh = BH_ENTRY(tmp.prev);
get_bh(bh);
mapping = bh->b_assoc_map;
__remove_assoc_queue(bh);
/* Avoid race with mark_buffer_dirty_inode() which does
* a lockless check and we rely on seeing the dirty bit */
smp_mb();
if (buffer_dirty(bh)) {
list_add(&bh->b_assoc_buffers,
&mapping->private_list);
bh->b_assoc_map = mapping;
}
spin_unlock(lock);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
err = -EIO;
brelse(bh);
spin_lock(lock);
}
spin_unlock(lock);
err2 = osync_buffers_list(lock, list);
if (err)
return err;
else
return err2;
}
/*
* Invalidate any and all dirty buffers on a given inode. We are
* probably unmounting the fs, but that doesn't mean we have already
* done a sync(). Just drop the buffers from the inode list.
*
* NOTE: we take the inode's blockdev's mapping's private_lock. Which
* assumes that all the buffers are against the blockdev. Not true
* for reiserfs.
*/
void invalidate_inode_buffers(struct inode *inode)
{
if (inode_has_buffers(inode)) {
struct address_space *mapping = &inode->i_data;
struct list_head *list = &mapping->private_list;
struct address_space *buffer_mapping = mapping->private_data;
spin_lock(&buffer_mapping->private_lock);
while (!list_empty(list))
__remove_assoc_queue(BH_ENTRY(list->next));
spin_unlock(&buffer_mapping->private_lock);
}
}
EXPORT_SYMBOL(invalidate_inode_buffers);
/*
* Remove any clean buffers from the inode's buffer list. This is called
* when we're trying to free the inode itself. Those buffers can pin it.
*
* Returns true if all buffers were removed.
*/
int remove_inode_buffers(struct inode *inode)
{
int ret = 1;
if (inode_has_buffers(inode)) {
struct address_space *mapping = &inode->i_data;
struct list_head *list = &mapping->private_list;
struct address_space *buffer_mapping = mapping->private_data;
spin_lock(&buffer_mapping->private_lock);
while (!list_empty(list)) {
struct buffer_head *bh = BH_ENTRY(list->next);
if (buffer_dirty(bh)) {
ret = 0;
break;
}
__remove_assoc_queue(bh);
}
spin_unlock(&buffer_mapping->private_lock);
}
return ret;
}
/*
* Create the appropriate buffers when given a page for data area and
* the size of each buffer.. Use the bh->b_this_page linked list to
* follow the buffers created. Return NULL if unable to create more
* buffers.
*
* The retry flag is used to differentiate async IO (paging, swapping)
* which may not fail from ordinary buffer allocations.
*/
struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
int retry)
{
struct buffer_head *bh, *head;
long offset;
try_again:
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
bh = alloc_buffer_head(GFP_NOFS);
if (!bh)
goto no_grow;
bh->b_this_page = head;
bh->b_blocknr = -1;
head = bh;
bh->b_size = size;
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
return head;
/*
* In case anything failed, we just free everything we got.
*/
no_grow:
if (head) {
do {
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
} while (head);
}
/*
* Return failure for non-async IO requests. Async IO requests
* are not allowed to fail, so we have to wait until buffer heads
* become available. But we don't want tasks sleeping with
* partially complete buffers, so all were released above.
*/
if (!retry)
return NULL;
/* We're _really_ low on memory. Now we just
* wait for old buffer heads to become free due to
* finishing IO. Since this is an async request and
* the reserve list is empty, we're sure there are
* async buffer heads in use.
*/
free_more_memory();
goto try_again;
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
{
struct buffer_head *bh, *tail;
bh = head;
do {
tail = bh;
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
attach_page_buffers(page, head);
}
static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
{
sector_t retval = ~((sector_t)0);
loff_t sz = i_size_read(bdev->bd_inode);
if (sz) {
unsigned int sizebits = blksize_bits(size);
retval = (sz >> sizebits);
}
return retval;
}
/*
* Initialise the state of a blockdev page's buffers.
*/
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
sector_t block, int size)
{
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
int uptodate = PageUptodate(page);
sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
do {
if (!buffer_mapped(bh)) {
init_buffer(bh, NULL, NULL);
bh->b_bdev = bdev;
bh->b_blocknr = block;
if (uptodate)
set_buffer_uptodate(bh);
if (block < end_block)
set_buffer_mapped(bh);
}
block++;
bh = bh->b_this_page;
} while (bh != head);
/*
* Caller needs to validate requested block against end of device.
*/
return end_block;
}
/*
* Create the page-cache page that contains the requested block.
*
* This is used purely for blockdev mappings.
*/
static int
grow_dev_page(struct block_device *bdev, sector_t block,
pgoff_t index, int size, int sizebits, gfp_t gfp)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
struct buffer_head *bh;
sector_t end_block;
int ret = 0; /* Will call free_more_memory() */
gfp_t gfp_mask;
gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
/*
* XXX: __getblk_slow() can not really deal with failure and
* will endlessly loop on improvised global reclaim. Prefer
* looping in the allocator rather than here, at least that
* code knows what it's doing.
*/
gfp_mask |= __GFP_NOFAIL;
page = find_or_create_page(inode->i_mapping, index, gfp_mask);
if (!page)
return ret;
BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) {
bh = page_buffers(page);
if (bh->b_size == size) {
end_block = init_page_buffers(page, bdev,
(sector_t)index << sizebits,
size);
goto done;
}
if (!try_to_free_buffers(page))
goto failed;
}
/*
* Allocate some buffers for this page
*/
bh = alloc_page_buffers(page, size, 0);
if (!bh)
goto failed;
/*
* Link the page to the buffers and initialise them. Take the
* lock to be atomic wrt __find_get_block(), which does not
* run under the page lock.
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
size);
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
failed:
unlock_page(page);
put_page(page);
return ret;
}
/*
* Create buffers for the specified block device block's page. If
* that page was dirty, the buffers are set dirty also.
*/
static int
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
pgoff_t index;
int sizebits;
sizebits = -1;
do {
sizebits++;
} while ((size << sizebits) < PAGE_SIZE);
index = block >> sizebits;
/*
* Check for a block which wants to lie outside our maximum possible
* pagecache index. (this comparison is done using sector_t types).
*/
if (unlikely(index != block >> sizebits)) {
printk(KERN_ERR "%s: requested out-of-range block %llu for "
"device %pg\n",
__func__, (unsigned long long)block,
bdev);
return -EIO;
}
/* Create a page with the proper size buffers.. */
return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}
static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) {
printk(KERN_ERR "getblk(): invalid block size %d requested\n",
size);
printk(KERN_ERR "logical block size: %d\n",
bdev_logical_block_size(bdev));
dump_stack();
return NULL;
}
for (;;) {
struct buffer_head *bh;
int ret;
bh = __find_get_block(bdev, block, size);
if (bh)
return bh;
ret = grow_buffers(bdev, block, size, gfp);
if (ret < 0)
return NULL;
if (ret == 0)
free_more_memory();
}
}
/*
* The relationship between dirty buffers and dirty pages:
*
* Whenever a page has any dirty buffers, the page's dirty bit is set, and
* the page is tagged dirty in its radix tree.
*
* At all times, the dirtiness of the buffers represents the dirtiness of
* subsections of the page. If the page has buffers, the page dirty bit is
* merely a hint about the true dirty state.
*
* When a page is set dirty in its entirety, all its buffers are marked dirty
* (if the page has buffers).
*
* When a buffer is marked dirty, its page is dirtied, but the page's other
* buffers are not.
*
* Also. When blockdev buffers are explicitly read with bread(), they
* individually become uptodate. But their backing page remains not
* uptodate - even if all of its buffers are uptodate. A subsequent
* block_read_full_page() against that page will discover all the uptodate
* buffers, will set the page uptodate and will perform no I/O.
*/
/**
* mark_buffer_dirty - mark a buffer_head as needing writeout
* @bh: the buffer_head to mark dirty
*
* mark_buffer_dirty() will set the dirty bit against the buffer, then set its
* backing page dirty, then tag the page as dirty in its address_space's radix
* tree and then attach the address_space's inode to its superblock's dirty
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
* mapping->tree_lock and mapping->host->i_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
{
WARN_ON_ONCE(!buffer_uptodate(bh));
trace_block_dirty_buffer(bh);
/*
* Very *carefully* optimize the it-is-already-dirty case.
*
* Don't let the final "is it dirty" escape to before we
* perhaps modified the buffer.
*/
if (buffer_dirty(bh)) {
smp_mb();
if (buffer_dirty(bh))
return;
}
if (!test_set_buffer_dirty(bh)) {
struct page *page = bh->b_page;
struct address_space *mapping = NULL;
lock_page_memcg(page);
if (!TestSetPageDirty(page)) {
mapping = page_mapping(page);
if (mapping)
__set_page_dirty(page, mapping, 0);
}
unlock_page_memcg(page);
if (mapping)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
}
EXPORT_SYMBOL(mark_buffer_dirty);
void mark_buffer_write_io_error(struct buffer_head *bh)
{
set_buffer_write_io_error(bh);
/* FIXME: do we need to set this in both places? */
if (bh->b_page && bh->b_page->mapping)
mapping_set_error(bh->b_page->mapping, -EIO);
if (bh->b_assoc_map)
mapping_set_error(bh->b_assoc_map, -EIO);
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
/*
* Decrement a buffer_head's reference count. If all buffers against a page
* have zero reference count, are clean and unlocked, and if the page is clean
* and unlocked then try_to_free_buffers() may strip the buffers from the page
* in preparation for freeing it (sometimes, rarely, buffers are removed from
* a page but it ends up not being freed, and buffers may later be reattached).
*/
void __brelse(struct buffer_head * buf)
{
if (atomic_read(&buf->b_count)) {
put_bh(buf);
return;
}
WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);
/*
* bforget() is like brelse(), except it discards any
* potentially dirty data.
*/
void __bforget(struct buffer_head *bh)
{
clear_buffer_dirty(bh);
if (bh->b_assoc_map) {
struct address_space *buffer_mapping = bh->b_page->mapping;
spin_lock(&buffer_mapping->private_lock);
list_del_init(&bh->b_assoc_buffers);
bh->b_assoc_map = NULL;
spin_unlock(&buffer_mapping->private_lock);
}
__brelse(bh);
}
EXPORT_SYMBOL(__bforget);
static struct buffer_head *__bread_slow(struct buffer_head *bh)
{
lock_buffer(bh);
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
return bh;
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, 0, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
}
brelse(bh);
return NULL;
}
/*
* Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block().
* The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their
* refcount elevated by one when they're in an LRU. A buffer can only appear
* once in a particular CPU's LRU. A single buffer can be present in multiple
* CPU's LRUs at the same time.
*
* This is a transparent caching front-end to sb_bread(), sb_getblk() and
* sb_find_get_block().
*
* The LRUs themselves only need locking against invalidate_bh_lrus. We use
* a local interrupt disable for that.
*/
#define BH_LRU_SIZE 16
struct bh_lru {
struct buffer_head *bhs[BH_LRU_SIZE];
};
static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
#define bh_lru_unlock() local_irq_enable()
#else
#define bh_lru_lock() preempt_disable()
#define bh_lru_unlock() preempt_enable()
#endif
static inline void check_irqs_on(void)
{
#ifdef irqs_disabled
BUG_ON(irqs_disabled());
#endif
}
/*
* Install a buffer_head into this cpu's LRU. If not already in the LRU, it is
* inserted at the front, and the buffer_head at the back if any is evicted.
* Or, if already in the LRU it is moved to the front.
*/
static void bh_lru_install(struct buffer_head *bh)
{
struct buffer_head *evictee = bh;
struct bh_lru *b;
int i;
check_irqs_on();
bh_lru_lock();
b = this_cpu_ptr(&bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
swap(evictee, b->bhs[i]);
if (evictee == bh) {
bh_lru_unlock();
return;
}
}
get_bh(bh);
bh_lru_unlock();
brelse(evictee);
}
/*
* Look up the bh in this cpu's LRU. If it's there, move it to the head.
*/
static struct buffer_head *
lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *ret = NULL;
unsigned int i;
check_irqs_on();
bh_lru_lock();
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
bh->b_size == size) {
if (i) {
while (i) {
__this_cpu_write(bh_lrus.bhs[i],
__this_cpu_read(bh_lrus.bhs[i - 1]));
i--;
}
__this_cpu_write(bh_lrus.bhs[0], bh);
}
get_bh(bh);
ret = bh;
break;
}
}
bh_lru_unlock();
return ret;
}
/*
* Perform a pagecache lookup for the matching buffer. If it's there, refresh
* it in the LRU and mark it as accessed. If it is not present then return
* NULL
*/
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) {
/* __find_get_block_slow will mark the page accessed */
bh = __find_get_block_slow(bdev, block);
if (bh)
bh_lru_install(bh);
} else
touch_buffer(bh);
return bh;
}
EXPORT_SYMBOL(__find_get_block);
/*
* __getblk_gfp() will locate (and, if necessary, create) the buffer_head
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
* __getblk_gfp() will lock up the machine if grow_dev_page's
* try_to_free_buffers() attempt is failing. FIXME, perhaps?
*/
struct buffer_head *
__getblk_gfp(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
might_sleep();
if (bh == NULL)
bh = __getblk_slow(bdev, block, size, gfp);
return bh;
}
EXPORT_SYMBOL(__getblk_gfp);
/*
* Do async read-ahead on a buffer..
*/
void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
brelse(bh);
}
}
EXPORT_SYMBOL(__breadahead);
void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
gfp_t gfp)
{
struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
if (likely(bh)) {
ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
brelse(bh);
}
}
EXPORT_SYMBOL(__breadahead_gfp);
/**
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
* @block: number of block
* @size: size (in bytes) to read
* @gfp: page allocation flag
*
* Reads a specified block, and returns buffer head that contains it.
* The page cache can be allocated from non-movable area
* not to prevent page migration if you set gfp to zero.
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
__bread_gfp(struct block_device *bdev, sector_t block,
unsigned size, gfp_t gfp)
{
struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
if (likely(bh) && !buffer_uptodate(bh))
bh = __bread_slow(bh);
return bh;
}
EXPORT_SYMBOL(__bread_gfp);
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
* This doesn't race because it runs in each cpu either in irq
* or with preempt disabled.
*/
static void invalidate_bh_lru(void *arg)
{
struct bh_lru *b = &get_cpu_var(bh_lrus);
int i;
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
put_cpu_var(bh_lrus);
}
static bool has_bh_in_lru(int cpu, void *dummy)
{
struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
int i;
for (i = 0; i < BH_LRU_SIZE; i++) {
if (b->bhs[i])
return 1;
}
return 0;
}
static void __evict_bh_lru(void *arg)
{
struct bh_lru *b = &get_cpu_var(bh_lrus);
struct buffer_head *bh = arg;
int i;
for (i = 0; i < BH_LRU_SIZE; i++) {
if (b->bhs[i] == bh) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
goto out;
}
}
out:
put_cpu_var(bh_lrus);
}
static bool bh_exists_in_lru(int cpu, void *arg)
{
struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
struct buffer_head *bh = arg;
int i;
for (i = 0; i < BH_LRU_SIZE; i++) {
if (b->bhs[i] == bh)
return 1;
}
return 0;
}
void invalidate_bh_lrus(void)
{
on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
static void evict_bh_lrus(struct buffer_head *bh)
{
on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1, GFP_ATOMIC);
}
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
bh->b_page = page;
BUG_ON(offset >= PAGE_SIZE);
if (PageHighMem(page))
/*
* This catches illegal uses and preserves the offset:
*/
bh->b_data = (char *)(0 + offset);
else
bh->b_data = page_address(page) + offset;
}
EXPORT_SYMBOL(set_bh_page);
/*
* Called when truncating a buffer on a page completely.
*/
/* Bits that are cleared during an invalidate */
#define BUFFER_FLAGS_DISCARD \
(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
1 << BH_Delay | 1 << BH_Unwritten)
static void discard_buffer(struct buffer_head * bh)
{
unsigned long b_state, b_state_old;
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
b_state = bh->b_state;
for (;;) {
b_state_old = cmpxchg(&bh->b_state, b_state,
(b_state & ~BUFFER_FLAGS_DISCARD));
if (b_state_old == b_state)
break;
b_state = b_state_old;
}
unlock_buffer(bh);
}
/**
* block_invalidatepage - invalidate part or all of a buffer-backed page
*
* @page: the page which is affected
* @offset: start of the range to invalidate
* @length: length of the range to invalidate
*
* block_invalidatepage() is called when all or part of the page has become
* invalidated by a truncate operation.
*
* block_invalidatepage() does not have to release all buffers, but it must
* ensure that no dirty buffer is left outside @offset and that no I/O
* is underway against any of the blocks which are outside the truncation
* point. Because the caller is about to free (and possibly reuse) those
* blocks on-disk.
*/
void block_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
unsigned int stop = length + offset;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
goto out;
/*
* Check for overflow
*/
BUG_ON(stop > PAGE_SIZE || stop < length);
head = page_buffers(page);
bh = head;
do {
unsigned int next_off = curr_off + bh->b_size;
next = bh->b_this_page;
/*
* Are we still fully in range ?
*/
if (next_off > stop)
goto out;
/*
* is this block fully invalidated?
*/
if (offset <= curr_off)
discard_buffer(bh);
curr_off = next_off;
bh = next;
} while (bh != head);
/*
* We release buffers only if the entire page is being invalidated.
* The get_block cached value has been unconditionally invalidated,
* so real IO is not possible anymore.
*/
if (offset == 0)
try_to_release_page(page, 0);
out:
return;
}
EXPORT_SYMBOL(block_invalidatepage);
/*
* We attach and possibly dirty the buffers atomically wrt
* __set_page_dirty_buffers() via private_lock. try_to_free_buffers
* is already excluded via the page lock.
*/
void create_empty_buffers(struct page *page,
unsigned long blocksize, unsigned long b_state)
{
struct buffer_head *bh, *head, *tail;
head = alloc_page_buffers(page, blocksize, 1);
bh = head;
do {
bh->b_state |= b_state;
tail = bh;
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
spin_lock(&page->mapping->private_lock);
if (PageUptodate(page) || PageDirty(page)) {
bh = head;
do {
if (PageDirty(page))
set_buffer_dirty(bh);
if (PageUptodate(page))
set_buffer_uptodate(bh);
bh = bh->b_this_page;
} while (bh != head);
}
attach_page_buffers(page, head);
spin_unlock(&page->mapping->private_lock);
}
EXPORT_SYMBOL(create_empty_buffers);
/**
* clean_bdev_aliases: clean a range of buffers in block device
* @bdev: Block device to clean buffers in
* @block: Start of a range of blocks to clean
* @len: Number of blocks to clean
*
* We are taking a range of blocks for data and we don't want writeback of any
* buffer-cache aliases starting from return from this function and until the
* moment when something will explicitly mark the buffer dirty (hopefully that
* will not happen until we will free that block ;-) We don't even need to mark
* it not-uptodate - nobody can expect anything from a newly allocated buffer
* anyway. We used to use unmap_buffer() for such invalidation, but that was
* wrong. We definitely don't want to mark the alias unmapped, for example - it
* would confuse anyone who might pick it with bread() afterwards...
*
* Also.. Note that bforget() doesn't lock the buffer. So there can be
* writeout I/O going on against recently-freed buffers. We don't wait on that
* I/O in bforget() - it's more efficient to wait on the I/O only if we really
* need to. That happens here.
*/
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
struct inode *bd_inode = bdev->bd_inode;
struct address_space *bd_mapping = bd_inode->i_mapping;
struct pagevec pvec;
pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
pgoff_t end;
int i, count;
struct buffer_head *bh;
struct buffer_head *head;
end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
pagevec_init(&pvec, 0);
while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
count = pagevec_count(&pvec);
for (i = 0; i < count; i++) {
struct page *page = pvec.pages[i];
if (!page_has_buffers(page))
continue;
/*
* We use page lock instead of bd_mapping->private_lock
* to pin buffers here since we can afford to sleep and
* it scales better than a global spinlock lock.
*/
lock_page(page);
/* Recheck when the page is locked which pins bhs */
if (!page_has_buffers(page))
goto unlock_page;
head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh) || (bh->b_blocknr < block))
goto next;
if (bh->b_blocknr >= block + len)
break;
clear_buffer_dirty(bh);
wait_on_buffer(bh);
clear_buffer_req(bh);
next:
bh = bh->b_this_page;
} while (bh != head);
unlock_page:
unlock_page(page);
}
pagevec_release(&pvec);
cond_resched();
/* End of range already reached? */
if (index > end || !index)
break;
}
}
EXPORT_SYMBOL(clean_bdev_aliases);
/*
* Size is a power-of-two in the range 512..PAGE_SIZE,
* and the case we care about most is PAGE_SIZE.
*
* So this *could* possibly be written with those
* constraints in mind (relevant mostly if some
* architecture has a slow bit-scan instruction)
*/
static inline int block_size_bits(unsigned int blocksize)
{
return ilog2(blocksize);
}
static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
{
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
return page_buffers(page);
}
/*
* NOTE! All mapped/uptodate combinations are valid:
*
* Mapped Uptodate Meaning
*
* No No "unknown" - must do get_block()
* No Yes "hole" - zero-filled
* Yes No "allocated" - allocated on disk, not read in
* Yes Yes "valid" - allocated and up-to-date in memory.
*
* "Dirty" is valid only with the last case (mapped+uptodate).
*/
/*
* While block_write_full_page is writing back the dirty buffers under
* the page lock, whoever dirtied the buffers may decide to clean them
* again at any time. We handle that by only looking at the buffer
* state inside lock_buffer().
*
* If block_write_full_page() is called for regular writeback
* (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
* locked buffer. This only can happen if someone has written the buffer
* directly, with submit_bh(). At the address_space level PageWriteback
* prevents this contention from occurring.
*
* If block_write_full_page() is called with wbc->sync_mode ==
* WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
* causes the writes to be flagged as synchronous writes.
*/
int __block_write_full_page(struct inode *inode, struct page *page,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler)
{
int err;
sector_t block;
sector_t last_block;
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
int write_flags = wbc_to_write_flags(wbc);
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
/*
* Be very careful. We have no exclusion from __set_page_dirty_buffers
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
* then we just miss that fact, and the page stays dirty.
*
* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
* handle that here by just cleaning them.
*/
bh = head;
blocksize = bh->b_size;
bbits = block_size_bits(blocksize);
block = (sector_t)page->index << (PAGE_SHIFT - bbits);
last_block = (i_size_read(inode) - 1) >> bbits;
/*
* Get all the dirty buffers mapped to disk addresses and
* handle any aliases from the underlying blockdev's mapping.
*/
do {
if (block > last_block) {
/*
* mapped buffers outside i_size will occur, because
* this page can be outside i_size when there is a
* truncate in progress.
*/
/*
* The buffer was zeroed by block_write_full_page()
*/
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
buffer_dirty(bh)) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, block, bh, 1);
if (err)
goto recover;
clear_buffer_delay(bh);
if (buffer_new(bh)) {
/* blockdev mappings never come here */
clear_buffer_new(bh);
clean_bdev_bh_alias(bh);
}
}
bh = bh->b_this_page;
block++;
} while (bh != head);
do {
if (!buffer_mapped(bh))
continue;
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
* potentially cause a busy-wait loop from writeback threads
* and kswapd activity, but those code paths have their own
* higher-level throttling.
*/
if (wbc->sync_mode != WB_SYNC_NONE) {
lock_buffer(bh);
} else if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
continue;
}
if (test_clear_buffer_dirty(bh)) {
mark_buffer_async_write_endio(bh, handler);
} else {
unlock_buffer(bh);
}
} while ((bh = bh->b_this_page) != head);
/*
* The page and its buffers are protected by PageWriteback(), so we can
* drop the bh refcounts early.
*/
BUG_ON(PageWriteback(page));
set_page_writeback(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
} while (bh != head);
unlock_page(page);
err = 0;
done:
if (nr_underway == 0) {
/*
* The page was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
* ll_rw_block/submit_bh. A rare case.
*/
end_page_writeback(page);
/*
* The page and buffer_heads can be released at any time from
* here on.
*/
}
return err;
recover:
/*
* ENOSPC, or some other error. We may already have added some
* blocks to the file, so we need to write these out to avoid
* exposing stale data.
* The page is currently locked and not marked for writeback
*/
bh = head;
/* Recovery: lock and submit the mapped buffers */
do {
if (buffer_mapped(bh) && buffer_dirty(bh) &&
!buffer_delay(bh)) {
lock_buffer(bh);
mark_buffer_async_write_endio(bh, handler);
} else {
/*
* The buffer may have been set dirty during
* attachment to a dirty page.
*/
clear_buffer_dirty(bh);
}
} while ((bh = bh->b_this_page) != head);
SetPageError(page);
BUG_ON(PageWriteback(page));
mapping_set_error(page->mapping, err);
set_page_writeback(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
} while (bh != head);
unlock_page(page);
goto done;
}
EXPORT_SYMBOL(__block_write_full_page);
/*
* If a page has any new buffers, zero them out here, and mark them uptodate
* and dirty so they'll be written out (in order to prevent uninitialised
* block data from leaking). And clear the new bit.
*/
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
unsigned int block_start, block_end;
struct buffer_head *head, *bh;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
return;
bh = head = page_buffers(page);
block_start = 0;
do {
block_end = block_start + bh->b_size;
if (buffer_new(bh)) {
if (block_end > from && block_start < to) {
if (!PageUptodate(page)) {
unsigned start, size;
start = max(from, block_start);
size = min(to, block_end) - start;
zero_user(page, start, size);
set_buffer_uptodate(bh);
}
clear_buffer_new(bh);
mark_buffer_dirty(bh);
}
}
block_start = block_end;
bh = bh->b_this_page;
} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);
static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
struct iomap *iomap)
{
loff_t offset = block << inode->i_blkbits;
bh->b_bdev = iomap->bdev;
/*
* Block points to offset in file we need to map, iomap contains
* the offset at which the map starts. If the map ends before the
* current block, then do not map the buffer and let the caller
* handle it.
*/
BUG_ON(offset >= iomap->offset + iomap->length);
switch (iomap->type) {
case IOMAP_HOLE:
/*
* If the buffer is not up to date or beyond the current EOF,
* we need to mark it as new to ensure sub-block zeroing is
* executed if necessary.
*/
if (!buffer_uptodate(bh) ||
(offset >= i_size_read(inode)))
set_buffer_new(bh);
break;
case IOMAP_DELALLOC:
if (!buffer_uptodate(bh) ||
(offset >= i_size_read(inode)))
set_buffer_new(bh);
set_buffer_uptodate(bh);
set_buffer_mapped(bh);
set_buffer_delay(bh);
break;
case IOMAP_UNWRITTEN:
/*
* For unwritten regions, we always need to ensure that
* sub-block writes cause the regions in the block we are not
* writing to are zeroed. Set the buffer as new to ensure this.
*/
set_buffer_new(bh);
set_buffer_unwritten(bh);
/* FALLTHRU */
case IOMAP_MAPPED:
if (offset >= i_size_read(inode))
set_buffer_new(bh);
bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
((offset - iomap->offset) >> inode->i_blkbits);
set_buffer_mapped(bh);
break;
}
}
int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
sector_t block;
int err = 0;
unsigned blocksize, bbits;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_SIZE);
BUG_ON(to > PAGE_SIZE);
BUG_ON(from > to);
head = create_page_buffers(page, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
block = (sector_t)page->index << (PAGE_SHIFT - bbits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (PageUptodate(page)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
}
continue;
}
if (buffer_new(bh))
clear_buffer_new(bh);
if (!buffer_mapped(bh)) {
WARN_ON(bh->b_size != blocksize);
if (get_block) {
err = get_block(inode, block, bh, 1);
if (err)
break;
} else {
iomap_to_bh(inode, block, bh, iomap);
}
if (buffer_new(bh)) {
clean_bdev_bh_alias(bh);
if (PageUptodate(page)) {
clear_buffer_new(bh);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from)
zero_user_segments(page,
to, block_end,
block_start, from);
continue;
}
}
if (PageUptodate(page)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
continue;
}
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
*wait_bh++=bh;
}
}
/*
* If we issued read requests - let them complete.
*/
while(wait_bh > wait) {
wait_on_buffer(*--wait_bh);
if (!buffer_uptodate(*wait_bh))
err = -EIO;
}
if (unlikely(err))
page_zero_new_buffers(page, from, to);
return err;
}
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
{
return __block_write_begin_int(page, pos, len, get_block, NULL);
}
EXPORT_SYMBOL(__block_write_begin);
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
{
unsigned block_start, block_end;
int partial = 0;
unsigned blocksize;
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
blocksize = bh->b_size;
block_start = 0;
do {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
} else {
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
}
clear_buffer_new(bh);
block_start = block_end;
bh = bh->b_this_page;
} while (bh != head);
/*
* If this is a partial write which happened to make all buffers
* uptodate then we can optimize away a bogus readpage() for
* the next read(). Here we 'discover' whether the page went
* uptodate as a result of this (potentially partial) write.
*/
if (!partial)
SetPageUptodate(page);
return 0;
}
/*
* block_write_begin takes care of the basic task of block allocation and
* bringing partial write blocks uptodate first.
*
* The filesystem needs to handle block truncation upon failure.
*/
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
unsigned flags, struct page **pagep, get_block_t *get_block)
{
pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int status;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
status = __block_write_begin(page, pos, len, get_block);
if (unlikely(status)) {
unlock_page(page);
put_page(page);
page = NULL;
}
*pagep = page;
return status;
}
EXPORT_SYMBOL(block_write_begin);
int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
unsigned start;
start = pos & (PAGE_SIZE - 1);
if (unlikely(copied < len)) {
/*
* The buffers that were written will now be uptodate, so we
* don't have to worry about a readpage reading them and
* overwriting a partial write. However if we have encountered
* a short write and only partially written into a buffer, it
* will not be marked uptodate, so a readpage might come in and
* destroy our partial write.
*
* Do the simplest thing, and just treat any short write to a
* non uptodate page as a zero-length write, and force the
* caller to redo the whole thing.
*/
if (!PageUptodate(page))
copied = 0;
page_zero_new_buffers(page, start+copied, start+len);
}
flush_dcache_page(page);
/* This could be a short (even 0-length) commit */
__block_commit_write(inode, page, start, start+copied);
return copied;
}
EXPORT_SYMBOL(block_write_end);
int generic_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
int i_size_changed = 0;
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
/*
* No need to use i_size_read() here, the i_size
* cannot change under us because we hold i_mutex.
*
* But it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
if (pos+copied > inode->i_size) {
i_size_write(inode, pos+copied);
i_size_changed = 1;
}
unlock_page(page);
put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
* ordering of page lock and transaction start for journaling
* filesystems.
*/
if (i_size_changed)
mark_inode_dirty(inode);
return copied;
}
EXPORT_SYMBOL(generic_write_end);
/*
* block_is_partially_uptodate checks whether buffers within a page are
* uptodate or not.
*
* Returns true if all buffers which correspond to a file portion
* we want to read are uptodate.
*/
int block_is_partially_uptodate(struct page *page, unsigned long from,
unsigned long count)
{
unsigned block_start, block_end, blocksize;
unsigned to;
struct buffer_head *bh, *head;
int ret = 1;
if (!page_has_buffers(page))
return 0;
head = page_buffers(page);
blocksize = head->b_size;
to = min_t(unsigned, PAGE_SIZE - from, count);
to = from + to;
if (from < blocksize && to > PAGE_SIZE - blocksize)
return 0;
bh = head;
block_start = 0;
do {
block_end = block_start + blocksize;
if (block_end > from && block_start < to) {
if (!buffer_uptodate(bh)) {
ret = 0;
break;
}
if (block_end >= to)
break;
}
block_start = block_end;
bh = bh->b_this_page;
} while (bh != head);
return ret;
}
EXPORT_SYMBOL(block_is_partially_uptodate);
/*
* Generic "read page" function for block devices that have the normal
* get_block functionality. This is most of the block device filesystems.
* Reads the page asynchronously --- the unlock_buffer() and
* set/clear_buffer_uptodate() functions propagate buffer state into the
* page struct once IO has completed.
*/
int block_read_full_page(struct page *page, get_block_t *get_block)
{
struct inode *inode = page->mapping->host;
sector_t iblock, lblock;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, bbits;
int nr, i;
int fully_mapped = 1;
head = create_page_buffers(page, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
lblock = (i_size_read(inode)+blocksize-1) >> bbits;
bh = head;
nr = 0;
i = 0;
do {
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
int err = 0;
fully_mapped = 0;
if (iblock < lblock) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, iblock, bh, 0);
if (err)
SetPageError(page);
}
if (!buffer_mapped(bh)) {
zero_user(page, i * blocksize, blocksize);
if (!err)
set_buffer_uptodate(bh);
continue;
}
/*
* get_block() might have updated the buffer
* synchronously
*/
if (buffer_uptodate(bh))
continue;
}
arr[nr++] = bh;
} while (i++, iblock++, (bh = bh->b_this_page) != head);
if (fully_mapped)
SetPageMappedToDisk(page);
if (!nr) {
/*
* All buffers are uptodate - we can set the page uptodate
* as well. But not if get_block() returned an error.
*/
if (!PageError(page))
SetPageUptodate(page);
unlock_page(page);
return 0;
}
/* Stage two: lock the buffers */
for (i = 0; i < nr; i++) {
bh = arr[i];
lock_buffer(bh);
mark_buffer_async_read(bh);
}
/*
* Stage 3: start the IO. Check for uptodateness
* inside the buffer lock in case another process reading
* the underlying blockdev brought it uptodate (the sct fix).
*/
for (i = 0; i < nr; i++) {
bh = arr[i];
if (buffer_uptodate(bh))
end_buffer_async_read(bh, 1);
else
submit_bh(REQ_OP_READ, 0, bh);
}
return 0;
}
EXPORT_SYMBOL(block_read_full_page);
/* utility function for filesystems that need to do work on expanding
* truncates. Uses filesystem pagecache writes to allow the filesystem to
* deal with the hole.
*/
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
void *fsdata;
int err;
err = inode_newsize_ok(inode, size);
if (err)
goto out;
err = pagecache_write_begin(NULL, mapping, size, 0,
AOP_FLAG_CONT_EXPAND, &page, &fsdata);
if (err)
goto out;
err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
BUG_ON(err > 0);
out:
return err;
}
EXPORT_SYMBOL(generic_cont_expand_simple);
static int cont_expand_zero(struct file *file, struct address_space *mapping,
loff_t pos, loff_t *bytes)
{
struct inode *inode = mapping->host;
unsigned int blocksize = i_blocksize(inode);
struct page *page;
void *fsdata;
pgoff_t index, curidx;
loff_t curpos;
unsigned zerofrom, offset, len;
int err = 0;
index = pos >> PAGE_SHIFT;
offset = pos & ~PAGE_MASK;
while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
zerofrom = curpos & ~PAGE_MASK;
if (zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
}
len = PAGE_SIZE - zerofrom;
err = pagecache_write_begin(file, mapping, curpos, len, 0,
&page, &fsdata);
if (err)
goto out;
zero_user(page, zerofrom, len);
err = pagecache_write_end(file, mapping, curpos, len, len,
page, fsdata);
if (err < 0)
goto out;
BUG_ON(err != len);
err = 0;
balance_dirty_pages_ratelimited(mapping);
if (unlikely(fatal_signal_pending(current))) {
err = -EINTR;
goto out;
}
}
/* page covers the boundary, find the boundary offset */
if (index == curidx) {
zerofrom = curpos & ~PAGE_MASK;
/* if we will expand the thing last block will be filled */
if (offset <= zerofrom) {
goto out;
}
if (zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
}
len = offset - zerofrom;
err = pagecache_write_begin(file, mapping, curpos, len, 0,
&page, &fsdata);
if (err)
goto out;
zero_user(page, zerofrom, len);
err = pagecache_write_end(file, mapping, curpos, len, len,
page, fsdata);
if (err < 0)
goto out;
BUG_ON(err != len);
err = 0;
}
out:
return err;
}
/*
* For moronic filesystems that do not allow holes in file.
* We may have to extend the file.
*/
int cont_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block, loff_t *bytes)
{
struct inode *inode = mapping->host;
unsigned int blocksize = i_blocksize(inode);
unsigned int zerofrom;
int err;
err = cont_expand_zero(file, mapping, pos, bytes);
if (err)
return err;
zerofrom = *bytes & ~PAGE_MASK;
if (pos+len > *bytes && zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
}
return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);
int block_commit_write(struct page *page, unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
__block_commit_write(inode,page,from,to);
return 0;
}
EXPORT_SYMBOL(block_commit_write);
/*
* block_page_mkwrite() is not allowed to change the file size as it gets
* called from a page fault handler when a page is first dirtied. Hence we must
* be careful to check for EOF conditions here. We set the page up correctly
* for a written page which means we get ENOSPC checking when writing into
* holes and correct delalloc and unwritten extent mapping on filesystems that
* support these features.
*
* We are not allowed to take the i_mutex here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
* truncate writes the inode size before removing pages, once we have the
* page lock we can determine safely if the page is beyond EOF. If it is not
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*
* Direct callers of this function should protect against filesystem freezing
* using sb_start_pagefault() - sb_end_pagefault() functions.
*/
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
unsigned long end;
loff_t size;
int ret;
lock_page(page);
size = i_size_read(inode);
if ((page->mapping != inode->i_mapping) ||
(page_offset(page) > size)) {
/* We overload EFAULT to mean page got truncated */
ret = -EFAULT;
goto out_unlock;
}
/* page is wholly or partially inside EOF */
if (((page->index + 1) << PAGE_SHIFT) > size)
end = size & ~PAGE_MASK;
else
end = PAGE_SIZE;
ret = __block_write_begin(page, 0, end, get_block);
if (!ret)
ret = block_commit_write(page, 0, end);
if (unlikely(ret < 0))
goto out_unlock;
set_page_dirty(page);
wait_for_stable_page(page);
return 0;
out_unlock:
unlock_page(page);
return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);
/*
* nobh_write_begin()'s prereads are special: the buffer_heads are freed
* immediately, while under the page lock. So it needs a special end_io
* handler which does not touch the bh after unlocking it.
*/
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
__end_buffer_read_notouch(bh, uptodate);
}
/*
* Attach the singly-linked list of buffers created by nobh_write_begin, to
* the page (converting it to circular linked list and taking care of page
* dirty races).
*/
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
struct buffer_head *bh;
BUG_ON(!PageLocked(page));
spin_lock(&page->mapping->private_lock);
bh = head;
do {
if (PageDirty(page))
set_buffer_dirty(bh);
if (!bh->b_this_page)
bh->b_this_page = head;
bh = bh->b_this_page;
} while (bh != head);
attach_page_buffers(page, head);
spin_unlock(&page->mapping->private_lock);
}
/*
* On entry, the page is fully not uptodate.
* On exit the page is fully uptodate in the areas outside (from,to)
* The filesystem needs to handle block truncation upon failure.
*/
int nobh_write_begin(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
{
struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
const unsigned blocksize = 1 << blkbits;
struct buffer_head *head, *bh;
struct page *page;
pgoff_t index;
unsigned from, to;
unsigned block_in_page;
unsigned block_start, block_end;
sector_t block_in_file;
int nr_reads = 0;
int ret = 0;
int is_mapped_to_disk = 1;
index = pos >> PAGE_SHIFT;
from = pos & (PAGE_SIZE - 1);
to = from + len;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
*fsdata = NULL;
if (page_has_buffers(page)) {
ret = __block_write_begin(page, pos, len, get_block);
if (unlikely(ret))
goto out_release;
return ret;
}
if (PageMappedToDisk(page))
return 0;
/*
* Allocate buffers so that we can keep track of state, and potentially
* attach them to the page if an error occurs. In the common case of
* no error, they will just be freed again without ever being attached
* to the page (which is all OK, because we're under the page lock).
*
* Be careful: the buffer linked list is a NULL terminated one, rather
* than the circular one we're used to.
*/
head = alloc_page_buffers(page, blocksize, 0);
if (!head) {
ret = -ENOMEM;
goto out_release;
}
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
/*
* We loop across all blocks in the page, whether or not they are
* part of the affected region. This is so we can discover if the
* page is fully mapped-to-disk.
*/
for (block_start = 0, block_in_page = 0, bh = head;
block_start < PAGE_SIZE;
block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
int create;
block_end = block_start + blocksize;
bh->b_state = 0;
create = 1;
if (block_start >= to)
create = 0;
ret = get_block(inode, block_in_file + block_in_page,
bh, create);
if (ret)
goto failed;
if (!buffer_mapped(bh))
is_mapped_to_disk = 0;
if (buffer_new(bh))
clean_bdev_bh_alias(bh);
if (PageUptodate(page)) {
set_buffer_uptodate(bh);
continue;
}
if (buffer_new(bh) || !buffer_mapped(bh)) {
zero_user_segments(page, block_start, from,
to, block_end);
continue;
}
if (buffer_uptodate(bh))
continue; /* reiserfs does this */
if (block_start < from || block_end > to) {
lock_buffer(bh);
bh->b_end_io = end_buffer_read_nobh;
submit_bh(REQ_OP_READ, 0, bh);
nr_reads++;
}
}
if (nr_reads) {
/*
* The page is locked, so these buffers are protected from
* any VM or truncate activity. Hence we don't need to care
* for the buffer_head refcounts.
*/
for (bh = head; bh; bh = bh->b_this_page) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
ret = -EIO;
}
if (ret)
goto failed;
}
if (is_mapped_to_disk)
SetPageMappedToDisk(page);
*fsdata = head; /* to be released by nobh_write_end */
return 0;
failed:
BUG_ON(!ret);
/*
* Error recovery is a bit difficult. We need to zero out blocks that
* were newly allocated, and dirty them to ensure they get written out.
* Buffers need to be attached to the page at this point, otherwise
* the handling of potential IO errors during writeout would be hard
* (could try doing synchronous writeout, but what if that fails too?)
*/
attach_nobh_buffers(page, head);
page_zero_new_buffers(page, from, to);
out_release:
unlock_page(page);
put_page(page);
*pagep = NULL;
return ret;
}
EXPORT_SYMBOL(nobh_write_begin);
int nobh_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = page->mapping->host;
struct buffer_head *head = fsdata;
struct buffer_head *bh;
BUG_ON(fsdata != NULL && page_has_buffers(page));
if (unlikely(copied < len) && head)
attach_nobh_buffers(page, head);
if (page_has_buffers(page))
return generic_write_end(file, mapping, pos, len,
copied, page, fsdata);
SetPageUptodate(page);
set_page_dirty(page);
if (pos+copied > inode->i_size) {
i_size_write(inode, pos+copied);
mark_inode_dirty(inode);
}
unlock_page(page);
put_page(page);
while (head) {
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
}
return copied;
}
EXPORT_SYMBOL(nobh_write_end);
/*
* nobh_writepage() - based on block_full_write_page() except
* that it tries to operate without attaching bufferheads to
* the page.
*/
int nobh_writepage(struct page *page, get_block_t *get_block,
struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
int ret;
/* Is the page fully inside i_size? */
if (page->index < end_index)
goto out;
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
#if 0
/* Not really sure about this - do we need this ? */
if (page->mapping->a_ops->invalidatepage)
page->mapping->a_ops->invalidatepage(page, offset);
#endif
unlock_page(page);
return 0; /* don't care */
}
/*
* The page straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_SIZE);
out:
ret = mpage_writepage(page, get_block, wbc);
if (ret == -EAGAIN)
ret = __block_write_full_page(inode, page, get_block, wbc,
end_buffer_async_write);
return ret;
}
EXPORT_SYMBOL(nobh_writepage);
int nobh_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize;
sector_t iblock;
unsigned length, pos;
struct inode *inode = mapping->host;
struct page *page;
struct buffer_head map_bh;
int err;
blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!length)
return 0;
length = blocksize - length;
iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
page = grab_cache_page(mapping, index);
err = -ENOMEM;
if (!page)
goto out;
if (page_has_buffers(page)) {
has_buffers:
unlock_page(page);
put_page(page);
return block_truncate_page(mapping, from, get_block);
}
/* Find the buffer that contains "offset" */
pos = blocksize;
while (offset >= pos) {
iblock++;
pos += blocksize;
}
map_bh.b_size = blocksize;
map_bh.b_state = 0;
err = get_block(inode, iblock, &map_bh, 0);
if (err)
goto unlock;
/* unmapped? It's a hole - nothing to do */
if (!buffer_mapped(&map_bh))
goto unlock;
/* Ok, it's mapped. Make sure it's up-to-date */
if (!PageUptodate(page)) {
err = mapping->a_ops->readpage(NULL, page);
if (err) {
put_page(page);
goto out;
}
lock_page(page);
if (!PageUptodate(page)) {
err = -EIO;
goto unlock;
}
if (page_has_buffers(page))
goto has_buffers;
}
zero_user(page, offset, length);
set_page_dirty(page);
err = 0;
unlock:
unlock_page(page);
put_page(page);
out:
return err;
}
EXPORT_SYMBOL(nobh_truncate_page);
int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize;
sector_t iblock;
unsigned length, pos;
struct inode *inode = mapping->host;
struct page *page;
struct buffer_head *bh;
int err;
blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!length)
return 0;
length = blocksize - length;
iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
page = grab_cache_page(mapping, index);
err = -ENOMEM;
if (!page)
goto out;
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
/* Find the buffer that contains "offset" */
bh = page_buffers(page);
pos = blocksize;
while (offset >= pos) {
bh = bh->b_this_page;
iblock++;
pos += blocksize;
}
err = 0;
if (!buffer_mapped(bh)) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, iblock, bh, 0);
if (err)
goto unlock;
/* unmapped? It's a hole - nothing to do */
if (!buffer_mapped(bh))
goto unlock;
}
/* Ok, it's mapped. Make sure it's up-to-date */
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
err = -EIO;
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
goto unlock;
}
zero_user(page, offset, length);
mark_buffer_dirty(bh);
err = 0;
unlock:
unlock_page(page);
put_page(page);
out:
return err;
}
EXPORT_SYMBOL(block_truncate_page);
/*
* The generic ->writepage function for buffer-backed address_spaces
*/
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
/* Is the page fully inside i_size? */
if (page->index < end_index)
return __block_write_full_page(inode, page, get_block, wbc,
end_buffer_async_write);
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
do_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
/*
* The page straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_SIZE);
return __block_write_full_page(inode, page, get_block, wbc,
end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
get_block_t *get_block)
{
struct inode *inode = mapping->host;
struct buffer_head tmp = {
.b_size = i_blocksize(inode),
};
get_block(inode, block, &tmp, 0);
return tmp.b_blocknr;
}
EXPORT_SYMBOL(generic_block_bmap);
static void end_bio_bh_io_sync(struct bio *bio)
{
struct buffer_head *bh = bio->bi_private;
if (unlikely(bio_flagged(bio, BIO_QUIET)))
set_bit(BH_Quiet, &bh->b_state);
bh->b_end_io(bh, !bio->bi_status);
bio_put(bio);
}
/*
* This allows us to do IO even on the odd last sectors
* of a device, even if the block size is some multiple
* of the physical sector size.
*
* We'll just truncate the bio to the size of the device,
* and clear the end of the buffer head manually.
*
* Truly out-of-range accesses will turn into actual IO
* errors, this only handles the "we need to be able to
* do IO at the final sector" case.
*/
void guard_bio_eod(int op, struct bio *bio)
{
sector_t maxsector;
struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned truncated_bytes;
struct hd_struct *part;
rcu_read_lock();
part = __disk_get_part(bio->bi_disk, bio->bi_partno);
if (part)
maxsector = part_nr_sects_read(part);
else
maxsector = get_capacity(bio->bi_disk);
rcu_read_unlock();
if (!maxsector)
return;
/*
* If the *whole* IO is past the end of the device,
* let it through, and the IO layer will turn it into
* an EIO.
*/
if (unlikely(bio->bi_iter.bi_sector >= maxsector))
return;
maxsector -= bio->bi_iter.bi_sector;
if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
return;
/* Uhhuh. We've got a bio that straddles the device size! */
truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
/*
* The bio contains more than one segment which spans EOD, just return
* and let IO layer turn it into an EIO
*/
if (truncated_bytes > bvec->bv_len)
return;
/* Truncate the bio.. */
bio->bi_iter.bi_size -= truncated_bytes;
bvec->bv_len -= truncated_bytes;
/* ..and clear the end of the buffer for reads */
if (op == REQ_OP_READ) {
zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
truncated_bytes);
}
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
enum rw_hint write_hint, struct writeback_control *wbc)
{
struct bio *bio;
BUG_ON(!buffer_locked(bh));
BUG_ON(!buffer_mapped(bh));
BUG_ON(!bh->b_end_io);
BUG_ON(buffer_delay(bh));
BUG_ON(buffer_unwritten(bh));
/*
* Only clear out a write error when rewriting
*/
if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
clear_buffer_write_io_error(bh);
/*
* from here on down, it's all bio -- do the initial mapping,
* submit_bio -> generic_make_request may further map this bio around
*/
bio = bio_alloc(GFP_NOIO, 1);
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_io(wbc, bh->b_page, bh->b_size);
}
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
/* Take care of bh's that straddle the end of the device */
guard_bio_eod(op, bio);
if (buffer_meta(bh))
op_flags |= REQ_META;
if (buffer_prio(bh))
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
submit_bio(bio);
return 0;
}
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
* @op: whether to %READ or %WRITE
* @op_flags: req_flag_bits
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
* ll_rw_block() takes an array of pointers to &struct buffer_heads, and
* requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
* @op_flags contains flags modifying the detailed I/O behavior, most notably
* %REQ_RAHEAD.
*
* This function drops any buffer that it cannot get a lock on (with the
* BH_Lock state bit), any buffer that appears to be clean when doing a write
* request, and any buffer that appears to be up-to-date when doing read
* request. Further it marks as clean buffers that are processed for
* writing (the buffer cache won't assume that they are actually clean
* until the buffer gets unlocked).
*
* ll_rw_block sets b_end_io to simple completion handler that marks
* the buffer up-to-date (if appropriate), unlocks the buffer and wakes
* any waiters.
*
* All of the buffers must be for the same device, and must also be a
* multiple of the current approved size for the device.
*/
void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
{
int i;
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
if (!trylock_buffer(bh))
continue;
if (op == WRITE) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
submit_bh(op, op_flags, bh);
continue;
}
} else {
if (!buffer_uptodate(bh)) {
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
submit_bh(op, op_flags, bh);
continue;
}
}
unlock_buffer(bh);
}
}
EXPORT_SYMBOL(ll_rw_block);
void write_dirty_buffer(struct buffer_head *bh, int op_flags)
{
lock_buffer(bh);
if (!test_clear_buffer_dirty(bh)) {
unlock_buffer(bh);
return;
}
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
submit_bh(REQ_OP_WRITE, op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);
/*
* For a data-integrity writeout, we need to wait upon any in-progress I/O
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
{
int ret = 0;
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
wait_on_buffer(bh);
if (!ret && !buffer_uptodate(bh))
ret = -EIO;
} else {
unlock_buffer(bh);
}
return ret;
}
EXPORT_SYMBOL(__sync_dirty_buffer);
int sync_dirty_buffer(struct buffer_head *bh)
{
return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
/*
* try_to_free_buffers() checks if all the buffers on this particular page
* are unused, and releases them if so.
*
* Exclusion against try_to_free_buffers may be obtained by either
* locking the page or by holding its mapping's private_lock.
*
* If the page is dirty but all the buffers are clean then we need to
* be sure to mark the page clean as well. This is because the page
* may be against a block device, and a later reattachment of buffers
* to a dirty page will set *all* buffers dirty. Which would corrupt
* filesystem data on the same device.
*
* The same applies to regular filesystem pages: if all the buffers are
* clean then we set the page clean and proceed. To do that, we require
* total exclusion from __set_page_dirty_buffers(). That is obtained with
* private_lock.
*
* try_to_free_buffers() is non-blocking.
*/
static inline int buffer_busy(struct buffer_head *bh)
{
return atomic_read(&bh->b_count) |
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
static int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh;
bh = head;
do {
if (buffer_busy(bh)) {
/*
* Check if the busy failure was due to an
* outstanding LRU reference
*/
evict_bh_lrus(bh);
if (buffer_busy(bh))
goto failed;
}
bh = bh->b_this_page;
} while (bh != head);
do {
struct buffer_head *next = bh->b_this_page;
if (bh->b_assoc_map)
__remove_assoc_queue(bh);
bh = next;
} while (bh != head);
*buffers_to_free = head;
__clear_page_buffers(page);
return 1;
failed:
return 0;
}
int try_to_free_buffers(struct page *page)
{
struct address_space * const mapping = page->mapping;
struct buffer_head *buffers_to_free = NULL;
int ret = 0;
BUG_ON(!PageLocked(page));
if (PageWriteback(page))
return 0;
if (mapping == NULL) { /* can this still happen? */
ret = drop_buffers(page, &buffers_to_free);
goto out;
}
spin_lock(&mapping->private_lock);
ret = drop_buffers(page, &buffers_to_free);
/*
* If the filesystem writes its buffers by hand (eg ext3)
* then we can have clean buffers against a dirty page. We
* clean the page here; otherwise the VM will never notice
* that the filesystem did any IO at all.
*
* Also, during truncate, discard_buffer will have marked all
* the page's buffers clean. We discover that here and clean
* the page also.
*
* private_lock must be held over this entire operation in order
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
if (ret)
cancel_dirty_page(page);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
do {
struct buffer_head *next = bh->b_this_page;
free_buffer_head(bh);
bh = next;
} while (bh != buffers_to_free);
}
return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
/*
* There are no bdflush tunables left. But distributions are
* still running obsolete flush daemons, so we terminate them here.
*
* Use of bdflush() is deprecated and will be removed in a future kernel.
* The `flush-X' kernel threads fully replace bdflush daemons and this call.
*/
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
static int msg_count;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (msg_count < 5) {
msg_count++;
printk(KERN_INFO
"warning: process `%s' used the obsolete bdflush"
" system call\n", current->comm);
printk(KERN_INFO "Fix your initscripts?\n");
}
if (func == 1)
do_exit(0);
return 0;
}
/*
* Buffer-head allocation
*/
static struct kmem_cache *bh_cachep __read_mostly;
/*
* Once the number of bh's in the machine exceeds this level, we start
* stripping them in writeback.
*/
static unsigned long max_buffer_heads;
int buffer_heads_over_limit;
struct bh_accounting {
int nr; /* Number of live bh's */
int ratelimit; /* Limit cacheline bouncing */
};
static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
static void recalc_bh_state(void)
{
int i;
int tot = 0;
if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
return;
__this_cpu_write(bh_accounting.ratelimit, 0);
for_each_online_cpu(i)
tot += per_cpu(bh_accounting, i).nr;
buffer_heads_over_limit = (tot > max_buffer_heads);
}
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
preempt_disable();
__this_cpu_inc(bh_accounting.nr);
recalc_bh_state();
preempt_enable();
}
return ret;
}
EXPORT_SYMBOL(alloc_buffer_head);
void free_buffer_head(struct buffer_head *bh)
{
BUG_ON(!list_empty(&bh->b_assoc_buffers));
kmem_cache_free(bh_cachep, bh);
preempt_disable();
__this_cpu_dec(bh_accounting.nr);
recalc_bh_state();
preempt_enable();
}
EXPORT_SYMBOL(free_buffer_head);
static int buffer_exit_cpu_dead(unsigned int cpu)
{
int i;
struct bh_lru *b = &per_cpu(bh_lrus, cpu);
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(b->bhs[i]);
b->bhs[i] = NULL;
}
this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
per_cpu(bh_accounting, cpu).nr = 0;
return 0;
}
/**
* bh_uptodate_or_lock - Test whether the buffer is uptodate
* @bh: struct buffer_head
*
* Return true if the buffer is up-to-date and false,
* with the buffer locked, if not.
*/
int bh_uptodate_or_lock(struct buffer_head *bh)
{
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
if (!buffer_uptodate(bh))
return 0;
unlock_buffer(bh);
}
return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
* bh_submit_read - Submit a locked buffer for reading
* @bh: struct buffer_head
*
* Returns zero on success and -EIO on error.
*/
int bh_submit_read(struct buffer_head *bh)
{
BUG_ON(!buffer_locked(bh));
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
return 0;
}
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, 0, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return 0;
return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
/*
* Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
*
* Returns the offset within the file on success, and -ENOENT otherwise.
*/
static loff_t
page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
{
loff_t offset = page_offset(page);
struct buffer_head *bh, *head;
bool seek_data = whence == SEEK_DATA;
if (lastoff < offset)
lastoff = offset;
bh = head = page_buffers(page);
do {
offset += bh->b_size;
if (lastoff >= offset)
continue;
/*
* Unwritten extents that have data in the page cache covering
* them can be identified by the BH_Unwritten state flag.
* Pages with multiple buffers might have a mix of holes, data
* and unwritten extents - any buffer with valid data in it
* should have BH_Uptodate flag set on it.
*/
if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
return lastoff;
lastoff = offset;
} while ((bh = bh->b_this_page) != head);
return -ENOENT;
}
/*
* Seek for SEEK_DATA / SEEK_HOLE in the page cache.
*
* Within unwritten extents, the page cache determines which parts are holes
* and which are data: unwritten and uptodate buffer heads count as data;
* everything else counts as a hole.
*
* Returns the resulting offset on successs, and -ENOENT otherwise.
*/
loff_t
page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
int whence)
{
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
loff_t lastoff = offset;
struct pagevec pvec;
if (length <= 0)
return -ENOENT;
pagevec_init(&pvec, 0);
do {
unsigned nr_pages, i;
nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
end - 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
/*
* At this point, the page may be truncated or
* invalidated (changing page->mapping to NULL), or
* even swizzled back from swapper_space to tmpfs file
* mapping. However, page->index will not change
* because we have a reference on the page.
*
* If current page offset is beyond where we've ended,
* we've found a hole.
*/
if (whence == SEEK_HOLE &&
lastoff < page_offset(page))
goto check_range;
lock_page(page);
if (likely(page->mapping == inode->i_mapping) &&
page_has_buffers(page)) {
lastoff = page_seek_hole_data(page, lastoff, whence);
if (lastoff >= 0) {
unlock_page(page);
goto check_range;
}
}
unlock_page(page);
lastoff = page_offset(page) + PAGE_SIZE;
}
pagevec_release(&pvec);
} while (index < end);
/* When no page at lastoff and we are not done, we found a hole. */
if (whence != SEEK_HOLE)
goto not_found;
check_range:
if (lastoff < offset + length)
goto out;
not_found:
lastoff = -ENOENT;
out:
pagevec_release(&pvec);
return lastoff;
}
void __init buffer_init(void)
{
unsigned long nrpages;
int ret;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
NULL);
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
*/
nrpages = (nr_free_buffer_pages() * 10) / 100;
max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
NULL, buffer_exit_cpu_dead);
WARN_ON(ret < 0);
}