mirror of
https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
* remotes/origin/tmp-b7e55e8: Linux 4.14.61 scsi: sg: fix minor memory leak in error path drm/vc4: Reset ->{x, y}_scaling[1] when dealing with uniplanar formats crypto: padlock-aes - Fix Nano workaround data corruption RDMA/uverbs: Expand primary and alt AV port checks iwlwifi: add more card IDs for 9000 series userfaultfd: remove uffd flags from vma->vm_flags if UFFD_EVENT_FORK fails audit: fix potential null dereference 'context->module.name' kvm: x86: vmx: fix vpid leak x86/entry/64: Remove %ebx handling from error_entry/exit x86/apic: Future-proof the TSC_DEADLINE quirk for SKX virtio_balloon: fix another race between migration and ballooning net: socket: fix potential spectre v1 gadget in socketcall can: ems_usb: Fix memory leak on ems_usb_disconnect() squashfs: more metadata hardenings squashfs: more metadata hardening net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager rxrpc: Fix user call ID check in rxrpc_service_prealloc_one net: stmmac: Fix WoL for PCI-based setups netlink: Fix spectre v1 gadget in netlink_create() net: dsa: Do not suspend/resume closed slave_dev ipv4: frags: handle possible skb truesize change inet: frag: enforce memory limits earlier bonding: avoid lockdep confusion in bond_get_stats() Linux 4.14.60 tcp: add one more quick ack after after ECN events tcp: refactor tcp_ecn_check_ce to remove sk type cast tcp: do not aggressively quick ack after ECN events tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode tcp: do not force quickack when receiving out-of-order packets netlink: Don't shift with UB on nlk->ngroups netlink: Do not subscribe to non-existent groups xen-netfront: wait xenbus state change when load module manually tcp_bbr: fix bw probing to raise in-flight data for very small BDPs NET: stmmac: align DMA stuff to largest cache line length net: mdio-mux: bcm-iproc: fix wrong getter and setter pair net: lan78xx: fix rx handling before first packet is send net: fix amd-xgbe flow-control issue net: ena: Fix use of uninitialized DMA address bits field ipv4: remove BUG_ON() from fib_compute_spec_dst net: dsa: qca8k: Allow overwriting CPU port setting net: dsa: qca8k: Add QCA8334 binding documentation net: dsa: qca8k: Enable RXMAC when bringing up a port net: dsa: qca8k: Force CPU port to its highest bandwidth RDMA/uverbs: Protect from attempts to create flows on unsupported QP usb: gadget: udc: renesas_usb3: should remove debugfs ovl: Sync upper dirty data when syncing overlayfs PCI: xgene: Remove leftover pci_scan_child_bus() call PCI: pciehp: Assume NoCompl+ for Thunderbolt ports ext4: fix check to prevent initializing reserved inodes ext4: check for allocation block validity with block group locked ext4: fix inline data updates with checksums enabled squashfs: be more careful about metadata corruption random: mix rdrand with entropy sent in from userspace block: reset bi_iter.bi_done after splitting bio blkdev: __blkdev_direct_IO_simple: fix leak in error case block: bio_iov_iter_get_pages: fix size of last iovec drm/dp/mst: Fix off-by-one typo when dump payload table drm/atomic-helper: Drop plane->fb references only for drm_atomic_helper_shutdown() drm: Add DP PSR2 sink enable bit ASoC: topology: Add missing clock gating parameter when parsing hw_configs ASoC: topology: Fix bclk and fsync inversion in set_link_hw_format() media: si470x: fix __be16 annotations media: atomisp: compat32: fix __user annotations scsi: cxlflash: Avoid clobbering context control register value scsi: cxlflash: Synchronize reset and remove ops 
scsi: megaraid_sas: Increase timeout by 1 sec for non-RAID fastpath IOs scsi: scsi_dh: replace too broad "TP9" string with the exact models regulator: Don't return or expect -errno from of_map_mode() media: omap3isp: fix unbalanced dma_iommu_mapping crypto: authenc - don't leak pointers to authenc keys crypto: authencesn - don't leak pointers to authenc keys usb: hub: Don't wait for connect state at resume for powered-off ports microblaze: Fix simpleImage format generation soc: imx: gpcv2: Do not pass static memory as platform data serial: core: Make sure compiler barfs for 16-byte earlycon names staging: lustre: ldlm: free resource when ldlm_lock_create() fails. staging: lustre: llite: correct removexattr detection staging: vchiq_core: Fix missing semaphore release in error case audit: allow not equal op for audit by executable rsi: fix nommu_map_sg overflow kernel panic rsi: Fix 'invalid vdd' warning in mmc ipconfig: Correctly initialise ic_nameservers drm/gma500: fix psb_intel_lvds_mode_valid()'s return type igb: Fix queue selection on MAC filters on i210 arm64: defconfig: Enable Rockchip io-domain driver nvme: lightnvm: add granby support memory: tegra: Apply interrupts mask per SoC memory: tegra: Do not handle spurious interrupts delayacct: Use raw_spinlocks stop_machine: Use raw spinlocks backlight: pwm_bl: Don't use GPIOF_* with gpiod_get_direction dt-bindings: net: meson-dwmac: new compatible name for AXG SoC net: hns3: Fixes the out of bounds access in hclge_map_tqp spi: meson-spicc: Fix error handling in meson_spicc_probe() dt-bindings: pinctrl: meson: add support for the Meson8m2 SoC mmc: pwrseq: Use kmalloc_array instead of stack VLA mmc: dw_mmc: update actual clock for mmc debugfs ALSA: hda/ca0132: fix build failure when a local macro is defined drm/atomic: Handling the case when setting old crtc for plane media: siano: get rid of __le32/__le16 cast warnings f2fs: avoid fsync() failure caused by EAGAIN in writepage() bpf: fix references to free_bpf_prog_info() in comments thermal: exynos: fix setting rising_threshold for Exynos5433 staging: lustre: o2iblnd: Fix FastReg map/unmap for MLX5 staging: lustre: o2iblnd: fix race at kiblnd_connect_peer scsi: qedf: Set the UNLOADING flag when removing a vport scsi: hisi_sas: config ATA de-reset as an constrained command for v3 hw scsi: megaraid: silence a static checker bug scsi: 3w-xxxx: fix a missing-check bug scsi: 3w-9xxx: fix a missing-check bug bnxt_en: Check unsupported speeds in bnxt_update_link() on PF only. 
perf: fix invalid bit in diagnostic entry s390/cpum_sf: Add data entry sizes to sampling trailer entry brcmfmac: Add support for bcm43364 wireless chipset mtd: rawnand: fsl_ifc: fix FSL NAND driver to read all ONFI parameter pages media: saa7164: Fix driver name in debug output media: media-device: fix ioctl function types ACPI / LPSS: Only call pwm_add_table() for Bay Trail PWM if PMIC HRV is 2 libata: Fix command retry decision media: rcar_jpu: Add missing clk_disable_unprepare() on error in jpu_open() net: phy: phylink: Release link GPIO dma-iommu: Fix compilation when !CONFIG_IOMMU_DMA tty: Fix data race in tty_insert_flip_string_fixed_flag i40e: free the skb after clearing the bitlock nvmem: properly handle returned value nvmem_reg_read ARM: dts: sh73a0: Add missing interrupt-affinity to PMU node ARM: dts: emev2: Add missing interrupt-affinity to PMU node ARM: dts: stih407-pinctrl: Fix complain about IRQ_TYPE_NONE usage EDAC, altera: Fix ARM64 build warning HID: i2c-hid: check if device is there before really probing powerpc/embedded6xx/hlwd-pic: Prevent interrupts from being handled by Starlet drm/amdgpu: Remove VRAM from shared bo domains. drm/radeon: fix mode_valid's return type arm64: dts: renesas: salvator-common: use audio-graph-card for Sound HID: hid-plantronics: Re-resend Update to map button for PTT products arm64: cmpwait: Clear event register before arming exclusive monitor media: atomisp: ov2680: don't declare unused vars ALSA: usb-audio: Apply rate limit to warning messages in URB complete callback net: ethernet: ti: cpsw-phy-sel: check bus_find_device() ret value media: smiapp: fix timeout checking in smiapp_read_nvm ixgbevf: fix MAC address changes through ixgbevf_set_mac() md: fix NULL dereference of mddev->pers in remove_and_add_spares() md/raid1: add error handling of read error from FailFast device regulator: pfuze100: add .is_enable() for pfuze100_swb_regulator_ops ALSA: emu10k1: Rate-limit error messages about page errors rtc: tps65910: fix possible race condition rtc: vr41xx: fix possible race condition rtc: tps6586x: fix possible race condition Bluetooth: btusb: add ID for LiteOn 04ca:301a drm/nouveau/fifo/gk104-: poll for runlist update completion scsi: zfcp: assert that the ERP lock is held when tracing a recovery trigger scsi: ufs: fix exception event handling scsi: ufs: ufshcd: fix possible unclocked register access fscrypt: use unbound workqueue for decryption net: hns3: Fix the missing client list node initialization spi: Add missing pm_runtime_put_noidle() after failed get drivers/perf: arm-ccn: don't log to dmesg in event_init ima: based on policy verify firmware signatures (pre-allocated buffer) mwifiex: correct histogram data with appropriate index net: dsa: qca8k: Add support for QCA8334 switch PCI: pciehp: Request control of native hotplug only if supported bpf: powerpc64: pad function address loads with NOPs pinctrl: at91-pio4: add missing of_node_put powerpc/8xx: fix invalid register expression in head_8xx.S spi: sh-msiof: Fix setting SIRMDR1.SYNCAC to match SITMDR1.SYNCAC powerpc: Add __printf verification to prom_printf powerpc/powermac: Mark variable x as unused powerpc/powermac: Add missing prototype for note_bootable_part() powerpc/chrp/time: Make some functions static, add missing header include powerpc/32: Add a missing include header ath: Add regulatory mapping for Bahamas ath: Add regulatory mapping for Bermuda ath: Add regulatory mapping for Serbia ath: Add regulatory mapping for Tanzania ath: Add regulatory mapping for Uganda ath: Add 
regulatory mapping for APL2_FCCA ath: Add regulatory mapping for APL13_WORLD ath: Add regulatory mapping for ETSI8_WORLD ath: Add regulatory mapping for FCC3_ETSIC nvme-pci: Fix AER reset handling nvme-rdma: stop admin queue before freeing it PCI: Prevent sysfs disable of device while driver is attached PM / wakeup: Make s2idle_lock a RAW_SPINLOCK x86/microcode: Make the late update update_lock a raw lock for RT btrfs: qgroup: Finish rescan when hit the last leaf of extent tree btrfs: add barriers to btrfs_sync_log before log_commit_wait wakeups Btrfs: don't BUG_ON() in btrfs_truncate_inode_items() Btrfs: don't return ino to ino cache if inode item removal fails media: videobuf2-core: don't call memop 'finish' when queueing media: tw686x: Fix incorrect vb2_mem_ops GFP flags net: hns3: Fixes the init of the VALID BD info in the descriptor wlcore: sdio: check for valid platform device data before suspend mwifiex: handle race during mwifiex_usb_disconnect mfd: cros_ec: Fail early if we cannot identify the EC ASoC: dpcm: fix BE dai not hw_free and shutdown Bluetooth: btusb: Add a new Realtek 8723DE ID 2ff8:b011 Bluetooth: hci_qca: Fix "Sleep inside atomic section" warning iwlwifi: pcie: fix race in Rx buffer allocator btrfs: balance dirty metadata pages in btrfs_finish_ordered_io PCI: Fix devm_pci_alloc_host_bridge() memory leak selftests: intel_pstate: return Kselftest Skip code for skipped tests selftests: memfd: return Kselftest Skip code for skipped tests selftests/intel_pstate: Improve test, minor fixes perf/x86/intel/uncore: Correct fixed counter index check for NHM perf/x86/intel/uncore: Correct fixed counter index check in generic code usbip: dynamically allocate idev by nports found in sysfs usbip: usbip_detach: Fix memory, udev context and udev leak block, bfq: remove wrong lock in bfq_requests_merged f2fs: fix race in between GC and atomic open f2fs: fix to detect failure of dquot_initialize f2fs: Fix deadlock in shutdown ioctl f2fs: fix to wait page writeback during revoking atomic write f2fs: fix to don't trigger writeback during recovery f2fs: fix error path of move_data_page disable loading f2fs module on PAGE_SIZE > 4KB pnfs: Don't release the sequence slot until we've processed layoutget on open netfilter: nf_tables: check msg_type before nft_trans_set(trans) lightnvm: pblk: warn in case of corrupted write buffer RDMA/mad: Convert BUG_ONs to error flows powerpc/64s: Fix compiler store ordering to SLB shadow area hvc_opal: don't set tb_ticks_per_usec in udbg_init_opal_common() powerpc/eeh: Fix use-after-release of EEH driver powerpc/64s: Add barrier_nospec powerpc/lib: Adjust .balign inside string functions for PPC32 infiniband: fix a possible use-after-free bug e1000e: Ignore TSYNCRXCTL when getting I219 clock attributes ceph: fix alignment of rasize bpf, arm32: fix inconsistent naming about emit_a32_lsr_{r64,i64} printk: drop in_nmi check from printk_safe_flush_on_panic() watchdog: da9063: Fix updating timeout value irqchip/ls-scfg-msi: Map MSIs in the iommu netfilter: ipset: List timing out entries with "timeout 1" instead of zero netfilter: ipset: forbid family for hash:mac sets perf tools: Fix pmu events parsing rule rtc: ensure rtc_set_alarm fails when alarms are not supported mm/slub.c: add __printf verification to slab_err() mm: vmalloc: avoid racy handling of debugobjects in vunmap mm: /proc/pid/pagemap: hide swap entries from unprivileged users kernel/hung_task.c: show all hung tasks before panic vfio/type1: Fix task tracking for QEMU vCPU hotplug vfio/mdev: Check 
globally for duplicate devices vfio: platform: Fix reset module leak in error path nfsd: fix potential use-after-free in nfsd4_decode_getdeviceinfo NFSv4.1: Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY ALSA: fm801: add error handling for snd_ctl_add ALSA: emu10k1: add error handling for snd_ctl_add skip LAYOUTRETURN if layout is invalid hv_netvsc: fix network namespace issues with VF support xen/netfront: raise max number of slots in xennet_get_responses() kcov: ensure irq code sees a valid area mlxsw: spectrum_switchdev: Fix port_vlan refcounting arm64: fix vmemmap BUILD_BUG_ON() triggering on !vmemmap setups tracing: Quiet gcc warning about maybe unused link variable tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure kthread, tracing: Don't expose half-written comm when creating kthreads tracing: Fix possible double free in event_enable_trigger_func() tracing: Fix double free of event_trigger_data delayacct: fix crash in delayacct_blkio_end() after delayacct init failure kvm, mm: account shadow page tables to kmemcg Input: elan_i2c - add another ACPI ID for Lenovo Ideapad 330-15AST Input: i8042 - add Lenovo LaVie Z to the i8042 reset list Input: elan_i2c - add ACPI ID for lenovo ideapad 330 spi: spi-s3c64xx: Fix system resume support drivers/infiniband/ulp/srpt/ib_srpt.c: fix build with gcc-4.4.4 IB/srpt: Fix an out-of-bounds stack access in srpt_zerolength_write() drivers/infiniband/core/verbs.c: fix build with gcc-4.4.4 RDMA/core: Avoid that ib_drain_qp() triggers an out-of-bounds stack access i2c: core: decrease reference count of device node in i2c_unregister_device fork: unconditionally clear stack on fork Linux 4.14.59 turn off -Wattribute-alias can: m_can.c: fix setup of CCCR register: clear CCCR NISO bit before checking can.ctrlmode can: peak_canfd: fix firmware < v3.3.0: limit allocation to 32-bit DMA addr only can: xilinx_can: fix RX overflow interrupt not being enabled can: xilinx_can: fix incorrect clear of non-processed interrupts can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting can: xilinx_can: fix device dropping off bus on RX overrun can: xilinx_can: fix recovery from error states not being propagated can: xilinx_can: fix power management handling can: xilinx_can: fix RX loop if RXNEMP is asserted without RXOK driver core: Partially revert "driver core: correct device's shutdown order" usb: gadget: f_fs: Only return delayed status when len is 0 usb: dwc2: Fix DMA alignment to start at allocated boundary usb: core: handle hub C_PORT_OVER_CURRENT condition usb: cdc_acm: Add quirk for Castles VEGA3000 staging: speakup: fix wraparound in uaccess length check tcp: add tcp_ooo_try_coalesce() helper tcp: call tcp_drop() from tcp_data_queue_ofo() tcp: detect malicious patterns in tcp_collapse_ofo_queue() tcp: avoid collapses in tcp_prune_queue() if possible tcp: free batches of packets in tcp_prune_ofo_queue() tcp: do not delay ACK in DCTCP upon CE status change tcp: do not cancel delay-AcK on DCTCP special ACK tcp: helpers to send special DCTCP ack tcp: fix dctcp delayed ACK schedule vxlan: fix default fdb entry netlink notify ordering during netdev create vxlan: make netlink notify in vxlan_fdb_destroy optional vxlan: add new fdb alloc and create helpers rtnetlink: add rtnl_link_state check in rtnl_configure_link sock: fix sg page frag coalescing in sk_alloc_sg net: phy: consider PHY_IGNORE_INTERRUPT in phy_start_aneg_priv multicast: do not restore deleted record source filter mode to new one net/ipv6: Fix linklocal to global 
address with VRF net/mlx5e: Fix quota counting in aRFS expire flow net/mlx5e: Don't allow aRFS for encapsulated packets net/mlx5: Adjust clock overflow work period net: skb_segment() should not return NULL net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull ip: hash fragments consistently bonding: set default miimon value for non-arp modes if not set drm/nouveau: Set DRIVER_ATOMIC cap earlier to fix debugfs drm/nouveau/drm/nouveau: Fix runtime PM leak in nv50_disp_atomic_commit() KVM: PPC: Check if IOMMU page is contained in the pinned physical page xen/PVH: Set up GS segment for stack canary MIPS: Fix off-by-one in pci_resource_to_user() MIPS: ath79: fix register address in ath79_ddr_wb_flush() Revert "cifs: Fix slab-out-of-bounds in send_set_info() on SMB2 ACE setting" ANDROID: verity: really fix android-verity Kconfig tcp: add tcp_ooo_try_coalesce() helper tcp: call tcp_drop() from tcp_data_queue_ofo() tcp: detect malicious patterns in tcp_collapse_ofo_queue() tcp: avoid collapses in tcp_prune_queue() if possible tcp: free batches of packets in tcp_prune_ofo_queue() x86_64_cuttlefish_defconfig: Enable android-verity x86_64_cuttlefish_defconfig: enable verity cert ANDROID: android-verity: Fix broken parameter handling. ANDROID: android-verity: Make it work with newer kernels ANDROID: android-verity: Add API to verify signature with builtin keys. ANDROID: verity: fix android-verity Kconfig dependencies Linux 4.14.58 xhci: Fix perceived dead host due to runtime suspend race with event handler powerpc/powernv: Fix save/restore of SPRG3 on entry/exit from stop (idle) cxl_getfile(): fix double-iput() on alloc_file() failures alpha: fix osf_wait4() breakage net: usb: asix: replace mii_nway_restart in resume path ipv6: make DAD fail with enhanced DAD when nonce length differs net: systemport: Fix CRC forwarding check for SYSTEMPORT Lite net/mlx4_en: Don't reuse RX page when XDP is set hv_netvsc: Fix napi reschedule while receive completion is busy tg3: Add higher cpu clock for 5762. 
qmi_wwan: add support for Quectel EG91 ptp: fix missing break in switch net: phy: fix flag masking in __set_phy_supported net/ipv4: Set oif in fib_compute_spec_dst skbuff: Unconditionally copy pfmemalloc in __skb_clone() net: Don't copy pfmemalloc flag in __copy_skb_header() net: diag: Don't double-free TCP_NEW_SYN_RECV sockets in tcp_abort lib/rhashtable: consider param->min_size when setting initial table size ipv6: ila: select CONFIG_DST_CACHE ipv6: fix useless rol32 call on hash ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns gen_stats: Fix netlink stats dumping in the presence of padding drm/nouveau: Avoid looping through fake MST connectors drm/nouveau: Use drm_connector_list_iter_* for iterating connectors drm/i915: Fix hotplug irq ack on i965/g4x stop_machine: Disable preemption when waking two stopper threads vfio/spapr: Use IOMMU pageshift rather than pagesize vfio/pci: Fix potential Spectre v1 cpufreq: intel_pstate: Register when ACPI PCCH is present mm/huge_memory.c: fix data loss when splitting a file pmd mm: memcg: fix use after free in mem_cgroup_iter() ARC: mm: allow mprotect to make stack mappings executable ARC: configs: Remove CONFIG_INITRAMFS_SOURCE from defconfigs ARC: Fix CONFIG_SWAP ARCv2: [plat-hsdk]: Save accl reg pair by default ALSA: hda: add mute led support for HP ProBook 455 G5 ALSA: hda/realtek - Add Panasonic CF-SZ6 headset jack quirk ALSA: rawmidi: Change resized buffers atomically fat: fix memory allocation failure handling of match_strdup() x86/MCE: Remove min interval polling limitation x86/events/intel/ds: Fix bts_interrupt_threshold alignment x86/apm: Don't access __preempt_count with zeroed fs KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel. scsi: sd_zbc: Fix variable type and bogus comment ANDROID: uid_sys_stats: Replace tasklist lock with RCU in uid_cputime_show Linux 4.14.57 string: drop __must_check from strscpy() and restore strscpy() usages in cgroup arm64: KVM: Add ARCH_WORKAROUND_2 discovery through ARCH_FEATURES_FUNC_ID arm64: KVM: Handle guest's ARCH_WORKAROUND_2 requests arm64: KVM: Add ARCH_WORKAROUND_2 support for guests arm64: KVM: Add HYP per-cpu accessors arm64: ssbd: Add prctl interface for per-thread mitigation arm64: ssbd: Introduce thread flag to control userspace mitigation arm64: ssbd: Restore mitigation status on CPU resume arm64: ssbd: Skip apply_ssbd if not using dynamic mitigation arm64: ssbd: Add global mitigation state accessor arm64: Add 'ssbd' command-line option arm64: Add ARCH_WORKAROUND_2 probing arm64: Add per-cpu infrastructure to call ARCH_WORKAROUND_2 arm64: Call ARCH_WORKAROUND_2 on transitions between EL0 and EL1 arm/arm64: smccc: Add SMCCC-specific return codes KVM: arm64: Avoid storing the vcpu pointer on the stack KVM: arm/arm64: Do not use kern_hyp_va() with kvm_vgic_global_state arm64: alternatives: Add dynamic patching feature KVM: arm64: Stop save/restoring host tpidr_el1 on VHE arm64: alternatives: use tpidr_el2 on VHE hosts KVM: arm64: Change hyp_panic()s dependency on tpidr_el2 KVM: arm/arm64: Convert kvm_host_cpu_state to a static per-cpu allocation KVM: arm64: Store vcpu on the stack during __guest_enter() net/nfc: Avoid stalls when nfc_alloc_send_skb() returned NULL. 
rds: avoid unenecessary cong_update in loop transport bdi: Fix another oops in wb_workfn() netfilter: ipv6: nf_defrag: drop skb dst before queueing nsh: set mac len based on inner packet autofs: fix slab out of bounds read in getname_kernel() tls: Stricter error checking in zerocopy sendmsg path KEYS: DNS: fix parsing multiple options reiserfs: fix buffer overflow with long warning messages netfilter: ebtables: reject non-bridge targets PCI: hv: Disable/enable IRQs rather than BH in hv_compose_msi_msg() block: do not use interruptible wait anywhere mtd: rawnand: denali_dt: set clk_x_rate to 200 MHz unconditionally crypto: af_alg - Initialize sg_num_bytes in error code path clocksource: Initialize cs->wd_list media: rc: oops in ir_timer_keyup after device unplug xhci: Fix USB3 NULL pointer dereference at logical disconnect. net: lan78xx: Fix race in tx pending skb size calculation rtlwifi: rtl8821ae: fix firmware is not ready to run rtlwifi: Fix kernel Oops "Fw download fail!!" net: cxgb3_main: fix potential Spectre v1 VSOCK: fix loopback on big-endian systems vhost_net: validate sock before trying to put its fd tcp: prevent bogus FRTO undos with non-SACK flows tcp: fix Fast Open key endianness strparser: Remove early eaten to fix full tcp receive buffer stall stmmac: fix DMA channel hang in half-duplex mode r8152: napi hangup fix after disconnect qmi_wwan: add support for the Dell Wireless 5821e module qed: Limit msix vectors in kdump kernel to the minimum required count. qed: Fix use of incorrect size in memcpy call. qed: Fix setting of incorrect eswitch mode. qede: Adverstise software timestamp caps when PHC is not available. net/tcp: Fix socket lookups with SO_BINDTODEVICE net: sungem: fix rx checksum support net_sched: blackhole: tell upper qdisc about dropped packets net/packet: fix use-after-free net: mvneta: fix the Rx desc DMA address in the Rx path net/mlx5: Fix wrong size allocation for QoS ETC TC regitster net/mlx5: Fix required capability for manipulating MPFS net/mlx5: Fix incorrect raw command length parsing net/mlx5: Fix command interface race in polling mode net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager net/mlx5e: Don't attempt to dereference the ppriv struct if not being eswitch manager net/mlx5e: Avoid dealing with vport representors if not being e-switch manager net: macb: Fix ptp time adjustment for large negative delta net: fix use-after-free in GRO with ESP net: dccp: switch rx_tstamp_last_feedback to monotonic clock net: dccp: avoid crash in ccid3_hc_rx_send_feedback() ixgbe: split XDP_TX tail and XDP_REDIRECT map flushing ipvlan: fix IFLA_MTU ignored on NEWLINK ipv6: sr: fix passing wrong flags to crypto_alloc_shash() hv_netvsc: split sub-channel setup into async and sync atm: zatm: Fix potential Spectre v1 atm: Preserve value of skb->truesize when accounting to vcc alx: take rtnl before calling __alx_open from resume crypto: crypto4xx - fix crypto4xx_build_pdr, crypto4xx_build_sdr leak crypto: crypto4xx - remove bad list_del PCI: exynos: Fix a potential init_clk_resources NULL pointer dereference bcm63xx_enet: do not write to random DMA channel on BCM6345 bcm63xx_enet: correct clock usage ocfs2: ip_alloc_sem should be taken in ocfs2_get_block() ocfs2: subsystem.su_mutex is required while accessing the item->ci_parent xprtrdma: Fix corner cases when handling device removal cpufreq / CPPC: Set platform specific transition_delay_us Btrfs: fix duplicate extents after fsync of file with prealloc extents x86/paravirt: Make native_save_fl() extern 
inline x86/asm: Add _ASM_ARG* constants for argument registers to <asm/asm.h> compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations ANDROID: Add hold functionality to schedtune CPU boost ANDROID: sched/rt: Add schedtune accounting to rt task enqueue/dequeue UPSTREAM: cpuidle: menu: Avoid selecting shallow states with stopped tick UPSTREAM: cpuidle: menu: Refine idle state selection for running tick UPSTREAM: sched: idle: Select idle state before stopping the tick BACKPORT: time: hrtimer: Introduce hrtimer_next_event_without() BACKPORT: time: tick-sched: Split tick_nohz_stop_sched_tick() UPSTREAM: cpuidle: Return nohz hint from cpuidle_select() UPSTREAM: jiffies: Introduce USER_TICK_USEC and redefine TICK_USEC UPSTREAM: sched: idle: Do not stop the tick before cpuidle_idle_call() BACKPORT: sched: idle: Do not stop the tick upfront in the idle loop BACKPORT: time: tick-sched: Reorganize idle tick management code ANDROID: sched/fair: fix a warning ANDROID: sched/walt: Fix compilation issue for x86_64 ANDROID: mnt: Fix next_descendent ANDROID: sched/events: Introduce util_est trace events ANDROID: sched/fair: schedtune: update before schedutil FROMLIST: sched/fair: add support to tune PELT ramp/decay timings BACKPORT: sched/fair: Update util_est before updating schedutil BACKPORT: sched/fair: Update util_est only on util_avg updates BACKPORT: sched/fair: Use util_est in LB and WU paths BACKPORT: sched/fair: Add util_est on top of PELT ANDROID: sched/fair: Cleanup cpu_util{_wake}() ANDROID: sched: Update max cpu capacity in case of max frequency constraints ANDROID: arm: enable max frequency capping ANDROID: arm64: enable max frequency capping ANDROID: implement max frequency capping ANDROID: sched/fair: add arch scaling function for max frequency capping ANDROID: trace: Add WALT util signal to trace event sched_load_cfs_rq ANDROID: sched, trace: Remove trace event sched_load_avg_cpu ANDROID: Rename and move include/linux/sched_energy.h ANDROID: Adjust juno energy model ANDROID: Check equality of max cap state cap and cpu scale ANDROID: Move energy model init call into arch_topology driver ANDROID: Streamline sched_domain_energy_f functions ANDROID: Separate cpu_scale and energy model setup ANDROID: update_group_capacity for single cpu in cluster ANDROID: sched/fair: return idle CPU immediately for prefer_idle ANDROID: sched/fair: add idle state filter to prefer_idle case ANDROID: sched/fair: remove order from CPU selection ANDROID: sched/fair: unify spare capacity calculation ANDROID:sched/fair: prefer energy efficient CPUs for !prefer_idle tasks ANDROID: sched/fair: fix CPU selection for non latency sensitive tasks ANDROID: sched/fair: Also do misfit in overloaded groups ANDROID: sched/fair: Don't balance misfits if it would overload local group ANDROID: sched/fair: Attempt to improve throughput for asym cap systems FROMLIST: sched/fair: Don't move tasks to lower capacity cpus unless necessary FROMLIST: sched/core: Disable SD_PREFER_SIBLING on asymmetric cpu capacity domains FROMLIST: sched/core: Disable SD_ASYM_CPUCAPACITY for root_domains without asymmetry FROMLIST: sched/fair: Set rq->rd->overload when misfit FROMLIST: sched: Wrap rq->rd->overload accesses with READ/WRITE_ONCE FROMLIST: sched: Change root_domain->overload type to int FROMLIST: sched/fair: Change prefer_sibling type to bool FROMLIST: sched/fair: Consider misfit tasks when load-balancing FROMLIST: sched: Add sched_group per-cpu max capacity FROMLIST: sched/fair: Add group_misfit_task load-balance type 
FROMLIST: sched: Add static_key for asymmetric cpu capacity optimizations UPSTREAM: ANDROID: binder: change down_write to down_read UPSTREAM: ANDROID: binder: correct the cmd print for BINDER_WORK_RETURN_ERROR UPSTREAM: ANDROID: binder: remove 32-bit binder interface. UPSTREAM: android: binder: Use true and false for boolean values UPSTREAM: android: binder: Use octal permissions UPSTREAM: android: binder: Prefer __func__ to using hardcoded function name UPSTREAM: ANDROID: binder: make binder_alloc_new_buf_locked static and indent its arguments UPSTREAM: android: binder: Check for errors in binder_alloc_shrinker_init(). Conflicts: arch/arm64/Kconfig arch/arm64/include/asm/cpucaps.h arch/arm64/include/asm/cpufeature.h arch/arm64/include/asm/thread_info.h arch/arm64/kernel/cpu_errata.c arch/arm64/kernel/cpufeature.c arch/arm64/kernel/entry.S arch/arm64/kernel/ssbd.c drivers/base/arch_topology.c drivers/md/Kconfig drivers/scsi/ufs/ufshcd.c drivers/usb/gadget/function/f_fs.c include/trace/events/sched.h kernel/sched/cpufreq_schedutil.c kernel/sched/energy.c kernel/sched/fair.c kernel/sched/features.h kernel/sched/sched.h kernel/sched/topology.c kernel/sched/tune.c kernel/sched/walt.c kernel/sched/walt.h kernel/stop_machine.c kernel/time/tick-sched.c net/socket.c sound/core/rawmidi.c Change-Id: Ia246711317930ecd55bb42565a04e6b4fdfc26d2 Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
2190 lines
51 KiB
C
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/mm_inline.h>
#include <linux/ctype.h>

#include <asm/elf.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include "internal.h"

void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	anon = get_mm_counter(mm, MM_ANONPAGES);
	file = get_mm_counter(mm, MM_FILEPAGES);
	shmem = get_mm_counter(mm, MM_SHMEMPAGES);

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = anon + file + shmem;
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;

	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
	swap = get_mm_counter(mm, MM_SWAPENTS);
	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
	seq_printf(m,
		"VmPeak:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
		"VmPin:\t%8lu kB\n"
		"VmHWM:\t%8lu kB\n"
		"VmRSS:\t%8lu kB\n"
		"RssAnon:\t%8lu kB\n"
		"RssFile:\t%8lu kB\n"
		"RssShmem:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n"
		"VmPMD:\t%8lu kB\n"
		"VmSwap:\t%8lu kB\n",
		hiwater_vm << (PAGE_SHIFT-10),
		total_vm << (PAGE_SHIFT-10),
		mm->locked_vm << (PAGE_SHIFT-10),
		mm->pinned_vm << (PAGE_SHIFT-10),
		hiwater_rss << (PAGE_SHIFT-10),
		total_rss << (PAGE_SHIFT-10),
		anon << (PAGE_SHIFT-10),
		file << (PAGE_SHIFT-10),
		shmem << (PAGE_SHIFT-10),
		mm->data_vm << (PAGE_SHIFT-10),
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		ptes >> 10,
		pmds >> 10,
		swap << (PAGE_SHIFT-10));
	hugetlb_report_usage(m, mm);
}

unsigned long task_vsize(struct mm_struct *mm)
{
	return PAGE_SIZE * mm->total_vm;
}

unsigned long task_statm(struct mm_struct *mm,
			 unsigned long *shared, unsigned long *text,
			 unsigned long *data, unsigned long *resident)
{
	*shared = get_mm_counter(mm, MM_FILEPAGES) +
			get_mm_counter(mm, MM_SHMEMPAGES);
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->data_vm + mm->stack_vm;
	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
	return mm->total_vm;
}

#ifdef CONFIG_NUMA
/*
 * Save get_task_policy() for show_numa_map().
 */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
	struct task_struct *task = priv->task;

	task_lock(task);
	priv->task_mempolicy = get_task_policy(task);
	mpol_get(priv->task_mempolicy);
	task_unlock(task);
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
	mpol_put(priv->task_mempolicy);
}
#else
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif

static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
{
	const char __user *name = vma_get_anon_name(vma);
	struct mm_struct *mm = vma->vm_mm;

	unsigned long page_start_vaddr;
	unsigned long page_offset;
	unsigned long num_pages;
	unsigned long max_len = NAME_MAX;
	int i;

	page_start_vaddr = (unsigned long)name & PAGE_MASK;
	page_offset = (unsigned long)name - page_start_vaddr;
	num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);

	seq_puts(m, "[anon:");

	for (i = 0; i < num_pages; i++) {
		int len;
		int write_len;
		const char *kaddr;
		long pages_pinned;
		struct page *page;

		pages_pinned = get_user_pages_remote(current, mm,
				page_start_vaddr, 1, 0, &page, NULL, NULL);
		if (pages_pinned < 1) {
			seq_puts(m, "<fault>]");
			return;
		}

		kaddr = (const char *)kmap(page);
		len = min(max_len, PAGE_SIZE - page_offset);
		write_len = strnlen(kaddr + page_offset, len);
		seq_write(m, kaddr + page_offset, write_len);
		kunmap(page);
		put_page(page);

		/* if strnlen hit a null terminator then we're done */
		if (write_len != len)
			break;

		max_len -= len;
		page_offset = 0;
		page_start_vaddr += PAGE_SIZE;
	}

	seq_putc(m, ']');
}

static void vma_stop(struct proc_maps_private *priv)
{
	struct mm_struct *mm = priv->mm;

	release_task_mempolicy(priv);
	up_read(&mm->mmap_sem);
	mmput(mm);
}

static struct vm_area_struct *
m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
	if (vma == priv->tail_vma)
		return NULL;
	return vma->vm_next ?: priv->tail_vma;
}

static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
{
	if (m->count < m->size)	/* vma is copied successfully */
		m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL;
}

static void *m_start(struct seq_file *m, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = m->version;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned int pos = *ppos;

	/* See m_cache_vma(). Zero at the start or after lseek. */
	if (last_addr == -1UL)
		return NULL;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return ERR_PTR(-ESRCH);

	mm = priv->mm;
	if (!mm || !mmget_not_zero(mm))
		return NULL;

	down_read(&mm->mmap_sem);
	hold_task_mempolicy(priv);
	priv->tail_vma = get_gate_vma(mm);

	if (last_addr) {
		vma = find_vma(mm, last_addr - 1);
		if (vma && vma->vm_start <= last_addr)
			vma = m_next_vma(priv, vma);
		if (vma)
			return vma;
	}

	m->version = 0;
	if (pos < mm->map_count) {
		for (vma = mm->mmap; pos; pos--) {
			m->version = vma->vm_start;
			vma = vma->vm_next;
		}
		return vma;
	}

	/* we do not bother to update m->version in this case */
	if (pos == mm->map_count && priv->tail_vma)
		return priv->tail_vma;

	vma_stop(priv);
	return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *next;

	(*pos)++;
	next = m_next_vma(priv, v);
	if (!next)
		vma_stop(priv);
	return next;
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;

	if (!IS_ERR_OR_NULL(v))
		vma_stop(priv);
	if (priv->task) {
		put_task_struct(priv->task);
		priv->task = NULL;
	}
}

static int proc_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops, int psize)
{
	struct proc_maps_private *priv = __seq_open_private(file, ops, psize);

	if (!priv)
		return -ENOMEM;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		int err = PTR_ERR(priv->mm);

		seq_release_private(inode, file);
		return err;
	}

	return 0;
}

static int proc_map_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	kfree(priv->rollup);
	return seq_release_private(inode, file);
}

static int do_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct proc_maps_private));
}

/*
 * Indicate if the VMA is a stack for the given task; for
 * /proc/PID/maps that is the stack of the main task.
 */
static int is_stack(struct vm_area_struct *vma)
{
	/*
	 * We make no effort to guess what a given thread considers to be
	 * its "stack".  It's not even well-defined for programs written
	 * languages like Go.
	 */
	return vma->vm_start <= vma->vm_mm->start_stack &&
		vma->vm_end >= vma->vm_mm->start_stack;
}
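
/*
 * For reference: the prefix emitted by show_vma_header_prefix() below is the
 * familiar /proc/<pid>/maps layout, i.e. start-end addresses, rwx permissions
 * plus 's'hared or 'p'rivate, file offset, major:minor device numbers and
 * inode, padded out before the pathname or special name.  An illustrative
 * (made-up) example line:
 *
 *   00400000-0040b000 r-xp 00000000 08:01 1048576                   /bin/cat
 */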
static void show_vma_header_prefix(struct seq_file *m,
				   unsigned long start, unsigned long end,
				   vm_flags_t flags, unsigned long long pgoff,
				   dev_t dev, unsigned long ino)
{
	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
		   start,
		   end,
		   flags & VM_READ ? 'r' : '-',
		   flags & VM_WRITE ? 'w' : '-',
		   flags & VM_EXEC ? 'x' : '-',
		   flags & VM_MAYSHARE ? 's' : 'p',
		   pgoff,
		   MAJOR(dev), MINOR(dev), ino);
}

static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	start = vma->vm_start;
	end = vma->vm_end;
	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_file_path(m, file, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		if (is_stack(vma)) {
			name = "[stack]";
			goto done;
		}

		if (vma_get_anon_name(vma)) {
			seq_pad(m, ' ');
			seq_print_vma_name(m, vma);
		}
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}

static int show_map(struct seq_file *m, void *v, int is_pid)
{
	show_map_vma(m, v, is_pid);
	m_cache_vma(m, v);
	return 0;
}

static int show_pid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 1);
}

static int show_tid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 0);
}

static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_map
};

static const struct seq_operations proc_tid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_tid_map
};

static int pid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

static int tid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_tid_maps_op);
}

const struct file_operations proc_pid_maps_operations = {
	.open		= pid_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_tid_maps_operations = {
	.open		= tid_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

/*
 * Proportional Set Size(PSS): my share of RSS.
 *
 * PSS of a process is the count of pages it has in memory, where each
 * page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep (accumulated) division errors low, we adopt a 64bit
 * fixed-point pss counter to minimize division errors. So (pss >>
 * PSS_SHIFT) would be the real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 * 	- 1M 3-user-pages add up to 8KB errors;
 * 	- supports mapcount up to 2^24, or 16M;
 * 	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
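
/*
 * Worked example (illustrative, assuming a 4K PAGE_SIZE): a page shared by
 * three processes adds (4096 << PSS_SHIFT) / 3 = 5592405 to each sharer's
 * fixed-point pss counter, i.e. roughly 1365 bytes of "my share" once shifted
 * back down.  show_smap() converts the accumulated counter to kB by shifting
 * right by (10 + PSS_SHIFT).
 */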
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
	bool first;
	unsigned long resident;
	unsigned long shared_clean;
	unsigned long shared_dirty;
	unsigned long private_clean;
	unsigned long private_dirty;
	unsigned long referenced;
	unsigned long anonymous;
	unsigned long lazyfree;
	unsigned long anonymous_thp;
	unsigned long shmem_thp;
	unsigned long swap;
	unsigned long shared_hugetlb;
	unsigned long private_hugetlb;
	unsigned long first_vma_start;
	u64 pss;
	u64 pss_locked;
	u64 swap_pss;
	bool check_shmem_swap;
};

static void smaps_account(struct mem_size_stats *mss, struct page *page,
		bool compound, bool young, bool dirty)
{
	int i, nr = compound ? 1 << compound_order(page) : 1;
	unsigned long size = nr * PAGE_SIZE;

	if (PageAnon(page)) {
		mss->anonymous += size;
		if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
			mss->lazyfree += size;
	}

	mss->resident += size;
	/* Accumulate the size in pages that have been accessed. */
	if (young || page_is_young(page) || PageReferenced(page))
		mss->referenced += size;

	/*
	 * page_count(page) == 1 guarantees the page is mapped exactly once.
	 * If any subpage of the compound page mapped with PTE it would elevate
	 * page_count().
	 */
	if (page_count(page) == 1) {
		if (dirty || PageDirty(page))
			mss->private_dirty += size;
		else
			mss->private_clean += size;
		mss->pss += (u64)size << PSS_SHIFT;
		return;
	}

	for (i = 0; i < nr; i++, page++) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2) {
			if (dirty || PageDirty(page))
				mss->shared_dirty += PAGE_SIZE;
			else
				mss->shared_clean += PAGE_SIZE;
			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
		} else {
			if (dirty || PageDirty(page))
				mss->private_dirty += PAGE_SIZE;
			else
				mss->private_clean += PAGE_SIZE;
			mss->pss += PAGE_SIZE << PSS_SHIFT;
		}
	}
}

#ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;

	mss->swap += shmem_partial_swap_usage(
			walk->vma->vm_file->f_mapping, addr, end);

	return 0;
}
#endif

static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent)) {
			int mapcount;

			mss->swap += PAGE_SIZE;
			mapcount = swp_swapcount(swpent);
			if (mapcount >= 2) {
				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;

				do_div(pss_delta, mapcount);
				mss->swap_pss += pss_delta;
			} else {
				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
			}
		} else if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
		else if (is_device_private_entry(swpent))
			page = device_private_entry_to_page(swpent);
	} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
							&& pte_none(*pte))) {
		page = find_get_entry(vma->vm_file->f_mapping,
						linear_page_index(vma, addr));
		if (!page)
			return;

		if (radix_tree_exceptional_entry(page))
			mss->swap += PAGE_SIZE;
		else
			put_page(page);

		return;
	}

	if (!page)
		return;

	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page;

	/* FOLL_DUMP will return -EFAULT on huge zero page */
	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
	if (IS_ERR_OR_NULL(page))
		return;
	if (PageAnon(page))
		mss->anonymous_thp += HPAGE_PMD_SIZE;
	else if (PageSwapBacked(page))
		mss->shmem_thp += HPAGE_PMD_SIZE;
	else if (is_zone_device_page(page))
		/* pass */;
	else
		VM_BUG_ON_PAGE(1, page);
	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
}
#endif

static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			   struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (pmd_present(*pmd))
			smaps_pmd_entry(pmd, addr, walk);
		spin_unlock(ptl);
		goto out;
	}

	if (pmd_trans_unstable(pmd))
		goto out;
	/*
	 * The mmap_sem held all the way back in m_start() is what
	 * keeps khugepaged out of here and from collapsing things
	 * in here.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE)
		smaps_pte_entry(pte, addr, walk);
	pte_unmap_unlock(pte - 1, ptl);
out:
	cond_resched();
	return 0;
}

static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
{
	/*
	 * Don't forget to update Documentation/ on changes.
	 */
	static const char mnemonics[BITS_PER_LONG][2] = {
		/*
		 * In case if we meet a flag we don't know about.
		 */
		[0 ... (BITS_PER_LONG-1)] = "??",

		[ilog2(VM_READ)]	= "rd",
		[ilog2(VM_WRITE)]	= "wr",
		[ilog2(VM_EXEC)]	= "ex",
		[ilog2(VM_SHARED)]	= "sh",
		[ilog2(VM_MAYREAD)]	= "mr",
		[ilog2(VM_MAYWRITE)]	= "mw",
		[ilog2(VM_MAYEXEC)]	= "me",
		[ilog2(VM_MAYSHARE)]	= "ms",
		[ilog2(VM_GROWSDOWN)]	= "gd",
		[ilog2(VM_PFNMAP)]	= "pf",
		[ilog2(VM_DENYWRITE)]	= "dw",
#ifdef CONFIG_X86_INTEL_MPX
		[ilog2(VM_MPX)]		= "mp",
#endif
		[ilog2(VM_LOCKED)]	= "lo",
		[ilog2(VM_IO)]		= "io",
		[ilog2(VM_SEQ_READ)]	= "sr",
		[ilog2(VM_RAND_READ)]	= "rr",
		[ilog2(VM_DONTCOPY)]	= "dc",
		[ilog2(VM_DONTEXPAND)]	= "de",
		[ilog2(VM_ACCOUNT)]	= "ac",
		[ilog2(VM_NORESERVE)]	= "nr",
		[ilog2(VM_HUGETLB)]	= "ht",
		[ilog2(VM_ARCH_1)]	= "ar",
		[ilog2(VM_WIPEONFORK)]	= "wf",
		[ilog2(VM_DONTDUMP)]	= "dd",
#ifdef CONFIG_MEM_SOFT_DIRTY
		[ilog2(VM_SOFTDIRTY)]	= "sd",
#endif
		[ilog2(VM_MIXEDMAP)]	= "mm",
		[ilog2(VM_HUGEPAGE)]	= "hg",
		[ilog2(VM_NOHUGEPAGE)]	= "nh",
		[ilog2(VM_MERGEABLE)]	= "mg",
		[ilog2(VM_UFFD_MISSING)]= "um",
		[ilog2(VM_UFFD_WP)]	= "uw",
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
		/* These come out via ProtectionKey: */
		[ilog2(VM_PKEY_BIT0)]	= "",
		[ilog2(VM_PKEY_BIT1)]	= "",
		[ilog2(VM_PKEY_BIT2)]	= "",
		[ilog2(VM_PKEY_BIT3)]	= "",
#endif
	};
	size_t i;

	seq_puts(m, "VmFlags: ");
	for (i = 0; i < BITS_PER_LONG; i++) {
		if (!mnemonics[i][0])
			continue;
		if (vma->vm_flags & (1UL << i)) {
			seq_printf(m, "%c%c ",
				   mnemonics[i][0], mnemonics[i][1]);
		}
	}
	seq_putc(m, '\n');
}

#ifdef CONFIG_HUGETLB_PAGE
static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
		else if (is_device_private_entry(swpent))
			page = device_private_entry_to_page(swpent);
	}
	if (page) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2)
			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
		else
			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
	}
	return 0;
}
#endif /* HUGETLB_PAGE */

void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
{
}

static int show_smap(struct seq_file *m, void *v, int is_pid)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss_stack;
	struct mem_size_stats *mss;
	struct mm_walk smaps_walk = {
		.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
		.hugetlb_entry = smaps_hugetlb_range,
#endif
		.mm = vma->vm_mm,
	};
	int ret = 0;
	bool rollup_mode;
	bool last_vma;

	if (priv->rollup) {
		rollup_mode = true;
		mss = priv->rollup;
		if (mss->first) {
			mss->first_vma_start = vma->vm_start;
			mss->first = false;
		}
		last_vma = !m_next_vma(priv, vma);
	} else {
		rollup_mode = false;
		memset(&mss_stack, 0, sizeof(mss_stack));
		mss = &mss_stack;
	}

	smaps_walk.private = mss;

#ifdef CONFIG_SHMEM
	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
		/*
		 * For shared or readonly shmem mappings we know that all
		 * swapped out pages belong to the shmem object, and we can
		 * obtain the swap value much more efficiently. For private
		 * writable mappings, we might have COW pages that are
		 * not affected by the parent swapped out pages of the shmem
		 * object, so we have to distinguish them during the page walk.
		 * Unless we know that the shmem object (or the part mapped by
		 * our VMA) has no swapped out pages at all.
		 */
		unsigned long shmem_swapped = shmem_swap_usage(vma);

		if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
					!(vma->vm_flags & VM_WRITE)) {
			mss->swap = shmem_swapped;
		} else {
			mss->check_shmem_swap = true;
			smaps_walk.pte_hole = smaps_pte_hole;
		}
	}
#endif

	/* mmap_sem is held in m_start */
	walk_page_vma(vma, &smaps_walk);
	if (vma->vm_flags & VM_LOCKED)
		mss->pss_locked += mss->pss;

	if (!rollup_mode) {
		show_map_vma(m, vma, is_pid);
		if (vma_get_anon_name(vma)) {
			seq_puts(m, "Name: ");
			seq_print_vma_name(m, vma);
			seq_putc(m, '\n');
		}
	} else if (last_vma) {
		show_vma_header_prefix(
			m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
		seq_pad(m, ' ');
		seq_puts(m, "[rollup]\n");
	} else {
		ret = SEQ_SKIP;
	}

	if (vma_get_anon_name(vma)) {
		seq_puts(m, "Name: ");
		seq_print_vma_name(m, vma);
		seq_putc(m, '\n');
	}

	if (!rollup_mode)
		seq_printf(m,
			   "Size:           %8lu kB\n"
			   "KernelPageSize: %8lu kB\n"
			   "MMUPageSize:    %8lu kB\n",
			   (vma->vm_end - vma->vm_start) >> 10,
			   vma_kernel_pagesize(vma) >> 10,
			   vma_mmu_pagesize(vma) >> 10);

	if (!rollup_mode || last_vma)
		seq_printf(m,
			   "Rss:            %8lu kB\n"
			   "Pss:            %8lu kB\n"
			   "Shared_Clean:   %8lu kB\n"
			   "Shared_Dirty:   %8lu kB\n"
			   "Private_Clean:  %8lu kB\n"
			   "Private_Dirty:  %8lu kB\n"
			   "Referenced:     %8lu kB\n"
			   "Anonymous:      %8lu kB\n"
			   "LazyFree:       %8lu kB\n"
			   "AnonHugePages:  %8lu kB\n"
			   "ShmemPmdMapped: %8lu kB\n"
			   "Shared_Hugetlb: %8lu kB\n"
			   "Private_Hugetlb: %7lu kB\n"
			   "Swap:           %8lu kB\n"
			   "SwapPss:        %8lu kB\n"
			   "Locked:         %8lu kB\n",
			   mss->resident >> 10,
			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
			   mss->shared_clean >> 10,
			   mss->shared_dirty >> 10,
			   mss->private_clean >> 10,
			   mss->private_dirty >> 10,
			   mss->referenced >> 10,
			   mss->anonymous >> 10,
			   mss->lazyfree >> 10,
			   mss->anonymous_thp >> 10,
			   mss->shmem_thp >> 10,
			   mss->shared_hugetlb >> 10,
			   mss->private_hugetlb >> 10,
			   mss->swap >> 10,
			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
			   (unsigned long)(mss->pss_locked >> (10 + PSS_SHIFT)));

	if (!rollup_mode) {
		arch_show_smap(m, vma);
		show_smap_vma_flags(m, vma);
	}
	m_cache_vma(m, vma);
	return ret;
}

static int show_pid_smap(struct seq_file *m, void *v)
{
	return show_smap(m, v, 1);
}

static int show_tid_smap(struct seq_file *m, void *v)
{
	return show_smap(m, v, 0);
}

static const struct seq_operations proc_pid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_smap
};

static const struct seq_operations proc_tid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_tid_smap
};

static int pid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}
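
/*
 * /proc/<pid>/smaps_rollup: same accounting as smaps, but accumulated in
 * priv->rollup across all of the task's VMAs and printed as a single
 * "[rollup]" record once the last VMA has been walked (see the
 * rollup_mode/last_vma handling in show_smap() above).
 */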

static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	struct proc_maps_private *priv;
	int ret = do_maps_open(inode, file, &proc_pid_smaps_op);

	if (ret < 0)
		return ret;
	seq = file->private_data;
	priv = seq->private;
	priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
	if (!priv->rollup) {
		proc_map_release(inode, file);
		return -ENOMEM;
	}
	priv->rollup->first = true;
	return 0;
}

static int tid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_tid_smaps_op);
}

const struct file_operations proc_pid_smaps_operations = {
	.open		= pid_smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_pid_smaps_rollup_operations = {
	.open		= pid_smaps_rollup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_tid_smaps_operations = {
	.open		= tid_smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};
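
/*
 * Usage note (illustrative): /proc/<pid>/smaps_rollup exposes the same
 * counters as /proc/<pid>/smaps, but summed over every VMA of the process
 * and emitted as a single record whose header runs from the first VMA's
 * start to the last VMA's end, tagged "[rollup]". A rough userspace sketch,
 * with error handling omitted:
 *
 *	char buf[4096];
 *	int fd = open("/proc/self/smaps_rollup", O_RDONLY);
 *	ssize_t n = read(fd, buf, sizeof(buf) - 1);
 *	buf[n > 0 ? n : 0] = '\0';	(buf now holds lines such as "Pss: ...")
 *	close(fd);
 */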

enum clear_refs_types {
	CLEAR_REFS_ALL = 1,
	CLEAR_REFS_ANON,
	CLEAR_REFS_MAPPED,
	CLEAR_REFS_SOFT_DIRTY,
	CLEAR_REFS_MM_HIWATER_RSS,
	CLEAR_REFS_LAST,
};

struct clear_refs_private {
	enum clear_refs_types type;
};

#ifdef CONFIG_MEM_SOFT_DIRTY
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/vm/soft-dirty.txt for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
		ptent = pte_wrprotect(ptent);
		ptent = pte_clear_soft_dirty(ptent);
		ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	}
}
#else
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
}
#endif

#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	if (pmd_present(pmd)) {
		/* See comment in change_huge_pmd() */
		pmdp_invalidate(vma, addr, pmdp);
		if (pmd_dirty(*pmdp))
			pmd = pmd_mkdirty(pmd);
		if (pmd_young(*pmdp))
			pmd = pmd_mkyoung(pmd);

		pmd = pmd_wrprotect(pmd);
		pmd = pmd_clear_soft_dirty(pmd);

		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
		pmd = pmd_swp_clear_soft_dirty(pmd);
		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	}
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
}
#endif
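
/*
 * Soft-dirty workflow (illustrative): a monitor clears the soft-dirty bits
 * by writing "4" to /proc/<pid>/clear_refs, lets the task run for a while,
 * and then reads /proc/<pid>/pagemap; any entry with bit 55 (PM_SOFT_DIRTY)
 * set has been written to since the clear. See
 * Documentation/vm/soft-dirty.txt for the full description.
 */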

static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty_pmd(vma, addr, pmd);
			goto out;
		}

		if (!pmd_present(*pmd))
			goto out;

		page = pmd_page(*pmd);

		/* Clear accessed and referenced bits. */
		pmdp_test_and_clear_young(vma, addr, pmd);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
out:
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty(vma, addr, pte);
			continue;
		}

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Clear accessed and referenced bits. */
		ptep_test_and_clear_young(vma, addr, pte);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static int clear_refs_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	/*
	 * Writing 1 to /proc/pid/clear_refs affects all pages.
	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
	 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
	 * Writing 4 to /proc/pid/clear_refs affects all pages.
	 */
	if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
		return 1;
	if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
		return 1;
	return 0;
}

static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum clear_refs_types type;
	struct mmu_gather tlb;
	int itype;
	int rv;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 10, &itype);
	if (rv < 0)
		return rv;
	type = (enum clear_refs_types)itype;
	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
		return -EINVAL;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		struct clear_refs_private cp = {
			.type = type,
		};
		struct mm_walk clear_refs_walk = {
			.pmd_entry = clear_refs_pte_range,
			.test_walk = clear_refs_test_walk,
			.mm = mm,
			.private = &cp,
		};

		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			if (down_write_killable(&mm->mmap_sem)) {
				count = -EINTR;
				goto out_mm;
			}

			/*
			 * Writing 5 to /proc/pid/clear_refs resets the peak
			 * resident set size to this mm's current rss value.
			 */
			reset_mm_hiwater_rss(mm);
			up_write(&mm->mmap_sem);
			goto out_mm;
		}

		down_read(&mm->mmap_sem);
		tlb_gather_mmu(&tlb, mm, 0, -1);
		if (type == CLEAR_REFS_SOFT_DIRTY) {
			for (vma = mm->mmap; vma; vma = vma->vm_next) {
				if (!(vma->vm_flags & VM_SOFTDIRTY))
					continue;
				up_read(&mm->mmap_sem);
				if (down_write_killable(&mm->mmap_sem)) {
					count = -EINTR;
					goto out_mm;
				}
				for (vma = mm->mmap; vma; vma = vma->vm_next) {
					vm_write_begin(vma);
					WRITE_ONCE(vma->vm_flags,
						   vma->vm_flags & ~VM_SOFTDIRTY);
					vma_set_page_prot(vma);
					vm_write_end(vma);
				}
				downgrade_write(&mm->mmap_sem);
				break;
			}
			mmu_notifier_invalidate_range_start(mm, 0, -1);
		}
		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
		if (type == CLEAR_REFS_SOFT_DIRTY)
			mmu_notifier_invalidate_range_end(mm, 0, -1);
		tlb_finish_mmu(&tlb, 0, -1);
		up_read(&mm->mmap_sem);
out_mm:
		mmput(mm);
	}
	put_task_struct(task);

	return count;
}

const struct file_operations proc_clear_refs_operations = {
	.write		= clear_refs_write,
	.llseek		= noop_llseek,
};
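
/*
 * Example (illustrative, error handling omitted): estimating a working set
 * by clearing the referenced bits and checking smaps again later.
 *
 *	int fd = open("/proc/1234/clear_refs", O_WRONLY);
 *	write(fd, "1", 1);	(clears Referenced/young on every page)
 *	close(fd);
 *
 * After letting the task run, the "Referenced:" field of /proc/1234/smaps
 * reports roughly how much memory was touched in the interval.
 */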

typedef struct {
	u64 pme;
} pagemap_entry_t;

struct pagemapread {
	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
	pagemap_entry_t *buffer;
	bool show_pfn;
};

#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
#define PAGEMAP_WALK_MASK	(PMD_MASK)

#define PM_ENTRY_BYTES		sizeof(pagemap_entry_t)
#define PM_PFRAME_BITS		55
#define PM_PFRAME_MASK		GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY		BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE	BIT_ULL(56)
#define PM_FILE			BIT_ULL(61)
#define PM_SWAP			BIT_ULL(62)
#define PM_PRESENT		BIT_ULL(63)

#define PM_END_OF_BUFFER	1
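
/*
 * Layout note: PM_PFRAME_MASK covers bits 0-54 of an entry. For a present
 * page those bits hold the PFN; for a swapped page they hold the swap type
 * in bits 0-4 and the swap offset in bits 5-54, i.e.
 * frame = type | (offset << MAX_SWAPFILES_SHIFT), matching the encoding
 * built in pte_to_pagemap_entry() below.
 */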

static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
	return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}

static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
			  struct pagemapread *pm)
{
	pm->buffer[pm->pos++] = *pme;
	if (pm->pos >= pm->len)
		return PM_END_OF_BUFFER;
	return 0;
}

static int pagemap_pte_hole(unsigned long start, unsigned long end,
			    struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	unsigned long addr = start;
	int err = 0;

	while (addr < end) {
		struct vm_area_struct *vma = find_vma(walk->mm, addr);
		pagemap_entry_t pme = make_pme(0, 0);
		/* End of address space hole, which we mark as non-present. */
		unsigned long hole_end;

		if (vma)
			hole_end = min(end, vma->vm_start);
		else
			hole_end = end;

		for (; addr < hole_end; addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}

		if (!vma)
			break;

		/* Addresses in the VMA. */
		if (vma->vm_flags & VM_SOFTDIRTY)
			pme = make_pme(0, PM_SOFT_DIRTY);
		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}
	}
out:
	return err;
}

static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
		struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
	u64 frame = 0, flags = 0;
	struct page *page = NULL;

	if (pte_present(pte)) {
		if (pm->show_pfn)
			frame = pte_pfn(pte);
		flags |= PM_PRESENT;
		page = _vm_normal_page(vma, addr, pte, true);
		if (pte_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
	} else if (is_swap_pte(pte)) {
		swp_entry_t entry;

		if (pte_swp_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
		entry = pte_to_swp_entry(pte);
		if (pm->show_pfn)
			frame = swp_type(entry) |
				(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
		flags |= PM_SWAP;
		if (is_migration_entry(entry))
			page = migration_entry_to_page(entry);

		if (is_device_private_entry(entry))
			page = device_private_entry_to_page(entry);
	}

	if (page && !PageAnon(page))
		flags |= PM_FILE;
	if (page && page_mapcount(page) == 1)
		flags |= PM_MMAP_EXCLUSIVE;
	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	return make_pme(frame, flags);
}

static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct pagemapread *pm = walk->private;
	spinlock_t *ptl;
	pte_t *pte, *orig_pte;
	int err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmdp, vma);
	if (ptl) {
		u64 flags = 0, frame = 0;
		pmd_t pmd = *pmdp;
		struct page *page = NULL;

		if (vma->vm_flags & VM_SOFTDIRTY)
			flags |= PM_SOFT_DIRTY;

		if (pmd_present(pmd)) {
			page = pmd_page(pmd);

			flags |= PM_PRESENT;
			if (pmd_soft_dirty(pmd))
				flags |= PM_SOFT_DIRTY;
			if (pm->show_pfn)
				frame = pmd_pfn(pmd) +
					((addr & ~PMD_MASK) >> PAGE_SHIFT);
		}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		else if (is_swap_pmd(pmd)) {
			swp_entry_t entry = pmd_to_swp_entry(pmd);
			unsigned long offset;

			if (pm->show_pfn) {
				offset = swp_offset(entry) +
					((addr & ~PMD_MASK) >> PAGE_SHIFT);
				frame = swp_type(entry) |
					(offset << MAX_SWAPFILES_SHIFT);
			}
			flags |= PM_SWAP;
			if (pmd_swp_soft_dirty(pmd))
				flags |= PM_SOFT_DIRTY;
			VM_BUG_ON(!is_pmd_migration_entry(pmd));
			page = migration_entry_to_page(entry);
		}
#endif

		if (page && page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		for (; addr != end; addr += PAGE_SIZE) {
			pagemap_entry_t pme = make_pme(frame, flags);

			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				break;
			if (pm->show_pfn) {
				if (flags & PM_PRESENT)
					frame++;
				else if (flags & PM_SWAP)
					frame += (1 << MAX_SWAPFILES_SHIFT);
			}
		}
		spin_unlock(ptl);
		return err;
	}

	if (pmd_trans_unstable(pmdp))
		return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	/*
	 * We can assume that @vma always points to a valid one and @end never
	 * goes beyond vma->vm_end.
	 */
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		pagemap_entry_t pme;

		pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			break;
	}
	pte_unmap_unlock(orig_pte, ptl);

	cond_resched();

	return err;
}
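
/*
 * Note: for a THP mapping the loop above emits one pagemap entry per
 * PAGE_SIZE step of the huge page, all with identical flag bits. When PFNs
 * may be shown, frame is advanced per step (by one PFN for present pages,
 * or by one swap-offset step, 1 << MAX_SWAPFILES_SHIFT, for swapped THPs)
 * so every entry still refers to its own subpage.
 */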

#ifdef CONFIG_HUGETLB_PAGE
/* This function walks within one hugetlb entry in the single call */
static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	struct vm_area_struct *vma = walk->vma;
	u64 flags = 0, frame = 0;
	int err = 0;
	pte_t pte;

	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	pte = huge_ptep_get(ptep);
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);

		if (!PageAnon(page))
			flags |= PM_FILE;

		if (page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		flags |= PM_PRESENT;
		if (pm->show_pfn)
			frame = pte_pfn(pte) +
				((addr & ~hmask) >> PAGE_SHIFT);
	}

	for (; addr != end; addr += PAGE_SIZE) {
		pagemap_entry_t pme = make_pme(frame, flags);

		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			return err;
		if (pm->show_pfn && (flags & PM_PRESENT))
			frame++;
	}

	cond_resched();

	return err;
}
#endif /* HUGETLB_PAGE */

/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
 * Bit  56    page exclusively mapped
 * Bits 57-60 zero
 * Bit  61    page is file-page or shared-anon
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unmapped pages return a null PFN. This allows determining
 * precisely which pages are mapped (or in swap) and comparing mapped
 * pages between processes.
 *
 * Efficient users of this interface will use /proc/pid/maps to
 * determine which areas of memory are actually mapped and llseek to
 * skip over unmapped regions.
 */
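
/*
 * Illustrative userspace sketch (not part of the interface description
 * above; error handling omitted): looking up the entry for one virtual
 * address. Without CAP_SYS_ADMIN the PFN bits read back as zero.
 *
 *	uint64_t entry;
 *	int fd = open("/proc/self/pagemap", O_RDONLY);
 *	off_t off = (vaddr / sysconf(_SC_PAGESIZE)) * sizeof(entry);
 *	pread(fd, &entry, sizeof(entry), off);
 *	int present = (entry >> 63) & 1;
 *	uint64_t pfn = entry & ((1ULL << 55) - 1);	(valid if present)
 *	close(fd);
 */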

static ssize_t pagemap_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	struct pagemapread pm;
	struct mm_walk pagemap_walk = {};
	unsigned long src;
	unsigned long svpfn;
	unsigned long start_vaddr;
	unsigned long end_vaddr;
	int ret = 0, copied = 0;

	if (!mm || !mmget_not_zero(mm))
		goto out;

	ret = -EINVAL;
	/* file position must be aligned */
	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
		goto out_mm;

	ret = 0;
	if (!count)
		goto out_mm;

	/* do not disclose physical addresses: attack vector */
	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);

	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
	pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_KERNEL);
	ret = -ENOMEM;
	if (!pm.buffer)
		goto out_mm;

	pagemap_walk.pmd_entry = pagemap_pmd_range;
	pagemap_walk.pte_hole = pagemap_pte_hole;
#ifdef CONFIG_HUGETLB_PAGE
	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
#endif
	pagemap_walk.mm = mm;
	pagemap_walk.private = &pm;

	src = *ppos;
	svpfn = src / PM_ENTRY_BYTES;
	start_vaddr = svpfn << PAGE_SHIFT;
	end_vaddr = mm->task_size;

	/* watch out for wraparound */
	if (svpfn > mm->task_size >> PAGE_SHIFT)
		start_vaddr = end_vaddr;

	/*
	 * The odds are that this will stop walking way
	 * before end_vaddr, because the length of the
	 * user buffer is tracked in "pm", and the walk
	 * will stop when we hit the end of the buffer.
	 */
	ret = 0;
	while (count && (start_vaddr < end_vaddr)) {
		int len;
		unsigned long end;

		pm.pos = 0;
		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
		/* overflow ? */
		if (end < start_vaddr || end > end_vaddr)
			end = end_vaddr;
		down_read(&mm->mmap_sem);
		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
		up_read(&mm->mmap_sem);
		start_vaddr = end;

		len = min(count, PM_ENTRY_BYTES * pm.pos);
		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
			goto out_free;
		}
		copied += len;
		buf += len;
		count -= len;
	}
	*ppos += copied;
	if (!ret || ret == PM_END_OF_BUFFER)
		ret = copied;

out_free:
	kfree(pm.buffer);
out_mm:
	mmput(mm);
out:
	return ret;
}
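
/*
 * Note: reads of this file must be a multiple of PM_ENTRY_BYTES (8) long
 * and start at an 8-byte-aligned offset, otherwise the alignment check
 * above returns -EINVAL. The walk proceeds in PAGEMAP_WALK_SIZE (one PMD)
 * windows, so pm.buffer never holds more than PAGEMAP_WALK_SIZE >>
 * PAGE_SHIFT entries at a time.
 */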

static int pagemap_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm;

	mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
	file->private_data = mm;
	return 0;
}

static int pagemap_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (mm)
		mmdrop(mm);
	return 0;
}

const struct file_operations proc_pagemap_operations = {
	.llseek		= mem_lseek, /* borrow this */
	.read		= pagemap_read,
	.open		= pagemap_open,
	.release	= pagemap_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_PROCESS_RECLAIM
static int reclaim_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct reclaim_param *rp = walk->private;
	struct vm_area_struct *vma = rp->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;
	LIST_HEAD(page_list);
	int isolated;
	int reclaimed;

	split_huge_pmd(vma, addr, pmd);
	if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim)
		return 0;
cont:
	isolated = 0;
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;
		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		if (isolate_lru_page(page))
			continue;

		list_add(&page->lru, &page_list);
		inc_node_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		isolated++;
		rp->nr_scanned++;
		if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim)
			break;
	}
	pte_unmap_unlock(pte - 1, ptl);
	reclaimed = reclaim_pages_from_list(&page_list, vma);
	rp->nr_reclaimed += reclaimed;
	rp->nr_to_reclaim -= reclaimed;
	if (rp->nr_to_reclaim < 0)
		rp->nr_to_reclaim = 0;

	if (rp->nr_to_reclaim && (addr != end))
		goto cont;

	cond_resched();
	return 0;
}
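
/*
 * Note: the walk above isolates mapped pages in batches of at most
 * SWAP_CLUSTER_MAX, hands each batch to reclaim_pages_from_list(), and
 * repeats until either the PTE range is exhausted or rp->nr_to_reclaim
 * drops to zero, so the walk stops early once the requested number of
 * pages has been reclaimed.
 */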

enum reclaim_type {
	RECLAIM_FILE,
	RECLAIM_ANON,
	RECLAIM_ALL,
	RECLAIM_RANGE,
};

struct reclaim_param reclaim_task_anon(struct task_struct *task,
		int nr_to_reclaim)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	struct mm_walk reclaim_walk = {};
	struct reclaim_param rp = {
		.nr_to_reclaim = nr_to_reclaim,
	};

	get_task_struct(task);
	mm = get_task_mm(task);
	if (!mm)
		goto out;

	reclaim_walk.mm = mm;
	reclaim_walk.pmd_entry = reclaim_pte_range;

	reclaim_walk.private = &rp;

	down_read(&mm->mmap_sem);
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (is_vm_hugetlb_page(vma))
			continue;

		if (vma->vm_file)
			continue;

		if (!rp.nr_to_reclaim)
			break;

		rp.vma = vma;
		walk_page_range(vma->vm_start, vma->vm_end,
				&reclaim_walk);
	}

	flush_tlb_mm(mm);
	up_read(&mm->mmap_sem);
	mmput(mm);
out:
	put_task_struct(task);
	return rp;
}
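
/*
 * Input formats accepted by reclaim_write() below (illustrative):
 *
 *	"file"			reclaim file-backed mappings only
 *	"anon"			reclaim anonymous mappings only
 *	"all"			reclaim both
 *	"<start> <size>"	reclaim a range; the start address must be
 *				page aligned and sizes are parsed by
 *				memparse(), e.g. "0x100000 64K"
 */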

static ssize_t reclaim_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[200];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum reclaim_type type;
	char *type_buf;
	struct mm_walk reclaim_walk = {};
	unsigned long start = 0;
	unsigned long end = 0;
	struct reclaim_param rp;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;

	if (copy_from_user(buffer, buf, count))
		return -EFAULT;

	type_buf = strstrip(buffer);
	if (!strcmp(type_buf, "file"))
		type = RECLAIM_FILE;
	else if (!strcmp(type_buf, "anon"))
		type = RECLAIM_ANON;
	else if (!strcmp(type_buf, "all"))
		type = RECLAIM_ALL;
	else if (isdigit(*type_buf))
		type = RECLAIM_RANGE;
	else
		goto out_err;

	if (type == RECLAIM_RANGE) {
		char *token;
		unsigned long long len, len_in, tmp;

		token = strsep(&type_buf, " ");
		if (!token)
			goto out_err;
		tmp = memparse(token, &token);
		if (tmp & ~PAGE_MASK || tmp > ULONG_MAX)
			goto out_err;
		start = tmp;

		token = strsep(&type_buf, " ");
		if (!token)
			goto out_err;
		len_in = memparse(token, &token);
		len = (len_in + ~PAGE_MASK) & PAGE_MASK;
		if (len > ULONG_MAX)
			goto out_err;
		/*
		 * Check to see whether len was rounded up from small -ve
		 * to zero.
		 */
		if (len_in && !len)
			goto out_err;

		end = start + len;
		if (end < start)
			goto out_err;
	}

	task = get_proc_task(file->f_path.dentry->d_inode);
	if (!task)
		return -ESRCH;

	mm = get_task_mm(task);
	if (!mm)
		goto out;

	reclaim_walk.mm = mm;
	reclaim_walk.pmd_entry = reclaim_pte_range;

	rp.nr_to_reclaim = INT_MAX;
	rp.nr_reclaimed = 0;
	reclaim_walk.private = &rp;

	down_read(&mm->mmap_sem);
	if (type == RECLAIM_RANGE) {
		vma = find_vma(mm, start);
		while (vma) {
			if (vma->vm_start > end)
				break;
			if (is_vm_hugetlb_page(vma))
				continue;

			rp.vma = vma;
			walk_page_range(max(vma->vm_start, start),
					min(vma->vm_end, end),
					&reclaim_walk);
			vma = vma->vm_next;
		}
	} else {
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (is_vm_hugetlb_page(vma))
				continue;

			if (type == RECLAIM_ANON && vma->vm_file)
				continue;

			if (type == RECLAIM_FILE && !vma->vm_file)
				continue;

			rp.vma = vma;
			walk_page_range(vma->vm_start, vma->vm_end,
					&reclaim_walk);
		}
	}

	flush_tlb_mm(mm);
	up_read(&mm->mmap_sem);
	mmput(mm);
out:
	put_task_struct(task);
	return count;

out_err:
	return -EINVAL;
}

const struct file_operations proc_reclaim_operations = {
	.write		= reclaim_write,
	.llseek		= noop_llseek,
};
#endif

#ifdef CONFIG_NUMA

struct numa_maps {
	unsigned long pages;
	unsigned long anon;
	unsigned long active;
	unsigned long writeback;
	unsigned long mapcount_max;
	unsigned long dirty;
	unsigned long swapcache;
	unsigned long node[MAX_NUMNODES];
};

struct numa_maps_private {
	struct proc_maps_private proc_maps;
	struct numa_maps md;
};

static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
			unsigned long nr_pages)
{
	int count = page_mapcount(page);

	md->pages += nr_pages;
	if (pte_dirty || PageDirty(page))
		md->dirty += nr_pages;

	if (PageSwapCache(page))
		md->swapcache += nr_pages;

	if (PageActive(page) || PageUnevictable(page))
		md->active += nr_pages;

	if (PageWriteback(page))
		md->writeback += nr_pages;

	if (PageAnon(page))
		md->anon += nr_pages;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)] += nr_pages;
}

static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
		unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pte_present(pte))
		return NULL;

	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
					      struct vm_area_struct *vma,
					      unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pmd_present(pmd))
		return NULL;

	page = vm_normal_page_pmd(vma, addr, pmd);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}
#endif

static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
		unsigned long end, struct mm_walk *walk)
{
	struct numa_maps *md = walk->private;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte;
	pte_t *pte;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		struct page *page;

		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
		if (page)
			gather_stats(page, md, pmd_dirty(*pmd),
				     HPAGE_PMD_SIZE/PAGE_SIZE);
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
#endif
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	do {
		struct page *page = can_gather_numa_stats(*pte, vma, addr);

		if (!page)
			continue;
		gather_stats(page, md, pte_dirty(*pte), 1);

	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);
	cond_resched();
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	pte_t huge_pte = huge_ptep_get(pte);
	struct numa_maps *md;
	struct page *page;

	if (!pte_present(huge_pte))
		return 0;

	page = pte_page(huge_pte);
	if (!page)
		return 0;

	md = walk->private;
	gather_stats(page, md, pte_dirty(huge_pte), 1);
	return 0;
}

#else
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	return 0;
}
#endif
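
/*
 * Output format produced by show_numa_map() below (illustrative; the real
 * output is a single line per VMA, wrapped here for readability):
 *
 *	7f2c14000000 default file=/usr/lib/libfoo.so mapped=12 mapmax=3
 *	active=10 N0=8 N1=4 kernelpagesize_kB=4
 *
 * i.e. the VMA start address, its memory policy, an optional
 * file/heap/stack/huge tag, then the non-zero per-VMA counters and a
 * per-node page count for every node that holds pages of the mapping.
 */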

/*
 * Display pages allocated per node and memory policy via /proc.
 */
static int show_numa_map(struct seq_file *m, void *v, int is_pid)
{
	struct numa_maps_private *numa_priv = m->private;
	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
	struct vm_area_struct *vma = v;
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mm_walk walk = {
		.hugetlb_entry = gather_hugetlb_stats,
		.pmd_entry = gather_pte_stats,
		.private = md,
		.mm = mm,
	};
	struct mempolicy *pol;
	char buffer[64];
	int nid;

	if (!mm)
		return 0;

	/* Ensure we start with an empty set of numa_maps statistics. */
	memset(md, 0, sizeof(*md));

	pol = __get_vma_policy(vma, vma->vm_start);
	if (pol) {
		mpol_to_str(buffer, sizeof(buffer), pol);
		mpol_cond_put(pol);
	} else {
		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
	}

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

	if (file) {
		seq_puts(m, " file=");
		seq_file_path(m, file, "\n\t= ");
	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_puts(m, " heap");
	} else if (is_stack(vma)) {
		seq_puts(m, " stack");
	}

	if (is_vm_hugetlb_page(vma))
		seq_puts(m, " huge");

	/* mmap_sem is held by m_start */
	walk_page_vma(vma, &walk);

	if (!md->pages)
		goto out;

	if (md->anon)
		seq_printf(m, " anon=%lu", md->anon);

	if (md->dirty)
		seq_printf(m, " dirty=%lu", md->dirty);

	if (md->pages != md->anon && md->pages != md->dirty)
		seq_printf(m, " mapped=%lu", md->pages);

	if (md->mapcount_max > 1)
		seq_printf(m, " mapmax=%lu", md->mapcount_max);

	if (md->swapcache)
		seq_printf(m, " swapcache=%lu", md->swapcache);

	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
		seq_printf(m, " active=%lu", md->active);

	if (md->writeback)
		seq_printf(m, " writeback=%lu", md->writeback);

	for_each_node_state(nid, N_MEMORY)
		if (md->node[nid])
			seq_printf(m, " N%d=%lu", nid, md->node[nid]);

	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
	seq_putc(m, '\n');
	m_cache_vma(m, vma);
	return 0;
}

static int show_pid_numa_map(struct seq_file *m, void *v)
{
	return show_numa_map(m, v, 1);
}

static int show_tid_numa_map(struct seq_file *m, void *v)
{
	return show_numa_map(m, v, 0);
}

static const struct seq_operations proc_pid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_numa_map,
};

static const struct seq_operations proc_tid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_tid_numa_map,
};

static int numa_maps_open(struct inode *inode, struct file *file,
			  const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct numa_maps_private));
}

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
	return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
}

static int tid_numa_maps_open(struct inode *inode, struct file *file)
{
	return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
}

const struct file_operations proc_pid_numa_maps_operations = {
	.open		= pid_numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_tid_numa_maps_operations = {
	.open		= tid_numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};
#endif /* CONFIG_NUMA */