msm-4.14/kernel/hung_task.c
Blagovest Kolenichev dc1d03db8d Merge android-4.14.114 (c680586) into msm-4.14
* refs/heads/tmp-c680586:
  dm: Restore reverted changes
  Linux 4.14.114
  kernel/sysctl.c: fix out-of-bounds access when setting file-max
  Revert "locking/lockdep: Add debug_locks check in __lock_downgrade()"
  i2c-hid: properly terminate i2c_hid_dmi_desc_override_table[] array
  xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute
  xfs: add the ability to join a held buffer to a defer_ops
  iomap: report collisions between directio and buffered writes to userspace
  tools include: Adopt linux/bits.h
  percpu: stop printing kernel addresses
  ALSA: info: Fix racy addition/deletion of nodes
  mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n
  device_cgroup: fix RCU imbalance in error case
  sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup
  Revert "kbuild: use -Oz instead of -Os when using clang"
  net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c
  net: IP6 defrag: use rbtrees for IPv6 defrag
  ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module
  net: IP defrag: encapsulate rbtree defrag code into callable functions
  ipv6: frags: fix a lockdep false positive
  tpm/tpm_i2c_atmel: Return -E2BIG when the transfer is incomplete
  modpost: file2alias: check prototype of handler
  modpost: file2alias: go back to simple devtable lookup
  mmc: sdhci: Handle auto-command errors
  mmc: sdhci: Rename SDHCI_ACMD12_ERR and SDHCI_INT_ACMD12ERR
  mmc: sdhci: Fix data command CRC error handling
  crypto: crypto4xx - properly set IV after de- and encrypt
  x86/speculation: Prevent deadlock on ssb_state::lock
  perf/x86: Fix incorrect PEBS_REGS
  x86/cpu/bugs: Use __initconst for 'const' init data
  perf/x86/amd: Add event map for AMD Family 17h
  mac80211: do not call driver wake_tx_queue op during reconfig
  rt2x00: do not increment sequence number while re-transmitting
  kprobes: Fix error check when reusing optimized probes
  kprobes: Mark ftrace mcount handler functions nokprobe
  x86/kprobes: Verify stack frame on kretprobe
  arm64: futex: Restore oldval initialization to work around buggy compilers
  crypto: x86/poly1305 - fix overflow during partial reduction
  coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
  Revert "svm: Fix AVIC incomplete IPI emulation"
  Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO"
  scsi: core: set result when the command cannot be dispatched
  ALSA: core: Fix card races between register and disconnect
  ALSA: hda/realtek - add two more pin configuration sets to quirk table
  staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf
  staging: comedi: ni_usb6501: Fix use of uninitialized mutex
  staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf
  staging: comedi: vmk80xx: Fix use of uninitialized semaphore
  io: accel: kxcjk1013: restore the range after resume.
  iio: core: fix a possible circular locking dependency
  iio: adc: at91: disable adc channel interrupt in timeout case
  iio: Fix scan mask selection
  iio: dac: mcp4725: add missing powerdown bits in store eeprom
  iio: ad_sigma_delta: select channel when reading register
  iio: cros_ec: Fix the maths for gyro scale calculation
  iio/gyro/bmg160: Use millidegrees for temperature scale
  iio: gyro: mpu3050: fix chip ID reading
  staging: iio: ad7192: Fix ad7193 channel address
  Staging: iio: meter: fixed typo
  KVM: x86: svm: make sure NMI is injected after nmi_singlestep
  KVM: x86: Don't clear EFER during SMM transitions for 32-bit vCPU
  CIFS: keep FileInfo handle live during oplock break
  net: thunderx: don't allow jumbo frames with XDP
  net: thunderx: raise XDP MTU to 1508
  ipv4: ensure rcu_read_lock() in ipv4_link_failure()
  ipv4: recompile ip options in ipv4_link_failure
  vhost: reject zero size iova range
  team: set slave to promisc if team is already in promisc mode
  tcp: tcp_grow_window() needs to respect tcp_space()
  net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv
  net: bridge: multicast: use rcu to access port list from br_multicast_start_querier
  net: bridge: fix per-port af_packet sockets
  net: atm: Fix potential Spectre v1 vulnerabilities
  bonding: fix event handling for stacked bonds
  ANDROID: cuttlefish_defconfig: Enable CONFIG_XFRM_STATISTICS
  Linux 4.14.113
  appletalk: Fix compile regression
  mm: hide incomplete nr_indirectly_reclaimable in sysfs
  net: stmmac: Set dma ring length before enabling the DMA
  bpf: Fix selftests are changes for CVE 2019-7308
  bpf: fix sanitation rewrite in case of non-pointers
  bpf: do not restore dst_reg when cur_state is freed
  bpf: fix inner map masking to prevent oob under speculation
  bpf: fix sanitation of alu op with pointer / scalar type from different paths
  bpf: prevent out of bounds speculation on pointer arithmetic
  bpf: fix check_map_access smin_value test when pointer contains offset
  bpf: restrict unknown scalars of mixed signed bounds for unprivileged
  bpf: restrict stack pointer arithmetic for unprivileged
  bpf: restrict map value pointer arithmetic for unprivileged
  bpf: enable access to ax register also from verifier rewrite
  bpf: move tmp variable into ax register in interpreter
  bpf: move {prev_,}insn_idx into verifier env
  bpf: fix stack state printing in verifier log
  bpf: fix verifier NULL pointer dereference
  bpf: fix verifier memory leaks
  bpf: reduce verifier memory consumption
  dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL recursion deadlock
  bpf: fix use after free in bpf_evict_inode
  include/linux/swap.h: use offsetof() instead of custom __swapoffset macro
  lib/div64.c: off by one in shift
  appletalk: Fix use-after-free in atalk_proc_exit
  drm/amdkfd: use init_mqd function to allocate object for hid_mqd (CI)
  ARM: 8839/1: kprobe: make patch_lock a raw_spinlock_t
  drm/nouveau/volt/gf117: fix speedo readout register
  coresight: cpu-debug: Support for CA73 CPUs
  Revert "ACPI / EC: Remove old CLEAR_ON_RESUME quirk"
  crypto: axis - fix for recursive locking from bottom half
  drm/panel: panel-innolux: set display off in innolux_panel_unprepare
  lkdtm: Add tests for NULL pointer dereference
  lkdtm: Print real addresses
  soc/tegra: pmc: Drop locking from tegra_powergate_is_powered()
  iommu/dmar: Fix buffer overflow during PCI bus notification
  crypto: sha512/arm - fix crash bug in Thumb2 build
  crypto: sha256/arm - fix crash bug in Thumb2 build
  kernel: hung_task.c: disable on suspend
  cifs: fallback to older infolevels on findfirst queryinfo retry
  compiler.h: update definition of unreachable()
  KVM: nVMX: restore host state in nested_vmx_vmexit for VMFail
  ACPI / SBS: Fix GPE storm on recent MacBookPro's
  usbip: fix vhci_hcd controller counting
  ARM: samsung: Limit SAMSUNG_PM_CHECK config option to non-Exynos platforms
  HID: i2c-hid: override HID descriptors for certain devices
  media: au0828: cannot kfree dev before usb disconnect
  powerpc/pseries: Remove prrn_work workqueue
  serial: uartps: console_setup() can't be placed to init section
  netfilter: xt_cgroup: shrink size of v2 path
  f2fs: fix to do sanity check with current segment number
  9p locks: add mount option for lock retry interval
  9p: do not trust pdu content for stat item size
  rsi: improve kernel thread handling to fix kernel panic
  gpio: pxa: handle corner case of unprobed device
  ext4: prohibit fstrim in norecovery mode
  fix incorrect error code mapping for OBJECTID_NOT_FOUND
  x86/hw_breakpoints: Make default case in hw_breakpoint_arch_parse() return an error
  iommu/vt-d: Check capability before disabling protected memory
  drm/nouveau/debugfs: Fix check of pm_runtime_get_sync failure
  x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors
  x86/hpet: Prevent potential NULL pointer dereference
  irqchip/mbigen: Don't clear eventid when freeing an MSI
  perf tests: Fix a memory leak in test__perf_evsel__tp_sched_test()
  perf tests: Fix memory leak by expr__find_other() in test__expr()
  perf tests: Fix a memory leak of cpu_map object in the openat_syscall_event_on_all_cpus test
  perf evsel: Free evsel->counts in perf_evsel__exit()
  perf hist: Add missing map__put() in error case
  perf top: Fix error handling in cmd_top()
  perf build-id: Fix memory leak in print_sdt_events()
  perf config: Fix a memory leak in collect_config()
  perf config: Fix an error in the config template documentation
  perf list: Don't forget to drop the reference to the allocated thread_map
  tools/power turbostat: return the exit status of a command
  x86/mm: Don't leak kernel addresses
  scsi: iscsi: flush running unbind operations when removing a session
  thermal/intel_powerclamp: fix truncated kthread name
  thermal/int340x_thermal: fix mode setting
  thermal/int340x_thermal: Add additional UUIDs
  thermal: bcm2835: Fix crash in bcm2835_thermal_debugfs
  thermal/intel_powerclamp: fix __percpu declaration of worker_data
  ALSA: opl3: fix mismatch between snd_opl3_drum_switch definition and declaration
  mmc: davinci: remove extraneous __init annotation
  IB/mlx4: Fix race condition between catas error reset and aliasguid flows
  auxdisplay: hd44780: Fix memory leak on ->remove()
  ALSA: sb8: add a check for request_region
  ALSA: echoaudio: add a check for ioremap_nocache
  ext4: report real fs size after failed resize
  ext4: add missing brelse() in add_new_gdb_meta_bg()
  perf/core: Restore mmap record type correctly
  arc: hsdk_defconfig: Enable CONFIG_BLK_DEV_RAM
  ARC: u-boot args: check that magic number is correct
  ANDROID: cuttlefish_defconfig: Enable L2TP/PPTP
  ANDROID: Makefile: Properly resolve 4.14.112 merge
  Make arm64 serial port config compatible with crosvm
  Linux 4.14.112
  arm64: dts: rockchip: Fix vcc_host1_5v GPIO polarity on rk3328-rock64
  arm64: dts: rockchip: fix vcc_host1_5v pin assign on rk3328-rock64
  dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors
  PCI: Add function 1 DMA alias quirk for Marvell 9170 SATA controller
  x86/perf/amd: Remove need to check "running" bit in NMI handler
  x86/perf/amd: Resolve NMI latency issues for active PMCs
  x86/perf/amd: Resolve race condition when disabling PMC
  xtensa: fix return_address
  sched/fair: Do not re-read ->h_load_next during hierarchical load calculation
  xen: Prevent buffer overflow in privcmd ioctl
  arm64: backtrace: Don't bother trying to unwind the userspace stack
  arm64: dts: rockchip: fix rk3328 rgmii high tx error rate
  arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value
  ARM: dts: at91: Fix typo in ISC_D0 on PC9
  ARM: dts: am335x-evm: Correct the regulators for the audio codec
  ARM: dts: am335x-evmsk: Correct the regulators for the audio codec
  virtio: Honour 'may_reduce_num' in vring_create_virtqueue
  genirq: Initialize request_mutex if CONFIG_SPARSE_IRQ=n
  genirq: Respect IRQCHIP_SKIP_SET_WAKE in irq_chip_set_wake_parent()
  block: fix the return errno for direct IO
  block: do not leak memory in bio_copy_user_iov()
  btrfs: prop: fix vanished compression property after failed set
  btrfs: prop: fix zstd compression parameter validation
  Btrfs: do not allow trimming when a fs is mounted with the nologreplay option
  ASoC: fsl_esai: fix channel swap issue when stream starts
  include/linux/bitrev.h: fix constant bitrev
  drm/udl: add a release method and delay modeset teardown
  alarmtimer: Return correct remaining time
  parisc: regs_return_value() should return gpr28
  parisc: Detect QEMU earlier in boot process
  arm64: dts: rockchip: fix rk3328 sdmmc0 write errors
  hv_netvsc: Fix unwanted wakeup after tx_disable
  ip6_tunnel: Match to ARPHRD_TUNNEL6 for dev type
  ALSA: seq: Fix OOB-reads from strlcpy
  net: ethtool: not call vzalloc for zero sized memory request
  netns: provide pure entropy for net_hash_mix()
  net/sched: act_sample: fix divide by zero in the traffic path
  bnxt_en: Reset device on RX buffer errors.
  bnxt_en: Improve RX consumer index validity check.
  nfp: validate the return code from dev_queue_xmit()
  net/mlx5e: Add a lock on tir list
  net/mlx5e: Fix error handling when refreshing TIRs
  vrf: check accept_source_route on the original netdevice
  tcp: Ensure DCTCP reacts to losses
  sctp: initialize _pad of sockaddr_in before copying to user memory
  qmi_wwan: add Olicard 600
  openvswitch: fix flow actions reallocation
  net/sched: fix ->get helper of the matchall cls
  net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock().
  net/mlx5: Decrease default mr cache size
  net-gro: Fix GRO flush when receiving a GSO packet.
  kcm: switch order of device registration to fix a crash
  ipv6: sit: reset ip header pointer in ipip6_rcv
  ipv6: Fix dangling pointer when ipv6 fragment
  tty: ldisc: add sysctl to prevent autoloading of ldiscs
  tty: mark Siemens R3964 line discipline as BROKEN
  arm64: kaslr: Reserve size of ARM64_MEMSTART_ALIGN in linear region
  stating: ccree: revert "staging: ccree: fix leak of import() after init()"
  lib/string.c: implement a basic bcmp
  x86/vdso: Drop implicit common-page-size linker flag
  x86: vdso: Use $LD instead of $CC to link
  kbuild: clang: choose GCC_TOOLCHAIN_DIR not on LD
  powerpc/tm: Limit TM code inside PPC_TRANSACTIONAL_MEM
  drm/i915/gvt: do not let pin count of shadow mm go negative
  x86/power: Make restore_processor_context() sane
  x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
  x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
  x86/power: Fix some ordering bugs in __restore_processor_context()
  net: sfp: move sfp_register_socket call from sfp_remove to sfp_probe
  Revert "CHROMIUM: dm: boot time specification of dm="
  Revert "ANDROID: dm: do_mounts_dm: Rebase on top of 4.9"
  Revert "ANDROID: dm: do_mounts_dm: fix dm_substitute_devices()"
  Revert "ANDROID: dm: do_mounts_dm: Update init/do_mounts_dm.c to the latest ChromiumOS version."
  sched/fair: remove printk while schedule is in progress
  ANDROID: Makefile: Add '-fsplit-lto-unit' to cfi-clang-flags
  ANDROID: cfi: Remove unused variable in ptr_to_check_fn
  ANDROID: cuttlefish_defconfig: Enable CONFIG_FUSE_FS

Conflicts:
	arch/arm64/kernel/traps.c
	drivers/mmc/host/sdhci.c
	drivers/mmc/host/sdhci.h
	drivers/tty/Kconfig
	kernel/sched/fair.c

Change-Id: Ic4c01204f58cdb536e2cab04e4f1a2451977f6a3
Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
2019-06-25 03:05:18 -07:00

308 lines
7.2 KiB
C

/*
* Detect Hung Task
*
* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
*
*/
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <trace/events/sched.h>
#include <linux/sched/sysctl.h>
/*
* The number of tasks checked:
*/
int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
/*
* Selective monitoring of hung tasks.
*
* if set to 1, khungtaskd skips monitoring tasks, which has
* task_struct->hang_detection_enabled value not set, else monitors all tasks.
*/
int sysctl_hung_task_selective_monitoring = 1;
/*
* Limit number of tasks checked in a batch.
*
* This value controls the preemptibility of khungtaskd since preemption
* is disabled during the critical section. It also controls the size of
* the RCU grace period. So it needs to be upper-bound.
*/
#define HUNG_TASK_LOCK_BREAK (HZ / 10)
/*
* Zero means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
static struct task_struct *watchdog_task;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* hung task is detected:
*/
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
static int __init hung_task_panic_setup(char *str)
{
int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);
if (rc)
return rc;
return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
return NOTIFY_DONE;
}
static struct notifier_block panic_block = {
.notifier_call = hung_task_panic,
};
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
unsigned long switch_count = t->nvcsw + t->nivcsw;
/*
* Ensure the task is not frozen.
* Also, skip vfork and any other user process that freezer should skip.
*/
if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
return;
/*
* When a freshly created task is scheduled once, changes its state to
* TASK_UNINTERRUPTIBLE without having ever been switched out once, it
* musn't be checked.
*/
if (unlikely(!switch_count))
return;
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
return;
}
trace_sched_process_hang(t);
if (sysctl_hung_task_panic) {
console_verbose();
hung_task_show_lock = true;
hung_task_call_panic = true;
}
/*
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
if (sysctl_hung_task_warnings) {
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
t->comm, t->pid, timeout);
pr_err(" %s %s %.*s\n",
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
hung_task_show_lock = true;
}
touch_nmi_watchdog();
}
/*
* To avoid extending the RCU grace period for an unbounded amount of time,
* periodically exit the critical section and enter a new one.
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
* to exit the grace period. For classic RCU, a reschedule is required.
*/
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
bool can_cont;
get_task_struct(g);
get_task_struct(t);
rcu_read_unlock();
cond_resched();
rcu_read_lock();
can_cont = pid_alive(g) && pid_alive(t);
put_task_struct(t);
put_task_struct(g);
return can_cont;
}
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time (120 seconds). If that happens, print out
* a warning.
*/
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
unsigned long last_break = jiffies;
struct task_struct *g, *t;
/*
* If the system crashed already then all bets are off,
* do not report extra hung tasks:
*/
if (test_taint(TAINT_DIE) || did_panic)
return;
hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
if (!max_count--)
goto unlock;
if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
if (!rcu_lock_break(g, t))
goto unlock;
last_break = jiffies;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
/* Check for selective monitoring */
if (!sysctl_hung_task_selective_monitoring ||
t->hang_detection_enabled)
check_hung_task(t, timeout);
}
unlock:
rcu_read_unlock();
if (hung_task_show_lock)
debug_show_all_locks();
if (hung_task_call_panic) {
trigger_all_cpu_backtrace();
panic("hung_task: blocked tasks");
}
}
static long hung_timeout_jiffies(unsigned long last_checked,
unsigned long timeout)
{
/* timeout of 0 will disable the watchdog */
return timeout ? last_checked - jiffies + timeout * HZ :
MAX_SCHEDULE_TIMEOUT;
}
/*
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write)
goto out;
wake_up_process(watchdog_task);
out:
return ret;
}
static atomic_t reset_hung_task = ATOMIC_INIT(0);
void reset_hung_task_detector(void)
{
atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);
static bool hung_detector_suspended;
static int hungtask_pm_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
case PM_RESTORE_PREPARE:
hung_detector_suspended = true;
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
case PM_POST_RESTORE:
hung_detector_suspended = false;
break;
default:
break;
}
return NOTIFY_OK;
}
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog(void *dummy)
{
unsigned long hung_last_checked = jiffies;
set_user_nice(current, 0);
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
long t = hung_timeout_jiffies(hung_last_checked, timeout);
if (t <= 0) {
if (!atomic_xchg(&reset_hung_task, 0) &&
!hung_detector_suspended)
check_hung_uninterruptible_tasks(timeout);
hung_last_checked = jiffies;
continue;
}
schedule_timeout_interruptible(t);
}
return 0;
}
static int __init hung_task_init(void)
{
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
/* Disable hung task detector on suspend */
pm_notifier(hungtask_pm_notify, 0);
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
return 0;
}
subsys_initcall(hung_task_init);