msm-4.14/kernel/softirq.c
Isaac J. Manjarres 2c7009015c Merge remote-tracking branch 'remotes/origin/tmp-474d3c4' into msm-4.14
* remotes/origin/tmp-474d3c4:
  Linux 4.14.21
  ovl: hash directory inodes for fsnotify
  ASoC: acpi: fix machine driver selection based on quirk
  mmc: sdhci-of-esdhc: fix the mmc error after sleep on ls1046ardb
  mmc: sdhci-of-esdhc: fix eMMC couldn't work after kexec
  mmc: sdhci-of-esdhc: disable SD clock for clock value 0
  media: r820t: fix r820t_write_reg for KASAN
  ARM: dts: Delete bogus reference to the charlcd
  arm: dts: mt2701: Add reset-cells
  arm: dts: mt7623: Update ethsys binding
  ARM: dts: s5pv210: add interrupt-parent for ohci
  arm64: dts: msm8916: Add missing #phy-cells
  ARM: pxa/tosa-bt: add MODULE_LICENSE tag
  ARM: dts: exynos: fix RTC interrupt for exynos5410
  Bluetooth: BT_HCIUART now depends on SERIAL_DEV_BUS
  scsi: core: check for device state in __scsi_remove_target()
  x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages
  usb: Move USB_UHCI_BIG_ENDIAN_* out of USB_SUPPORT
  mvpp2: fix multicast address filter
  ALSA: seq: Fix racy pool initializations
  ALSA: usb: add more device quirks for USB DSD devices
  ALSA: usb-audio: add implicit fb quirk for Behringer UFX1204
  ALSA: hda/realtek: PCI quirk for Fujitsu U7x7
  ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform
  ALSA: hda/realtek - Add headset mode support for Dell laptop
  ALSA: usb-audio: Fix UAC2 get_ctl request with a RANGE attribute
  ALSA: hda - Fix headset mic detection problem for two Dell machines
  mtd: nand: vf610: set correct ooblayout
  9p/trans_virtio: discard zero-length reply
  Btrfs: fix unexpected -EEXIST when creating new inode
  Btrfs: fix use-after-free on root->orphan_block_rsv
  Btrfs: fix btrfs_evict_inode to handle abnormal inodes correctly
  Btrfs: fix extent state leak from tree log
  Btrfs: fix crash due to not cleaning up tree log block's dirty bits
  Btrfs: fix deadlock in run_delalloc_nocow
  dm: correctly handle chained bios in dec_pending()
  iscsi-target: make sure to wake up sleeping login worker
  target/iscsi: avoid NULL dereference in CHAP auth error path
  blk-wbt: account flush requests correctly
  xprtrdma: Fix BUG after a device removal
  xprtrdma: Fix calculation of ri_max_send_sges
  drm/qxl: reapply cursor after resetting primary
  qxl: alloc & use shadow for dumb buffers
  arm64: proc: Set PTE_NG for table entries to avoid traversing them twice
  rtlwifi: rtl8821ae: Fix connection lost problem correctly
  mpls, nospec: Sanitize array index in mpls_label_ok()
  tracing: Fix parsing of globs with a wildcard at the beginning
  seq_file: fix incomplete reset on read from zero offset
  xenbus: track caller request id
  xen: Fix {set,clear}_foreign_p2m_mapping on autotranslating guests
  rbd: whitelist RBD_FEATURE_OPERATIONS feature bit
  console/dummy: leave .con_font_get set to NULL
  video: fbdev: atmel_lcdfb: fix display-timings lookup
  PCI: keystone: Fix interrupt-controller-node lookup
  PCI: iproc: Fix NULL pointer dereference for BCMA
  PCI: Disable MSI for HiSilicon Hip06/Hip07 only in Root Port mode
  MIPS: Fix incorrect mem=X@Y handling
  MIPS: Fix typo BIG_ENDIAN to CPU_BIG_ENDIAN
  mm: Fix memory size alignment in devm_memremap_pages_release()
  mm: hide a #warning for COMPILE_TEST
  ext4: correct documentation for grpid mount option
  ext4: save error to disk in __ext4_grp_locked_error()
  ext4: fix a race in the ext4 shutdown path
  jbd2: fix sphinx kernel-doc build warnings
  Revert "apple-gmux: lock iGP IO to protect from vgaarb changes"
  mlx5: fix mlx5_get_vector_affinity to start from completion vector 0
  Revert "mmc: meson-gx: include tx phase in the tuning process"
  mmc: bcm2835: Don't overwrite max frequency unconditionally
  mmc: sdhci: Implement an SDHCI-specific bounce buffer
  mbcache: initialize entry->e_referenced in mb_cache_entry_create()
  rtc-opal: Fix handling of firmware error codes, prevent busy loops
  drm/radeon: adjust tested variable
  drm/radeon: Add dpm quirk for Jet PRO (v2)
  arm64: Add missing Falkor part number for branch predictor hardening
  drm/ast: Load lut in crtc_commit
  drm/amd/powerplay: Fix smu_table_entry.handle type
  drm/qxl: unref cursor bo when finished with it
  drm/ttm: Fix 'buf' pointer update in ttm_bo_vm_access_kmap() (v2)
  drm/ttm: Don't add swapped BOs to swap-LRU list
  x86/entry/64: Fix CR3 restore in paranoid_exit()
  x86/cpu: Change type of x86_cache_size variable to unsigned int
  x86/spectre: Fix an error message
  x86/cpu: Rename cpu_data.x86_mask to cpu_data.x86_stepping
  selftests/x86/mpx: Fix incorrect bounds with old _sigfault
  x86/mm: Rename flush_tlb_single() and flush_tlb_one() to __flush_tlb_one_[user|kernel]()
  kmemcheck: rip it out for real
  kmemcheck: rip it out
  kmemcheck: remove whats left of NOTRACK flags
  kmemcheck: stop using GFP_NOTRACK and SLAB_NOTRACK
  kmemcheck: remove annotations
  x86/speculation: Add <asm/msr-index.h> dependency
  nospec: Move array_index_nospec() parameter checking into separate macro
  x86/speculation: Fix up array_index_nospec_mask() asm constraint
  x86/debug: Use UD2 for WARN()
  x86/debug, objtool: Annotate WARN()-related UD2 as reachable
  objtool: Fix segfault in ignore_unreachable_insn()
  selftests/x86: Disable tests requiring 32-bit support on pure 64-bit systems
  selftests/x86: Do not rely on "int $0x80" in single_step_syscall.c
  selftests/x86: Do not rely on "int $0x80" in test_mremap_vdso.c
  selftests/x86/pkeys: Remove unused functions
  selftests/x86: Clean up and document sscanf() usage
  selftests/x86: Fix vDSO selftest segfault for vsyscall=none
  x86/entry/64: Remove the unused 'icebp' macro
  x86/entry/64: Fix paranoid_entry() frame pointer warning
  x86/entry/64: Indent PUSH_AND_CLEAR_REGS and POP_REGS properly
  x86/entry/64: Get rid of the ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR_REGS macros
  x86/entry/64: Use PUSH_AND_CLEAN_REGS in more cases
  x86/entry/64: Introduce the PUSH_AND_CLEAN_REGS macro
  x86/entry/64: Interleave XOR register clearing with PUSH instructions
  x86/entry/64: Merge the POP_C_REGS and POP_EXTRA_REGS macros into a single POP_REGS macro
  x86/entry/64: Merge SAVE_C_REGS and SAVE_EXTRA_REGS, remove unused extensions
  x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface
  PM: cpuidle: Fix cpuidle_poll_state_init() prototype
  PM / runtime: Update links_count also if !CONFIG_SRCU
  x86/speculation: Clean up various Spectre related details
  KVM/nVMX: Set the CPU_BASED_USE_MSR_BITMAPS if we have a valid L02 MSR bitmap
  X86/nVMX: Properly set spec_ctrl and pred_cmd before merging MSRs
  KVM/x86: Reduce retpoline performance impact in slot_handle_level_range(), by always inlining iterator helper methods
  Revert "x86/speculation: Simplify indirect_branch_prediction_barrier()"
  x86/speculation: Correct Speculation Control microcode blacklist again
  x86/speculation: Update Speculation Control microcode blacklist
  x86/mm/pti: Fix PTI comment in entry_SYSCALL_64()
  powerpc/mm/radix: Split linear mapping on hot-unplug
  crypto: sun4i_ss_prng - convert lock to _bh in sun4i_ss_prng_generate
  crypto: sun4i_ss_prng - fix return value of sun4i_ss_prng_generate
  compiler-gcc.h: __nostackprotector needs gcc-4.4 and up
  compiler-gcc.h: Introduce __optimize function attribute
  x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface
  x86/entry/64: Clear extra registers beyond syscall arguments, to reduce speculation attack surface
  x86: PM: Make APM idle driver initialize polling state
  x86/xen: init %gs very early to avoid page faults with stack protector
  x86/kexec: Make kexec (mostly) work in 5-level paging mode
  x86/gpu: add CFL to early quirks
  drm/i915/kbl: Change a KBL pci id to GT2 from GT1.5
  drm/i915: add GT number to intel_device_info
  arm: spear13xx: Fix spics gpio controller's warning
  arm: spear13xx: Fix dmas cells
  arm: spear600: Add missing interrupt-parent of rtc
  arm: dts: mt7623: fix card detection issue on bananapi-r2
  ARM: dts: nomadik: add interrupt-parent for clcd
  ARM: dts: STi: Add gpio polarity for "hdmi,hpd-gpio" property
  ARM: lpc3250: fix uda1380 gpio numbers
  arm64: dts: msm8916: Correct ipc references for smsm
  s390: fix handling of -1 in set{,fs}[gu]id16 syscalls
  dma-buf: fix reservation_object_wait_timeout_rcu once more v2
  powerpc: Fix DABR match on hash based systems
  powerpc/xive: Use hw CPU ids when configuring the CPU queues
  powerpc/mm: Flush radix process translations when setting MMU type
  powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove
  powerpc/radix: Remove trace_tlbie call from radix__flush_tlb_all
  ocfs2: try a blocking lock before return AOP_TRUNCATED_PAGE
  mwifiex: resolve reset vs. remove()/shutdown() deadlocks
  PM / devfreq: Propagate error from devfreq_add_device()
  swiotlb: suppress warning when __GFP_NOWARN is set
  cpufreq: powernv: Dont assume distinct pstate values for nominal and pmin
  RDMA/rxe: Fix rxe_qp_cleanup()
  RDMA/rxe: Fix a race condition in rxe_requester()
  RDMA/rxe: Fix a race condition related to the QP error state
  kselftest: fix OOM in memory compaction test
  selftests: seccomp: fix compile error seccomp_bpf
  IB/core: Avoid a potential OOPs for an unused optional parameter
  IB/core: Fix ib_wc structure size to remain in 64 bytes boundary
  IB/core: Fix two kernel warnings triggered by rxe registration
  IB/mlx4: Fix incorrectly releasing steerable UD QPs when have only ETH ports
  IB/qib: Fix comparison error with qperf compare/swap test
  IB/umad: Fix use of unprotected device pointer
  scsi: smartpqi: allow static build ("built-in")
  tracing: Prevent PROFILE_ALL_BRANCHES when FORTIFY_SOURCE=y

Change-Id: I351a603ea607d9c158727d60c8915981a555044f
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
2018-02-23 15:37:55 -08:00

786 lines
19 KiB
C

/*
* linux/kernel/softirq.c
*
* Copyright (C) 1992 Linus Torvalds
*
* Distribute under GPLv2.
*
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
/*
- No shared variables, all the data are CPU local.
- If a softirq needs serialization, let it serialize itself
by its own spinlocks.
- Even if softirq is serialized, only local cpu is marked for
execution. Hence, we get something sort of weak cpu binding.
Though it is still not clear, will it result in better locality
or will not.
Examples:
- NET RX softirq. It is multithreaded and does not require
any global serialization.
- NET TX softirq. It kicks software netdevice queues, hence
it is logically serialized per device, but this serialization
is invisible to common code.
- Tasklets: serialized wrt itself.
*/
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
/*
* active_softirqs -- per cpu, a mask of softirqs that are being handled,
* with the expectation that approximate answers are acceptable and therefore
* no synchronization.
*/
DEFINE_PER_CPU(__u32, active_softirqs);
const char * const softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
"TASKLET", "SCHED", "HRTIMER", "RCU"
};
/*
* we cannot loop indefinitely here to avoid userspace starvation,
* but we also don't want to introduce a worst case 1/HZ latency
* to the pending events, so lets the scheduler to balance
* the softirq load for us.
*/
static void wakeup_softirqd(void)
{
/* Interrupts are disabled: no need to stop preemption */
struct task_struct *tsk = __this_cpu_read(ksoftirqd);
if (tsk && tsk->state != TASK_RUNNING)
wake_up_process(tsk);
}
/*
* If ksoftirqd is scheduled, we do not want to process pending softirqs
* right now. Let ksoftirqd handle this at its own rate, to get fairness.
*/
static bool ksoftirqd_running(void)
{
struct task_struct *tsk = __this_cpu_read(ksoftirqd);
return tsk && (tsk->state == TASK_RUNNING);
}
/*
* preempt_count and SOFTIRQ_OFFSET usage:
* - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
* softirq processing.
* - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
* on local_bh_disable or local_bh_enable.
* This lets us distinguish between whether we are currently processing
* softirq and whether we just have bh disabled.
*/
/*
* This one is for softirq.c-internal use,
* where hardirqs are disabled legitimately:
*/
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;
WARN_ON_ONCE(in_irq());
raw_local_irq_save(flags);
/*
* The preempt tracer hooks into preempt_count_add and will break
* lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
* is set and before current->softirq_enabled is cleared.
* We must manually increment preempt_count here and manually
* call the trace_preempt_off later.
*/
__preempt_count_add(cnt);
/*
* Were softirqs turned off above:
*/
if (softirq_count() == (cnt & SOFTIRQ_MASK))
trace_softirqs_off(ip);
raw_local_irq_restore(flags);
if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
current->preempt_disable_ip = get_lock_parent_ip();
#endif
trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */
static void __local_bh_enable(unsigned int cnt)
{
WARN_ON_ONCE(!irqs_disabled());
if (softirq_count() == (cnt & SOFTIRQ_MASK))
trace_softirqs_on(_RET_IP_);
preempt_count_sub(cnt);
}
/*
* Special-case - softirqs can safely be enabled in
* cond_resched_softirq(), or by __do_softirq(),
* without processing still-pending softirqs:
*/
void _local_bh_enable(void)
{
WARN_ON_ONCE(in_irq());
__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}
EXPORT_SYMBOL(_local_bh_enable);
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
local_irq_disable();
#endif
/*
* Are softirqs going to be turned on now:
*/
if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
trace_softirqs_on(ip);
/*
* Keep preemption disabled until we are done with
* softirq processing:
*/
preempt_count_sub(cnt - 1);
if (unlikely(!in_interrupt() && local_softirq_pending())) {
/*
* Run softirq if any pending. And do it in its own stack
* as we may be calling this deep in a task call stack already.
*/
do_softirq();
}
preempt_count_dec();
#ifdef CONFIG_TRACE_IRQFLAGS
local_irq_enable();
#endif
preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
/*
* We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
* but break the loop if need_resched() is set or after 2 ms.
* The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
* certain cases, such as stop_machine(), jiffies may cease to
* increment and so we need the MAX_SOFTIRQ_RESTART limit as
* well to make sure we eventually return from this method.
*
* These limits have been established via experimentation.
* The two things to balance is latency against fairness -
* we want to handle softirqs as soon as possible, but they
* should not be able to lock up the box.
*/
#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
#ifdef CONFIG_TRACE_IRQFLAGS
/*
* When we run softirqs from irq_exit() and thus on the hardirq stack we need
* to keep the lockdep irq context tracking as tight as possible in order to
* not miss-qualify lock contexts and miss possible deadlocks.
*/
static inline bool lockdep_softirq_start(void)
{
bool in_hardirq = false;
if (trace_hardirq_context(current)) {
in_hardirq = true;
trace_hardirq_exit();
}
lockdep_softirq_enter();
return in_hardirq;
}
static inline void lockdep_softirq_end(bool in_hardirq)
{
lockdep_softirq_exit();
if (in_hardirq)
trace_hardirq_enter();
}
#else
static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
unsigned long old_flags = current->flags;
int max_restart = MAX_SOFTIRQ_RESTART;
struct softirq_action *h;
bool in_hardirq;
__u32 pending;
int softirq_bit;
/*
* Mask out PF_MEMALLOC s current task context is borrowed for the
* softirq. A softirq handled such as network RX might set PF_MEMALLOC
* again if the socket is related to swap
*/
current->flags &= ~PF_MEMALLOC;
pending = local_softirq_pending();
account_irq_enter_time(current);
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
in_hardirq = lockdep_softirq_start();
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
__this_cpu_write(active_softirqs, pending);
local_irq_enable();
h = softirq_vec;
while ((softirq_bit = ffs(pending))) {
unsigned int vec_nr;
int prev_count;
h += softirq_bit - 1;
vec_nr = h - softirq_vec;
prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(vec_nr);
trace_softirq_entry(vec_nr);
h->action(h);
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
vec_nr, softirq_to_name[vec_nr], h->action,
prev_count, preempt_count());
preempt_count_set(prev_count);
}
h++;
pending >>= softirq_bit;
}
__this_cpu_write(active_softirqs, 0);
rcu_bh_qs();
local_irq_disable();
pending = local_softirq_pending();
if (pending) {
if (time_before(jiffies, end) && !need_resched() &&
--max_restart)
goto restart;
wakeup_softirqd();
}
lockdep_softirq_end(in_hardirq);
account_irq_exit_time(current);
__local_bh_enable(SOFTIRQ_OFFSET);
WARN_ON_ONCE(in_interrupt());
current_restore_flags(old_flags, PF_MEMALLOC);
}
asmlinkage __visible void do_softirq(void)
{
__u32 pending;
unsigned long flags;
if (in_interrupt())
return;
local_irq_save(flags);
pending = local_softirq_pending();
if (pending && !ksoftirqd_running())
do_softirq_own_stack();
local_irq_restore(flags);
}
/*
* Enter an interrupt context.
*/
void irq_enter(void)
{
rcu_irq_enter();
if (is_idle_task(current) && !in_interrupt()) {
/*
* Prevent raise_softirq from needlessly waking up ksoftirqd
* here, as softirq will be serviced on return from interrupt.
*/
local_bh_disable();
tick_irq_enter();
_local_bh_enable();
}
__irq_enter();
}
static inline void invoke_softirq(void)
{
if (ksoftirqd_running())
return;
if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
* We can safely execute softirq on the current stack if
* it is the irq stack, because it should be near empty
* at this stage.
*/
__do_softirq();
#else
/*
* Otherwise, irq_exit() is called on the task stack that can
* be potentially deep already. So call softirq in its own stack
* to prevent from any overrun.
*/
do_softirq_own_stack();
#endif
} else {
wakeup_softirqd();
}
}
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
int cpu = smp_processor_id();
/* Make sure that timer wheel updates are propagated */
if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
if (!in_interrupt())
tick_nohz_irq_exit();
}
#endif
}
/*
* Exit an interrupt context. Process softirqs if needed and possible:
*/
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
local_irq_disable();
#else
WARN_ON_ONCE(!irqs_disabled());
#endif
account_irq_exit_time(current);
preempt_count_sub(HARDIRQ_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
tick_irq_exit();
rcu_irq_exit();
trace_hardirq_exit(); /* must be last! */
}
/*
* This function must run with irqs disabled!
*/
inline void raise_softirq_irqoff(unsigned int nr)
{
__raise_softirq_irqoff(nr);
/*
* If we're in an interrupt or softirq, we're done
* (this also catches softirq-disabled code). We will
* actually run the softirq once we return from
* the irq or softirq.
*
* Otherwise we wake up ksoftirqd to make sure we
* schedule the softirq soon.
*/
if (!in_interrupt())
wakeup_softirqd();
}
void raise_softirq(unsigned int nr)
{
unsigned long flags;
local_irq_save(flags);
raise_softirq_irqoff(nr);
local_irq_restore(flags);
}
void __raise_softirq_irqoff(unsigned int nr)
{
trace_softirq_raise(nr);
or_softirq_pending(1UL << nr);
}
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
softirq_vec[nr].action = action;
}
/*
* Tasklets
*/
struct tasklet_head {
struct tasklet_struct *head;
struct tasklet_struct **tail;
};
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
void __tasklet_schedule(struct tasklet_struct *t)
{
unsigned long flags;
local_irq_save(flags);
t->next = NULL;
*__this_cpu_read(tasklet_vec.tail) = t;
__this_cpu_write(tasklet_vec.tail, &(t->next));
raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
unsigned long flags;
local_irq_save(flags);
t->next = NULL;
*__this_cpu_read(tasklet_hi_vec.tail) = t;
__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
struct tasklet_struct *list;
local_irq_disable();
list = __this_cpu_read(tasklet_vec.head);
__this_cpu_write(tasklet_vec.head, NULL);
__this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
local_irq_enable();
while (list) {
struct tasklet_struct *t = list;
list = list->next;
if (tasklet_trylock(t)) {
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED,
&t->state))
BUG();
t->func(t->data);
tasklet_unlock(t);
continue;
}
tasklet_unlock(t);
}
local_irq_disable();
t->next = NULL;
*__this_cpu_read(tasklet_vec.tail) = t;
__this_cpu_write(tasklet_vec.tail, &(t->next));
__raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_enable();
}
}
static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
struct tasklet_struct *list;
local_irq_disable();
list = __this_cpu_read(tasklet_hi_vec.head);
__this_cpu_write(tasklet_hi_vec.head, NULL);
__this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
local_irq_enable();
while (list) {
struct tasklet_struct *t = list;
list = list->next;
if (tasklet_trylock(t)) {
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED,
&t->state))
BUG();
t->func(t->data);
tasklet_unlock(t);
continue;
}
tasklet_unlock(t);
}
local_irq_disable();
t->next = NULL;
*__this_cpu_read(tasklet_hi_vec.tail) = t;
__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
__raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
}
}
void tasklet_init(struct tasklet_struct *t,
void (*func)(unsigned long), unsigned long data)
{
t->next = NULL;
t->state = 0;
atomic_set(&t->count, 0);
t->func = func;
t->data = data;
}
EXPORT_SYMBOL(tasklet_init);
void tasklet_kill(struct tasklet_struct *t)
{
if (in_interrupt())
pr_notice("Attempt to kill tasklet from interrupt\n");
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
do {
yield();
} while (test_bit(TASKLET_STATE_SCHED, &t->state));
}
tasklet_unlock_wait(t);
clear_bit(TASKLET_STATE_SCHED, &t->state);
}
EXPORT_SYMBOL(tasklet_kill);
/*
* tasklet_hrtimer
*/
/*
* The trampoline is called when the hrtimer expires. It schedules a tasklet
* to run __tasklet_hrtimer_trampoline() which in turn will call the intended
* hrtimer callback, but from softirq context.
*/
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
struct tasklet_hrtimer *ttimer =
container_of(timer, struct tasklet_hrtimer, timer);
tasklet_hi_schedule(&ttimer->tasklet);
return HRTIMER_NORESTART;
}
/*
* Helper function which calls the hrtimer callback from
* tasklet/softirq context
*/
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
struct tasklet_hrtimer *ttimer = (void *)data;
enum hrtimer_restart restart;
restart = ttimer->function(&ttimer->timer);
if (restart != HRTIMER_NORESTART)
hrtimer_restart(&ttimer->timer);
}
/**
* tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
* @ttimer: tasklet_hrtimer which is initialized
* @function: hrtimer callback function which gets called from softirq context
* @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
* @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
*/
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t which_clock, enum hrtimer_mode mode)
{
hrtimer_init(&ttimer->timer, which_clock, mode);
ttimer->timer.function = __hrtimer_tasklet_trampoline;
tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
(unsigned long)ttimer);
ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
void __init softirq_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
per_cpu(tasklet_vec, cpu).tail =
&per_cpu(tasklet_vec, cpu).head;
per_cpu(tasklet_hi_vec, cpu).tail =
&per_cpu(tasklet_hi_vec, cpu).head;
}
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
static int ksoftirqd_should_run(unsigned int cpu)
{
return local_softirq_pending();
}
static void run_ksoftirqd(unsigned int cpu)
{
local_irq_disable();
if (local_softirq_pending()) {
/*
* We can safely run softirq on inline stack, as we are not deep
* in the task stack here.
*/
__do_softirq();
local_irq_enable();
cond_resched_rcu_qs();
return;
}
local_irq_enable();
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* tasklet_kill_immediate is called to remove a tasklet which can already be
* scheduled for execution on @cpu.
*
* Unlike tasklet_kill, this function removes the tasklet
* _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
*
* When this function is called, @cpu must be in the CPU_DEAD state.
*/
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
struct tasklet_struct **i;
BUG_ON(cpu_online(cpu));
BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
if (!test_bit(TASKLET_STATE_SCHED, &t->state))
return;
/* CPU is dead, so no lock needed. */
for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
if (*i == t) {
*i = t->next;
/* If this was the tail element, move the tail ptr */
if (*i == NULL)
per_cpu(tasklet_vec, cpu).tail = i;
return;
}
}
BUG();
}
static int takeover_tasklets(unsigned int cpu)
{
/* CPU is dead, so no lock needed. */
local_irq_disable();
/* Find end, append list for that CPU. */
if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
per_cpu(tasklet_vec, cpu).head = NULL;
per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
}
raise_softirq_irqoff(TASKLET_SOFTIRQ);
if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
per_cpu(tasklet_hi_vec, cpu).head = NULL;
per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
}
raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
return 0;
}
#else
#define takeover_tasklets NULL
#endif /* CONFIG_HOTPLUG_CPU */
static struct smp_hotplug_thread softirq_threads = {
.store = &ksoftirqd,
.thread_should_run = ksoftirqd_should_run,
.thread_fn = run_ksoftirqd,
.thread_comm = "ksoftirqd/%u",
};
static __init int spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
return 0;
}
early_initcall(spawn_ksoftirqd);
/*
* [ These __weak aliases are kept in a separate compilation unit, so that
* GCC does not inline them incorrectly. ]
*/
int __init __weak early_irq_init(void)
{
return 0;
}
int __init __weak arch_probe_nr_irqs(void)
{
return NR_IRQS_LEGACY;
}
int __init __weak arch_early_irq_init(void)
{
return 0;
}
unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
{
return from;
}