Mirror of https://github.com/rd-stuffs/msm-4.14.git
Synced 2025-02-20 11:45:48 +08:00

Merge branch 'qspinlock'

Fast and efficient queued spinlocks backported from mainline v5.3.

Signed-off-by: Danny Lin <danny@kdrag0n.dev>
Signed-off-by: azrim <mirzaspc@gmail.com>

This commit is contained in:
parent 67275beaaf
commit dc40b2c53d
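
Orientation for the hunks below: a queued spinlock packs all lock state into one 32-bit word — a locked byte, a pending bit for the first waiter, and a tail field identifying the last queued CPU's per-CPU MCS node. The sketch models that layout; the bit positions are an assumption taken from upstream qspinlock_types.h (NR_CPUS < 16K configuration), not something this diff spells out.

/* Hedged sketch of the qspinlock word (assumed mainline layout):
 *
 *  bits  0- 7: locked byte   (_Q_LOCKED_VAL == 1)
 *  bit      8: pending       (first waiter spins here)
 *  bits 16-17: tail index    (task/softirq/hardirq/NMI nesting level)
 *  bits 18-31: tail CPU + 1  (0 means no queue)
 */
#include <stdint.h>

#define Q_LOCKED_VAL   (1u << 0)
#define Q_PENDING_VAL  (1u << 8)
#define Q_TAIL_MASK    (~0u << 16)

static inline int lock_is_free(uint32_t val)
{
	return val == 0;		/* nothing set: uncontended fast path */
}

static inline int lock_has_queue(uint32_t val)
{
	return (val & Q_TAIL_MASK) != 0;	/* at least one MCS waiter */
}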
@@ -14,7 +14,6 @@
  * We make no fairness assumptions. They have a cost.
  */

-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
 #define arch_spin_is_locked(x)	((x)->lock != 0)

 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
@@ -171,7 +170,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
 	lock->lock = 0;
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #endif /* _ALPHA_SPINLOCK_H */
@@ -416,8 +416,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_H */
@@ -280,8 +280,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_H */
@@ -48,6 +48,7 @@ config ARM64
 	select ARCH_SUPPORTS_SHADOW_CALL_STACK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -17,6 +17,7 @@ generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += preempt.h
 generic-y += qrwlock.h
+generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h
@@ -127,6 +127,19 @@ do {									\
 	__u.__val;							\
 })

+#define smp_cond_load_relaxed(ptr, cond_expr)				\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	typeof(*ptr) VAL;						\
+	for (;;) {							\
+		VAL = READ_ONCE(*__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		__cmpwait_relaxed(__PTR, VAL);				\
+	}								\
+	VAL;								\
+})
+
 #define smp_cond_load_acquire(ptr, cond_expr)				\
 ({									\
	typeof(ptr) __PTR = (ptr);					\
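
The block added above is arm64's smp_cond_load_relaxed(): it spins on *ptr until cond_expr holds, but instead of a pure busy loop it calls __cmpwait_relaxed(), which parks the core in WFE until the exclusive monitor on that address is disturbed. VAL is part of the macro's contract — inside cond_expr it names the freshly loaded value. A hedged usage sketch (the waiting function is hypothetical):

/* Hedged sketch: spin until another CPU publishes a non-zero flag.
 * VAL is the macro-provided name for the value just loaded from *flag. */
static int wait_for_flag(int *flag)
{
	/*
	 * No ordering is implied by the _relaxed form; use
	 * smp_cond_load_acquire() when later reads must observe stores
	 * made before the flag was set (as the slowpath hunks below do).
	 */
	return smp_cond_load_relaxed(flag, VAL != 0);
}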
@@ -16,136 +16,8 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H

-#include <asm/lse.h>
-#include <asm/spinlock_types.h>
-#include <asm/processor.h>
-
-/*
- * Spinlock implementation.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- */
-
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	unsigned int tmp;
-	arch_spinlock_t lockval, newval;
-
-	asm volatile(
-	/* Atomically increment the next ticket. */
-	ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-"	prfm	pstl1strm, %3\n"
-"1:	ldaxr	%w0, %3\n"
-"	add	%w1, %w0, %w5\n"
-"	stxr	%w2, %w1, %3\n"
-"	cbnz	%w2, 1b\n",
-	/* LSE atomics */
-"	mov	%w2, %w5\n"
-"	ldadda	%w2, %w0, %3\n"
-	__nops(3)
-	)
-
-	/* Did we get the lock? */
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbz	%w1, 3f\n"
-	/*
-	 * No: spin on the owner. Send a local event to avoid missing an
-	 * unlock before the exclusive load.
-	 */
-"	sevl\n"
-"2:	wfe\n"
-"	ldaxrh	%w2, %4\n"
-"	eor	%w1, %w2, %w0, lsr #16\n"
-"	cbnz	%w1, 2b\n"
-	/* We got the lock. Critical section starts here. */
-"3:"
-	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
-	: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
-	: "memory");
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	unsigned int tmp;
-	arch_spinlock_t lockval;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-"1:	ldaxr	%w0, %2\n"
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbnz	%w1, 2f\n"
-"	add	%w0, %w0, %3\n"
-"	stxr	%w1, %w0, %2\n"
-"	cbnz	%w1, 1b\n"
-"2:",
-	/* LSE atomics */
-"	ldr	%w0, %2\n"
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbnz	%w1, 1f\n"
-"	add	%w1, %w0, %3\n"
-"	casa	%w0, %w1, %2\n"
-"	sub	%w1, %w1, %3\n"
-"	eor	%w1, %w1, %w0\n"
-"1:")
-	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
-	: "I" (1 << TICKET_SHIFT)
-	: "memory");
-
-	return !tmp;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-"	ldrh	%w1, %0\n"
-"	add	%w1, %w1, #1\n"
-"	stlrh	%w1, %0",
-	/* LSE atomics */
-"	mov	%w1, #1\n"
-"	staddlh	%w1, %0\n"
-	__nops(1))
-	: "=Q" (lock->owner), "=&r" (tmp)
-	:
-	: "memory");
-}
-
-static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return lock.owner == lock.next;
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	/*
-	 * Ensure prior spin_lock operations to other locks have completed
-	 * on this CPU before we test whether "lock" is locked.
-	 */
-	smp_mb(); /* ^^^ */
-	return !arch_spin_value_unlocked(READ_ONCE(*lock));
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	arch_spinlock_t lockval = READ_ONCE(*lock);
-	return (lockval.next - lockval.owner) > 1;
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
 #include <asm/qrwlock.h>
-
-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
+#include <asm/qspinlock.h>

 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
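
What is deleted here is arm64's ticket spinlock. Its lock word is two u16 halves: next is the ticket dispenser and owner is the ticket currently being served; arch_spin_lock() takes a ticket with an atomic add (the 1 << TICKET_SHIFT increment) and spins in WFE until owner catches up. Tickets give FIFO fairness, but every waiter polls the same cacheline — the MCS queue in qspinlock avoids exactly that. A hedged C11 model of the deleted algorithm (illustrative only; the real code is the LL/SC and LSE assembly above):

/* Hedged model of a ticket lock, C11 atomics. */
#include <stdatomic.h>
#include <stdint.h>

struct ticket_lock {
	_Atomic uint16_t next;	/* ticket dispenser */
	_Atomic uint16_t owner;	/* ticket now being served */
};

static void ticket_lock(struct ticket_lock *l)
{
	/* take a ticket, then wait until it is called */
	uint16_t me = atomic_fetch_add_explicit(&l->next, 1,
						memory_order_relaxed);
	while (atomic_load_explicit(&l->owner, memory_order_acquire) != me)
		;	/* the asm sleeps in WFE here instead of spinning */
}

static void ticket_unlock(struct ticket_lock *l)
{
	atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
}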
@@ -20,22 +20,7 @@
 # error "please don't include this file directly"
 #endif

-#include <linux/types.h>
-
-#define TICKET_SHIFT	16
-
-typedef struct {
-#ifdef __AARCH64EB__
-	u16 next;
-	u16 owner;
-#else
-	u16 owner;
-	u16 next;
-#endif
-} __aligned(4) arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 , 0 }
-
+#include <asm-generic/qspinlock_types.h>
 #include <asm-generic/qrwlock_types.h>

 #endif
@@ -36,8 +36,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	__raw_spin_lock_asm(&lock->lock);
 }

-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return __raw_spin_trylock_asm(&lock->lock);
@@ -63,8 +61,6 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 	__raw_read_lock_asm(&rw->lock);
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	return __raw_read_trylock_asm(&rw->lock);
@@ -80,8 +76,6 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 	__raw_write_lock_asm(&rw->lock);
 }

-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	return __raw_write_trylock_asm(&rw->lock);
@@ -92,10 +86,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	__raw_write_unlock_asm(&rw->lock);
 }

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif

 #endif /* !__BFIN_SPINLOCK_H */
@@ -177,11 +177,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 /*
  * SMP spinlocks are intended to allow only a single CPU at the lock
  */
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
 #define arch_spin_is_locked(x)	((x)->lock != 0)

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #endif
@@ -127,6 +127,7 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 {
 	arch_spin_lock(lock);
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 #define arch_read_can_lock(rw)	(*(volatile int *)(rw) >= 0)
 #define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
@@ -157,6 +158,7 @@ arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 	  : "p6", "p7", "r2", "memory");
 }

+#define arch_read_lock_flags	arch_read_lock_flags
 #define arch_read_lock(lock)	arch_read_lock_flags(lock, 0)

 #else /* !ASM_SUPPORTED */
@@ -209,6 +211,7 @@ arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 	  : "ar.ccv", "p6", "p7", "r2", "r29", "memory");
 }

+#define arch_write_lock_flags	arch_write_lock_flags
 #define arch_write_lock(rw)	arch_write_lock_flags(rw, 0)

 #define arch_write_trylock(rw) \
@@ -232,8 +235,6 @@ static inline void arch_write_unlock(arch_rwlock_t *x)

 #else /* !ASM_SUPPORTED */

-#define arch_write_lock_flags(l, flags)	arch_write_lock(l)
-
 #define arch_write_lock(l)						\
 ({									\
	__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);	\
@@ -273,8 +274,4 @@ static inline int arch_read_trylock(arch_rwlock_t *x)
 	return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word;
 }

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ASM_IA64_SPINLOCK_H */
@@ -29,7 +29,6 @@
  */

 #define arch_spin_is_locked(x)	(*(volatile int *)(&(x)->slock) <= 0)
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)

 /**
  * arch_spin_trylock - Try spin lock and return a result
@@ -318,11 +317,4 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 	return 0;
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ASM_M32R_SPINLOCK_H */
@@ -16,13 +16,4 @@
  * locked.
  */

-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_H */
@@ -243,8 +243,4 @@ static inline int arch_read_can_lock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_LNKGET_H */
@@ -13,11 +13,4 @@
 #include <asm/qrwlock.h>
 #include <asm/qspinlock.h>

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ASM_SPINLOCK_H */
@@ -84,6 +84,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 	  : "d" (flags), "a"(&lock->slock), "i"(EPSW_IE | MN10300_CLI_LEVEL)
 	  : "memory", "cc");
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 #ifdef __KERNEL__

@@ -183,9 +184,6 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 	return 0;
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #define _raw_spin_relax(lock)	cpu_relax()
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
@@ -161,6 +161,7 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 		local_irq_restore(flags_dis);
 	}
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
@@ -302,9 +303,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	rw->lock = 0;
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #define arch_spin_relax(lock)	__spin_yield(lock)
 #define arch_read_relax(lock)	__rw_yield(lock)
 #define arch_write_relax(lock)	__rw_yield(lock)
@@ -46,6 +46,7 @@ static inline void arch_spin_relax(arch_spinlock_t *lock)
 {
 	arch_lock_relax(lock->lock);
 }
+#define arch_spin_relax	arch_spin_relax

 static inline u32 arch_spin_lockval(int cpu)
 {
@@ -81,6 +82,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
 	if (!arch_spin_trylock_once(lp))
 		arch_spin_lock_wait_flags(lp, flags);
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 static inline int arch_spin_trylock(arch_spinlock_t *lp)
 {
@@ -126,9 +128,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
 extern int _raw_write_trylock_retry(arch_rwlock_t *lp);

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 static inline int arch_read_trylock_once(arch_rwlock_t *rw)
 {
 	int old = ACCESS_ONCE(rw->lock);
@@ -269,10 +268,12 @@ static inline void arch_read_relax(arch_rwlock_t *rw)
 {
 	arch_lock_relax(rw->owner);
 }
+#define arch_read_relax	arch_read_relax

 static inline void arch_write_relax(arch_rwlock_t *rw)
 {
 	arch_lock_relax(rw->owner);
 }
+#define arch_write_relax	arch_write_relax

 #endif /* __ASM_SPINLOCK_H */
@@ -27,7 +27,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new)
  */

 #define arch_spin_is_locked(x)	((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)

 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
@@ -102,11 +101,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS;
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_CAS_H */
@@ -19,7 +19,6 @@
  */

 #define arch_spin_is_locked(x)	((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)

 /*
  * Simple spin lock operations. There are two variants, one clears IRQ's
@@ -209,11 +208,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	return (oldval > (RW_LOCK_BIAS - 1));
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_LLSC_H */
@@ -51,9 +51,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)

 void arch_spin_lock(arch_spinlock_t *lock);

-/* We cannot take an interrupt after getting a ticket, so don't enable them. */
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
 int arch_spin_trylock(arch_spinlock_t *lock);

 static inline void arch_spin_unlock(arch_spinlock_t *lock)
@@ -125,7 +122,4 @@ void arch_read_unlock(arch_rwlock_t *rwlock);
  */
 void arch_write_unlock(arch_rwlock_t *rwlock);

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #endif /* _ASM_TILE_SPINLOCK_32_H */
@@ -75,9 +75,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 /* Try to get the lock, and return whether we succeeded. */
 int arch_spin_trylock(arch_spinlock_t *lock);

-/* We cannot take an interrupt after getting a ticket, so don't enable them. */
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -156,7 +153,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	return 0;
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #endif /* _ASM_TILE_SPINLOCK_64_H */
@@ -42,11 +42,4 @@

 #include <asm/qrwlock.h>

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ASM_X86_SPINLOCK_H */
@@ -33,8 +33,6 @@

 #define arch_spin_is_locked(x)	((x)->slock != 0)

-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
@@ -200,7 +198,4 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 	  : "memory");
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #endif /* _XTENSA_SPINLOCK_H */
@@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;

-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+						 _QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -114,8 +114,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;

 	queued_write_lock_slowpath(lock);
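
This is the pattern repeated throughout the commit: atomic_cmpxchg_acquire() returns the old value and forces a separate compare, while atomic_try_cmpxchg_acquire() returns a bool and, on failure, writes the value it actually found back into the caller's expected variable — the same contract as C11 compare-exchange, and a better fit for x86's CMPXCHG. A hedged sketch of the equivalence (names are illustrative, not from the diff):

/* Illustrative equivalence, C11 atomics. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool take_cmpxchg_style(_Atomic uint32_t *cnts, uint32_t locked)
{
	uint32_t expected = 0;
	uint32_t old = expected;
	/* emulate cmpxchg(): learn the old value, then compare separately */
	atomic_compare_exchange_strong_explicit(cnts, &old, locked,
			memory_order_acquire, memory_order_relaxed);
	return old == expected;	/* the extra compare try_cmpxchg avoids */
}

static bool take_try_cmpxchg_style(_Atomic uint32_t *cnts, uint32_t locked)
{
	uint32_t expected = 0;
	/* bool result; on failure 'expected' now holds the observed value */
	return atomic_compare_exchange_strong_explicit(cnts, &expected, locked,
			memory_order_acquire, memory_order_relaxed);
}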
@@ -26,7 +26,6 @@
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
-#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
@@ -35,7 +34,6 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 	 */
 	return atomic_read(&lock->val);
 }
-#endif

 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -68,10 +66,12 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-		return 1;
-	return 0;
+	u32 val = atomic_read(&lock->val);
+
+	if (unlikely(val))
+		return 0;
+
+	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
 }

 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@@ -82,11 +82,11 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;

-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;

 	queued_spin_lock_slowpath(lock, val);
 }

@@ -100,7 +100,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * unlock() needs release semantics:
 	 */
-	(void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
+	smp_store_release(&lock->locked, 0);
 }
 #endif

@@ -121,6 +121,5 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 #define arch_spin_lock(l)		queued_spin_lock(l)
 #define arch_spin_trylock(l)		queued_spin_trylock(l)
 #define arch_spin_unlock(l)		queued_spin_unlock(l)
-#define arch_spin_lock_flags(l, f)	queued_spin_lock(l)

 #endif /* __ASM_GENERIC_QSPINLOCK_H */
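
Two things worth noting in this header: the lock fast path is now a single try_cmpxchg from 0 to _Q_LOCKED_VAL (any non-zero word means contention and falls to the slowpath with the observed value), and unlock becomes a plain release store because the locked byte is exclusively owned by the lock holder. A hedged C11 model of the pair (names are illustrative):

/* Hedged model of the new fast paths. */
#include <stdatomic.h>
#include <stdint.h>

#define LOCKED_VAL 1u

static void model_lock(_Atomic uint32_t *val)
{
	uint32_t expect = 0;
	if (atomic_compare_exchange_strong_explicit(val, &expect, LOCKED_VAL,
			memory_order_acquire, memory_order_relaxed))
		return;	/* 0,0,0 -> 0,0,1: uncontended acquire */
	/* the kernel calls queued_spin_lock_slowpath(lock, expect) here */
}

static void model_unlock(_Atomic uint8_t *locked)
{
	/*
	 * Only the holder ever writes the locked byte (it aliases bits
	 * 0-7 of the 32-bit word), so a release store of 0 replaces the
	 * old atomic_sub_return_release() read-modify-write.
	 */
	atomic_store_explicit(locked, 0, memory_order_release);
}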
@@ -142,9 +142,6 @@ extern void __mutex_init(struct mutex *lock, const char *name,
  */
 static inline int mutex_is_locked(struct mutex *lock)
 {
-	/*
-	 * XXX think about spin_is_locked
-	 */
 	return __mutex_owner(lock) != NULL;
 }

@@ -38,6 +38,15 @@ do {								\
 extern int do_raw_write_trylock(rwlock_t *lock);
 extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
 #else
+
+#ifndef arch_read_lock_flags
+# define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
+#endif
+
+#ifndef arch_write_lock_flags
+# define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
+#endif
+
 # define do_raw_read_lock(rwlock)	do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
 # define do_raw_read_lock_flags(lock, flags) \
 	do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
@@ -166,6 +166,10 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
 	arch_spin_lock(&lock->raw_lock);
 }

+#ifndef arch_spin_lock_flags
+#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
+#endif
+
 static inline void
 do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
 {
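
These #ifndef fallbacks are what let all the per-arch dummy *_lock_flags macros above be deleted: an architecture with a real IRQ-aware variant opts in by defining the macro to its own name (see the ia64, mn10300, powerpc, and s390 hunks), and everyone else silently inherits the plain-lock fallback from core code. A minimal sketch of the idiom (names are hypothetical):

/* Hedged sketch of the "define-to-itself" opt-in idiom. */

/* arch header: only an arch with a real IRQ-aware variant does this */
void arch_spin_lock_flags_example(void);
#define arch_spin_lock_flags_example arch_spin_lock_flags_example

/* core header: architectures that did not opt in get the fallback */
#ifndef arch_spin_lock_flags_example
#define arch_spin_lock_flags_example() arch_spin_lock_example()
#endif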
@@ -32,14 +32,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	barrier();
 }

-static inline void
-arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	local_irq_save(flags);
-	lock->slock = 0;
-	barrier();
-}
-
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	char oldval = lock->slock;
@@ -12,11 +12,11 @@
  * GNU General Public License for more details.
  *
  * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
- * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2013-2014,2018 Red Hat, Inc.
  * (C) Copyright 2015 Intel Corp.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 *
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
 *          Peter Zijlstra <peterz@infradead.org>
 */

@@ -32,6 +32,11 @@
 #include <asm/byteorder.h>
 #include <asm/qspinlock.h>

+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
 /*
  * The basic principle of a queue-based spinlock can best be understood
  * by studying a classic queue-based spinlock implementation called the
@@ -69,12 +74,24 @@
  */

 #include "mcs_spinlock.h"

-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define MAX_NODES	8
-#else
 #define MAX_NODES	4
-#endif

+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+	struct mcs_spinlock mcs;
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	long reserved[2];
+#endif
+};
+
 /*
  * The pending bit spinning loop count.
|
||||
*
|
||||
* PV doubles the storage and uses the second cacheline for PV state.
|
||||
*/
|
||||
static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
|
||||
static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
|
||||
|
||||
/*
|
||||
* We must be able to distinguish between no-tail and the tail at 0:0,
|
||||
@ -107,9 +124,6 @@ static inline __pure u32 encode_tail(int cpu, int idx)
|
||||
{
|
||||
u32 tail;
|
||||
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
BUG_ON(idx > 3);
|
||||
#endif
|
||||
tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
|
||||
tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
|
||||
|
||||
@ -121,7 +135,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
|
||||
int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
|
||||
int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
|
||||
|
||||
return per_cpu_ptr(&mcs_nodes[idx], cpu);
|
||||
return per_cpu_ptr(&qnodes[idx].mcs, cpu);
|
||||
}
|
||||
|
||||
static inline __pure
|
||||
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
|
||||
{
|
||||
return &((struct qnode *)base + idx)->mcs;
|
||||
}
|
||||
|
||||
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
|
||||
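
encode_tail() packs (cpu, nesting index) into the top bits of the lock word, storing cpu off-by-one so that a tail of 0 still means "no queue". A hedged round-trip example (bit offsets assumed from mainline qspinlock_types.h: index at bit 16, CPU at bit 18):

#include <assert.h>
#include <stdint.h>

#define Q_TAIL_IDX_OFFSET 16
#define Q_TAIL_IDX_MASK   (3u << Q_TAIL_IDX_OFFSET)
#define Q_TAIL_CPU_OFFSET 18

static uint32_t encode_tail(int cpu, int idx)
{
	return ((uint32_t)(cpu + 1) << Q_TAIL_CPU_OFFSET) |
	       ((uint32_t)idx << Q_TAIL_IDX_OFFSET);	/* idx < 4 */
}

int main(void)
{
	uint32_t tail = encode_tail(5, 2);	/* CPU 5, hardirq-level node */
	assert(((tail >> Q_TAIL_CPU_OFFSET) - 1) == 5);
	assert(((tail & Q_TAIL_IDX_MASK) >> Q_TAIL_IDX_OFFSET) == 2);
	assert(encode_tail(0, 0) != 0);	/* CPU 0 still distinguishable */
	return 0;
}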
@@ -164,10 +184,10 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
 	/*
-	 * Use release semantics to make sure that the MCS node is properly
-	 * initialized before changing the tail code.
+	 * We can use relaxed semantics since the caller ensures that the
+	 * MCS node is properly initialized before updating the tail.
 	 */
-	return (u32)xchg_release(&lock->tail,
+	return (u32)xchg_relaxed(&lock->tail,
 				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
 }

@@ -212,10 +232,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 	for (;;) {
 		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
 		/*
-		 * Use release semantics to make sure that the MCS node is
-		 * properly initialized before changing the tail code.
+		 * We can use relaxed semantics since the caller ensures that
+		 * the MCS node is properly initialized before updating the
+		 * tail.
 		 */
-		old = atomic_cmpxchg_release(&lock->val, val, new);
+		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
 		if (old == val)
 			break;

|
||||
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
|
||||
|
||||
if (pv_enabled())
|
||||
goto queue;
|
||||
goto pv_queue;
|
||||
|
||||
if (virt_spin_lock(lock))
|
||||
return;
|
||||
@ -334,17 +355,23 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
/*
|
||||
* trylock || pending
|
||||
*
|
||||
* 0,0,0 -> 0,0,1 ; trylock
|
||||
* 0,0,1 -> 0,1,1 ; pending
|
||||
* 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
|
||||
*/
|
||||
val = queued_fetch_set_pending_acquire(lock);
|
||||
|
||||
/*
|
||||
* If we observe any contention; undo and queue.
|
||||
* If we observe contention, there is a concurrent locker.
|
||||
*
|
||||
* Undo and queue; our setting of PENDING might have made the
|
||||
* n,0,0 -> 0,0,0 transition fail and it will now be waiting
|
||||
* on @next to become !NULL.
|
||||
*/
|
||||
if (unlikely(val & ~_Q_LOCKED_MASK)) {
|
||||
|
||||
/* Undo PENDING if we set it. */
|
||||
if (!(val & _Q_PENDING_MASK))
|
||||
clear_pending(lock);
|
||||
|
||||
goto queue;
|
||||
}
|
||||
|
||||
@ -360,7 +387,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
* barriers.
|
||||
*/
|
||||
if (val & _Q_LOCKED_MASK)
|
||||
smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
|
||||
atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
|
||||
|
||||
/*
|
||||
* take ownership and clear the pending bit.
|
||||
@ -368,6 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
* 0,1,0 -> 0,0,1
|
||||
*/
|
||||
clear_pending_set_locked(lock);
|
||||
qstat_inc(qstat_lock_pending, true);
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -375,11 +403,34 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
* queuing.
|
||||
*/
|
||||
queue:
|
||||
node = this_cpu_ptr(&mcs_nodes[0]);
|
||||
qstat_inc(qstat_lock_slowpath, true);
|
||||
pv_queue:
|
||||
node = this_cpu_ptr(&qnodes[0].mcs);
|
||||
idx = node->count++;
|
||||
tail = encode_tail(smp_processor_id(), idx);
|
||||
|
||||
node += idx;
|
||||
/*
|
||||
* 4 nodes are allocated based on the assumption that there will
|
||||
* not be nested NMIs taking spinlocks. That may not be true in
|
||||
* some architectures even though the chance of needing more than
|
||||
* 4 nodes will still be extremely unlikely. When that happens,
|
||||
* we fall back to spinning on the lock directly without using
|
||||
* any MCS node. This is not the most elegant solution, but is
|
||||
* simple enough.
|
||||
*/
|
||||
if (unlikely(idx >= MAX_NODES)) {
|
||||
qstat_inc(qstat_lock_no_node, true);
|
||||
while (!queued_spin_trylock(lock))
|
||||
cpu_relax();
|
||||
goto release;
|
||||
}
|
||||
|
||||
node = grab_mcs_node(node, idx);
|
||||
|
||||
/*
|
||||
* Keep counts of non-zero index values:
|
||||
*/
|
||||
qstat_inc(qstat_lock_use_node2 + idx - 1, idx);
|
||||
|
||||
/*
|
||||
* Ensure that we increment the head node->count before initialising
|
||||
@ -401,12 +452,18 @@ queue:
|
||||
goto release;
|
||||
|
||||
/*
|
||||
* Ensure that the initialisation of @node is complete before we
|
||||
* publish the updated tail via xchg_tail() and potentially link
|
||||
* @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
/*
|
||||
* Publish the updated tail.
|
||||
* We have already touched the queueing cacheline; don't bother with
|
||||
* pending stuff.
|
||||
*
|
||||
* p,*,* -> n,*,*
|
||||
*
|
||||
* RELEASE, such that the stores to @node must be complete.
|
||||
*/
|
||||
old = xchg_tail(lock, tail);
|
||||
next = NULL;
|
||||
@ -418,14 +475,8 @@ queue:
|
||||
if (old & _Q_TAIL_MASK) {
|
||||
prev = decode_tail(old);
|
||||
|
||||
/*
|
||||
* We must ensure that the stores to @node are observed before
|
||||
* the write to prev->next. The address dependency from
|
||||
* xchg_tail is not sufficient to ensure this because the read
|
||||
* component of xchg_tail is unordered with respect to the
|
||||
* initialisation of @node.
|
||||
*/
|
||||
smp_store_release(&prev->next, node);
|
||||
/* Link @node into the waitqueue. */
|
||||
WRITE_ONCE(prev->next, node);
|
||||
|
||||
pv_wait_node(node, prev);
|
||||
arch_mcs_spin_lock_contended(&node->locked);
|
||||
@ -454,8 +505,8 @@ queue:
|
||||
*
|
||||
* The PV pv_wait_head_or_lock function, if active, will acquire
|
||||
* the lock and return a non-zero value. So we have to skip the
|
||||
* smp_cond_load_acquire() call. As the next PV queue head hasn't been
|
||||
* designated yet, there is no way for the locked value to become
|
||||
* atomic_cond_read_acquire() call. As the next PV queue head hasn't
|
||||
* been designated yet, there is no way for the locked value to become
|
||||
* _Q_SLOW_VAL. So both the set_locked() and the
|
||||
* atomic_cmpxchg_relaxed() calls will be safe.
|
||||
*
|
||||
@ -465,7 +516,7 @@ queue:
|
||||
if ((val = pv_wait_head_or_lock(lock, node)))
|
||||
goto locked;
|
||||
|
||||
val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
|
||||
val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
|
||||
|
||||
locked:
|
||||
/*
|
||||
@ -479,27 +530,33 @@ locked:
|
||||
* Otherwise, we only need to grab the lock.
|
||||
*/
|
||||
|
||||
/* In the PV case we might already have _Q_LOCKED_VAL set */
|
||||
/*
|
||||
* In the PV case we might already have _Q_LOCKED_VAL set, because
|
||||
* of lock stealing; therefore we must also allow:
|
||||
*
|
||||
* n,0,1 -> 0,0,1
|
||||
*
|
||||
* Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
|
||||
* above wait condition, therefore any concurrent setting of
|
||||
* PENDING will make the uncontended transition fail.
|
||||
*/
|
||||
if ((val & _Q_TAIL_MASK) == tail) {
|
||||
/*
|
||||
* The smp_cond_load_acquire() call above has provided the
|
||||
* necessary acquire semantics required for locking.
|
||||
*/
|
||||
old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
|
||||
if (old == val)
|
||||
if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
|
||||
goto release; /* No contention */
|
||||
}
|
||||
|
||||
/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
|
||||
/*
|
||||
* Either somebody is queued behind us or _Q_PENDING_VAL got set
|
||||
* which will then detect the remaining tail and queue behind us
|
||||
* ensuring we'll see a @next.
|
||||
*/
|
||||
set_locked(lock);
|
||||
|
||||
/*
|
||||
* contended path; wait for next if not observed yet, release.
|
||||
*/
|
||||
if (!next) {
|
||||
while (!(next = READ_ONCE(node->next)))
|
||||
cpu_relax();
|
||||
}
|
||||
if (!next)
|
||||
next = smp_cond_load_relaxed(&node->next, (VAL));
|
||||
|
||||
arch_mcs_spin_unlock_contended(&next->locked);
|
||||
pv_kick_node(lock, next);
|
||||
@ -508,7 +565,7 @@ release:
|
||||
/*
|
||||
* release the node
|
||||
*/
|
||||
__this_cpu_dec(mcs_nodes[0].count);
|
||||
__this_cpu_dec(qnodes[0].mcs.count);
|
||||
}
|
||||
EXPORT_SYMBOL(queued_spin_lock_slowpath);
|
||||
|
||||
|
@@ -49,17 +49,10 @@ enum vcpu_state {

 struct pv_node {
 	struct mcs_spinlock	mcs;
-	struct mcs_spinlock	__res[3];
-
 	int			cpu;
 	u8			state;
 };

-/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
 /*
  * By replacing the regular queued_spin_trylock() with the function below,
  * it will be called once when a lock waiter enter the PV slowpath before
@@ -257,7 +250,7 @@ static void pv_init_node(struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;

-	BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
+	BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode));

 	pn->cpu = smp_processor_id();
 	pn->state = vcpu_running;
@@ -394,7 +387,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	/*
 	 * Tracking # of slowpath locking operations
 	 */
-	qstat_inc(qstat_pv_lock_slowpath, true);
+	qstat_inc(qstat_lock_slowpath, true);

 	for (;; waitcnt++) {
 		/*
@@ -22,13 +22,21 @@
  * pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
  * pv_latency_kick	- average latency (ns) of vCPU kick operation
  * pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
- * pv_lock_slowpath	- # of locking operations via the slowpath
  * pv_lock_stealing	- # of lock stealing operations
  * pv_spurious_wakeup	- # of spurious wakeups in non-head vCPUs
  * pv_wait_again	- # of wait's after a queue head vCPU kick
  * pv_wait_early	- # of early vCPU wait's
  * pv_wait_head	- # of vCPU wait's at the queue head
  * pv_wait_node	- # of vCPU wait's at a non-head queue node
+ * lock_pending	- # of locking operations via pending code
+ * lock_slowpath	- # of locking operations via MCS lock queue
+ * lock_use_node2	- # of locking operations that use 2nd per-CPU node
+ * lock_use_node3	- # of locking operations that use 3rd per-CPU node
+ * lock_use_node4	- # of locking operations that use 4th per-CPU node
+ * lock_no_node	- # of locking operations without using per-CPU node
  *
+ * Subtracting lock_use_node[234] from lock_slowpath will give you
+ * lock_use_node1.
+ *
  * Writing to the "reset_counters" file will reset all the above counter
  * values.
@@ -46,13 +54,18 @@ enum qlock_stats {
 	qstat_pv_kick_wake,
 	qstat_pv_latency_kick,
 	qstat_pv_latency_wake,
-	qstat_pv_lock_slowpath,
 	qstat_pv_lock_stealing,
 	qstat_pv_spurious_wakeup,
 	qstat_pv_wait_again,
 	qstat_pv_wait_early,
 	qstat_pv_wait_head,
 	qstat_pv_wait_node,
+	qstat_lock_pending,
+	qstat_lock_slowpath,
+	qstat_lock_use_node2,
+	qstat_lock_use_node3,
+	qstat_lock_use_node4,
+	qstat_lock_no_node,
 	qstat_num,	/* Total number of statistical counters */
 	qstat_reset_cnts = qstat_num,
 };
@@ -73,12 +86,17 @@ static const char * const qstat_names[qstat_num + 1] = {
 	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
 	[qstat_pv_latency_kick]	   = "pv_latency_kick",
 	[qstat_pv_latency_wake]    = "pv_latency_wake",
-	[qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
 	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
 	[qstat_pv_wait_again]      = "pv_wait_again",
 	[qstat_pv_wait_early]      = "pv_wait_early",
 	[qstat_pv_wait_head]       = "pv_wait_head",
 	[qstat_pv_wait_node]       = "pv_wait_node",
+	[qstat_lock_pending]       = "lock_pending",
+	[qstat_lock_slowpath]      = "lock_slowpath",
+	[qstat_lock_use_node2]     = "lock_use_node2",
+	[qstat_lock_use_node3]     = "lock_use_node3",
+	[qstat_lock_use_node4]     = "lock_use_node4",
+	[qstat_lock_no_node]       = "lock_no_node",
 	[qstat_reset_cnts]         = "reset_counters",
 };