Merge branch 'qspinlock'

Fast and efficient queued spinlocks backported from mainline v5.3.

Signed-off-by: Danny Lin <danny@kdrag0n.dev>
Signed-off-by: azrim <mirzaspc@gmail.com>
2025-02-20 11:45:48 +08:00 · 2019-09-18 18:20:30 -07:00 · 2019-09-18 18:20:30 -07:00 · dc40b2c53d
commit dc40b2c53d
parent 67275beaaf
33 changed files with 183 additions and 342 deletions
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@ -14,7 +14,6 @@
 * We make no fairness assumptions. They have a cost.
 */

-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define arch_spin_is_locked(x)	((x)->lock != 0)

 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
@ -171,7 +170,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
 	lock->lock = 0;
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #endif /* _ALPHA_SPINLOCK_H */
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@ -416,8 +416,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_H */
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@ -280,8 +280,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_H */
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@ -48,6 +48,7 @@ config ARM64
 	select ARCH_SUPPORTS_SHADOW_CALL_STACK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@ -17,6 +17,7 @@ generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += preempt.h
 generic-y += qrwlock.h
+generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@ -127,6 +127,19 @@ do {									\
 	__u.__val;							\
 })

+#define smp_cond_load_relaxed(ptr, cond_expr)				\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	typeof(*ptr) VAL;						\
+	for (;;) {							\
+		VAL = READ_ONCE(*__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		__cmpwait_relaxed(__PTR, VAL);				\
+	}								\
+	VAL;								\
+})
+
 #define smp_cond_load_acquire(ptr, cond_expr)				\
 ({									\
 	typeof(ptr) __PTR = (ptr);					\
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@ -16,136 +16,8 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H

-#include <asm/lse.h>
-#include <asm/spinlock_types.h>
-#include <asm/processor.h>
-
-/*
- * Spinlock implementation.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- */
-
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	unsigned int tmp;
-	arch_spinlock_t lockval, newval;
-
-	asm volatile(
-	/* Atomically increment the next ticket. */
-	ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-"	prfm	pstl1strm, %3\n"
-"1:	ldaxr	%w0, %3\n"
-"	add	%w1, %w0, %w5\n"
-"	stxr	%w2, %w1, %3\n"
-"	cbnz	%w2, 1b\n",
-	/* LSE atomics */
-"	mov	%w2, %w5\n"
-"	ldadda	%w2, %w0, %3\n"
-	__nops(3)
-	)
-
-	/* Did we get the lock? */
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbz	%w1, 3f\n"
-	/*
-	 * No: spin on the owner. Send a local event to avoid missing an
-	 * unlock before the exclusive load.
-	 */
-"	sevl\n"
-"2:	wfe\n"
-"	ldaxrh	%w2, %4\n"
-"	eor	%w1, %w2, %w0, lsr #16\n"
-"	cbnz	%w1, 2b\n"
-	/* We got the lock. Critical section starts here. */
-"3:"
-	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
-	: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
-	: "memory");
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	unsigned int tmp;
-	arch_spinlock_t lockval;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"1:	ldaxr	%w0, %2\n"
-	"	eor	%w1, %w0, %w0, ror #16\n"
-	"	cbnz	%w1, 2f\n"
-	"	add	%w0, %w0, %3\n"
-	"	stxr	%w1, %w0, %2\n"
-	"	cbnz	%w1, 1b\n"
-	"2:",
-	/* LSE atomics */
-	"	ldr	%w0, %2\n"
-	"	eor	%w1, %w0, %w0, ror #16\n"
-	"	cbnz	%w1, 1f\n"
-	"	add	%w1, %w0, %3\n"
-	"	casa	%w0, %w1, %2\n"
-	"	sub	%w1, %w1, %3\n"
-	"	eor	%w1, %w1, %w0\n"
-	"1:")
-	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
-	: "I" (1 << TICKET_SHIFT)
-	: "memory");
-
-	return !tmp;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	ldrh	%w1, %0\n"
-	"	add	%w1, %w1, #1\n"
-	"	stlrh	%w1, %0",
-	/* LSE atomics */
-	"	mov	%w1, #1\n"
-	"	staddlh	%w1, %0\n"
-	__nops(1))
-	: "=Q" (lock->owner), "=&r" (tmp)
-	:
-	: "memory");
-}
-
-static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return lock.owner == lock.next;
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	/*
-	 * Ensure prior spin_lock operations to other locks have completed
-	 * on this CPU before we test whether "lock" is locked.
-	 */
-	smp_mb(); /* ^^^ */
-	return !arch_spin_value_unlocked(READ_ONCE(*lock));
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	arch_spinlock_t lockval = READ_ONCE(*lock);
-	return (lockval.next - lockval.owner) > 1;
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
 #include <asm/qrwlock.h>
-
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
+#include <asm/qspinlock.h>

 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@ -20,22 +20,7 @@
 # error "please don't include this file directly"
 #endif

-#include <linux/types.h>
-
-#define TICKET_SHIFT	16
-
-typedef struct {
-#ifdef __AARCH64EB__
-	u16 next;
-	u16 owner;
-#else
-	u16 owner;
-	u16 next;
-#endif
-} __aligned(4) arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 , 0 }
-
+#include <asm-generic/qspinlock_types.h>
 #include <asm-generic/qrwlock_types.h>

 #endif
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@ -36,8 +36,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	__raw_spin_lock_asm(&lock->lock);
 }

-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return __raw_spin_trylock_asm(&lock->lock);
@ -63,8 +61,6 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 	__raw_read_lock_asm(&rw->lock);
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	return __raw_read_trylock_asm(&rw->lock);
@ -80,8 +76,6 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 	__raw_write_lock_asm(&rw->lock);
 }

-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	return __raw_write_trylock_asm(&rw->lock);
@ -92,10 +86,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	__raw_write_unlock_asm(&rw->lock);
 }

-#define arch_spin_relax(lock)  	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif

 #endif /*  !__BFIN_SPINLOCK_H */
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@ -177,11 +177,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 /*
 * SMP spinlocks are intended to allow only a single CPU at the lock
 */
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
 #define arch_spin_is_locked(x) ((x)->lock != 0)

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #endif
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@ -127,6 +127,7 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 {
 	arch_spin_lock(lock);
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 #define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
 #define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
@ -157,6 +158,7 @@ arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 		: "p6", "p7", "r2", "memory");
 }

+#define arch_read_lock_flags arch_read_lock_flags
 #define arch_read_lock(lock) arch_read_lock_flags(lock, 0)

 #else /* !ASM_SUPPORTED */
@ -209,6 +211,7 @@ arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 		: "ar.ccv", "p6", "p7", "r2", "r29", "memory");
 }

+#define arch_write_lock_flags arch_write_lock_flags
 #define arch_write_lock(rw) arch_write_lock_flags(rw, 0)

 #define arch_write_trylock(rw)							\
@ -232,8 +235,6 @@ static inline void arch_write_unlock(arch_rwlock_t *x)

 #else /* !ASM_SUPPORTED */

-#define arch_write_lock_flags(l, flags) arch_write_lock(l)
-
 #define arch_write_lock(l)								\
 ({											\
 	__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1);			\
@ -273,8 +274,4 @@ static inline int arch_read_trylock(arch_rwlock_t *x)
 	return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word;
 }

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /*  _ASM_IA64_SPINLOCK_H */
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@ -29,7 +29,6 @@
 */

 #define arch_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)

 /**
 * arch_spin_trylock - Try spin lock and return a result
@ -318,11 +317,4 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 	return 0;
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif	/* _ASM_M32R_SPINLOCK_H */
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@ -16,13 +16,4 @@
 * locked.
 */

-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-#define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define	arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_H */
--- a/arch/metag/include/asm/spinlock_lnkget.h
+++ b/arch/metag/include/asm/spinlock_lnkget.h
@ -243,8 +243,4 @@ static inline int arch_read_can_lock(arch_rwlock_t *rw)
 #define	arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define	arch_write_lock_flags(lock, flags) arch_write_lock(lock)

-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SPINLOCK_LNKGET_H */
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@ -13,11 +13,4 @@
 #include <asm/qrwlock.h>
 #include <asm/qspinlock.h>

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ASM_SPINLOCK_H */
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@ -84,6 +84,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 		: "d" (flags), "a"(&lock->slock), "i"(EPSW_IE | MN10300_CLI_LEVEL)
 		: "memory", "cc");
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 #ifdef __KERNEL__

@ -183,9 +184,6 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 	return 0;
 }

-#define arch_read_lock_flags(lock, flags)  arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #define _raw_spin_relax(lock)	cpu_relax()
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@ -161,6 +161,7 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 		local_irq_restore(flags_dis);
 	}
 }
+#define arch_spin_lock_flags arch_spin_lock_flags

 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
@ -302,9 +303,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	rw->lock = 0;
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #define arch_spin_relax(lock)	__spin_yield(lock)
 #define arch_read_relax(lock)	__rw_yield(lock)
 #define arch_write_relax(lock)	__rw_yield(lock)
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@ -46,6 +46,7 @@ static inline void arch_spin_relax(arch_spinlock_t *lock)
 {
 	arch_lock_relax(lock->lock);
 }
+#define arch_spin_relax		arch_spin_relax

 static inline u32 arch_spin_lockval(int cpu)
 {
@ -81,6 +82,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
 	if (!arch_spin_trylock_once(lp))
 		arch_spin_lock_wait_flags(lp, flags);
 }
+#define arch_spin_lock_flags	arch_spin_lock_flags

 static inline int arch_spin_trylock(arch_spinlock_t *lp)
 {
@ -126,9 +128,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
 extern int _raw_write_trylock_retry(arch_rwlock_t *lp);

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 static inline int arch_read_trylock_once(arch_rwlock_t *rw)
 {
 	int old = ACCESS_ONCE(rw->lock);
@ -269,10 +268,12 @@ static inline void arch_read_relax(arch_rwlock_t *rw)
 {
 	arch_lock_relax(rw->owner);
 }
+#define arch_read_relax		arch_read_relax

 static inline void arch_write_relax(arch_rwlock_t *rw)
 {
 	arch_lock_relax(rw->owner);
 }
+#define arch_write_relax	arch_write_relax

 #endif /* __ASM_SPINLOCK_H */
--- a/arch/sh/include/asm/spinlock-cas.h
+++ b/arch/sh/include/asm/spinlock-cas.h
@ -27,7 +27,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new
 */

 #define arch_spin_is_locked(x)		((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)

 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
@ -102,11 +101,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS;
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_CAS_H */
--- a/arch/sh/include/asm/spinlock-llsc.h
+++ b/arch/sh/include/asm/spinlock-llsc.h
@ -19,7 +19,6 @@
 */

 #define arch_spin_is_locked(x)		((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)

 /*
 * Simple spin lock operations.  There are two variants, one clears IRQ's
@ -209,11 +208,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	return (oldval > (RW_LOCK_BIAS - 1));
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_LLSC_H */
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@ -51,9 +51,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)

 void arch_spin_lock(arch_spinlock_t *lock);

-/* We cannot take an interrupt after getting a ticket, so don't enable them. */
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
 int arch_spin_trylock(arch_spinlock_t *lock);

 static inline void arch_spin_unlock(arch_spinlock_t *lock)
@ -125,7 +122,4 @@ void arch_read_unlock(arch_rwlock_t *rwlock);
 */
 void arch_write_unlock(arch_rwlock_t *rwlock);

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #endif /* _ASM_TILE_SPINLOCK_32_H */
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@ -75,9 +75,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 /* Try to get the lock, and return whether we succeeded. */
 int arch_spin_trylock(arch_spinlock_t *lock);

-/* We cannot take an interrupt after getting a ticket, so don't enable them. */
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
 /*
 * Read-write spinlocks, allowing multiple readers
 * but only one writer.
@ -156,7 +153,4 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	return 0;
 }

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #endif /* _ASM_TILE_SPINLOCK_64_H */
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@ -42,11 +42,4 @@

 #include <asm/qrwlock.h>

-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ASM_X86_SPINLOCK_H */
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@ -33,8 +33,6 @@

 #define arch_spin_is_locked(x) ((x)->slock != 0)

-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
@ -200,7 +198,4 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 			: "memory");
 }

-#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-
 #endif	/* _XTENSA_SPINLOCK_H */
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;

-	return likely(atomic_cmpxchg_acquire(&lock->cnts,
-					     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+				_QW_LOCKED));
 }
 /**
 * queued_read_lock - acquire read lock of a queue rwlock
@ -114,8 +114,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
 */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+	u32 cnts = 0;
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
 		return;

 	queued_write_lock_slowpath(lock);
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@ -26,7 +26,6 @@
 * @lock: Pointer to queued spinlock structure
 * Return: 1 if it is locked, 0 otherwise
 */
-#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
@ -35,7 +34,6 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 	 */
 	return atomic_read(&lock->val);
 }
-#endif

 /**
 * queued_spin_value_unlocked - is the spinlock structure unlocked?
@ -68,10 +66,12 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (!atomic_read(&lock->val) &&
-	   (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-		return 1;
-	return 0;
+	u32 val = atomic_read(&lock->val);
+
+	if (unlikely(val))
+		return 0;
+
+	return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
 }

 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@ -82,11 +82,11 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	u32 val;
+	u32 val = 0;

-	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-	if (likely(val == 0))
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
+
 	queued_spin_lock_slowpath(lock, val);
 }

@ -100,7 +100,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * unlock() needs release semantics:
 	 */
-	(void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
+	smp_store_release(&lock->locked, 0);
 }
 #endif

@ -121,6 +121,5 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 #define arch_spin_lock(l)		queued_spin_lock(l)
 #define arch_spin_trylock(l)		queued_spin_trylock(l)
 #define arch_spin_unlock(l)		queued_spin_unlock(l)
-#define arch_spin_lock_flags(l, f)	queued_spin_lock(l)

 #endif /* __ASM_GENERIC_QSPINLOCK_H */
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@ -142,9 +142,6 @@ extern void __mutex_init(struct mutex *lock, const char *name,
 */
 static inline int mutex_is_locked(struct mutex *lock)
 {
-	/*
-	 * XXX think about spin_is_locked
-	 */
 	return __mutex_owner(lock) != NULL;
 }

--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@ -38,6 +38,15 @@ do {								\
 extern int do_raw_write_trylock(rwlock_t *lock);
 extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
 #else
+
+#ifndef arch_read_lock_flags
+# define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
+#endif
+
+#ifndef arch_write_lock_flags
+# define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
+#endif
+
 # define do_raw_read_lock(rwlock)	do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
 # define do_raw_read_lock_flags(lock, flags) \
 		do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@ -166,6 +166,10 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
 	arch_spin_lock(&lock->raw_lock);
 }

+#ifndef arch_spin_lock_flags
+#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
+#endif
+
 static inline void
 do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
 {
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@ -32,14 +32,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	barrier();
 }

-static inline void
-arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	local_irq_save(flags);
-	lock->slock = 0;
-	barrier();
-}
-
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	char oldval = lock->slock;
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@ -12,11 +12,11 @@
 * GNU General Public License for more details.
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
- * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2013-2014,2018 Red Hat, Inc.
 * (C) Copyright 2015 Intel Corp.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 *
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
 *          Peter Zijlstra <peterz@infradead.org>
 */

@ -32,6 +32,11 @@
 #include <asm/byteorder.h>
 #include <asm/qspinlock.h>

+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
 /*
 * The basic principle of a queue-based spinlock can best be understood
 * by studying a classic queue-based spinlock implementation called the
@ -69,12 +74,24 @@
 */

 #include "mcs_spinlock.h"
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define MAX_NODES	8
-#else
 #define MAX_NODES	4
+
+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+	struct mcs_spinlock mcs;
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	long reserved[2];
 #endif
+};

 /*
 * The pending bit spinning loop count.
@ -96,7 +113,7 @@
 *
 * PV doubles the storage and uses the second cacheline for PV state.
 */
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);

 /*
 * We must be able to distinguish between no-tail and the tail at 0:0,
@ -107,9 +124,6 @@ static inline __pure u32 encode_tail(int cpu, int idx)
 {
 	u32 tail;

-#ifdef CONFIG_DEBUG_SPINLOCK
-	BUG_ON(idx > 3);
-#endif
 	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
 	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

@ -121,7 +135,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
 	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
 	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

-	return per_cpu_ptr(&mcs_nodes[idx], cpu);
+	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+	return &((struct qnode *)base + idx)->mcs;
 }

 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
@ -164,10 +184,10 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
 	/*
-	 * Use release semantics to make sure that the MCS node is properly
-	 * initialized before changing the tail code.
+	 * We can use relaxed semantics since the caller ensures that the
+	 * MCS node is properly initialized before updating the tail.
 	 */
-	return (u32)xchg_release(&lock->tail,
+	return (u32)xchg_relaxed(&lock->tail,
 				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
 }

@ -212,10 +232,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 	for (;;) {
 		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
 		/*
-		 * Use release semantics to make sure that the MCS node is
-		 * properly initialized before changing the tail code.
+		 * We can use relaxed semantics since the caller ensures that
+		 * the MCS node is properly initialized before updating the
+		 * tail.
 		 */
-		old = atomic_cmpxchg_release(&lock->val, val, new);
+		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
 		if (old == val)
 			break;

@ -308,7 +329,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

 	if (pv_enabled())
-		goto queue;
+		goto pv_queue;

 	if (virt_spin_lock(lock))
 		return;
@ -334,17 +355,23 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	/*
 	 * trylock || pending
 	 *
-	 * 0,0,0 -> 0,0,1 ; trylock
-	 * 0,0,1 -> 0,1,1 ; pending
+	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
 	 */
 	val = queued_fetch_set_pending_acquire(lock);

 	/*
-	 * If we observe any contention; undo and queue.
+	 * If we observe contention, there is a concurrent locker.
+	 *
+	 * Undo and queue; our setting of PENDING might have made the
+	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+	 * on @next to become !NULL.
 	 */
 	if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+		/* Undo PENDING if we set it. */
 		if (!(val & _Q_PENDING_MASK))
 			clear_pending(lock);
+
 		goto queue;
 	}

@ -360,7 +387,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * barriers.
 	 */
 	if (val & _Q_LOCKED_MASK)
-		smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
+		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));

 	/*
 	 * take ownership and clear the pending bit.
@ -368,6 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * 0,1,0 -> 0,0,1
 	 */
 	clear_pending_set_locked(lock);
+	qstat_inc(qstat_lock_pending, true);
 	return;

 	/*
@ -375,11 +403,34 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * queuing.
 	 */
 queue:
-	node = this_cpu_ptr(&mcs_nodes[0]);
+	qstat_inc(qstat_lock_slowpath, true);
+pv_queue:
+	node = this_cpu_ptr(&qnodes[0].mcs);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);

-	node += idx;
+	/*
+	 * 4 nodes are allocated based on the assumption that there will
+	 * not be nested NMIs taking spinlocks. That may not be true in
+	 * some architectures even though the chance of needing more than
+	 * 4 nodes will still be extremely unlikely. When that happens,
+	 * we fall back to spinning on the lock directly without using
+	 * any MCS node. This is not the most elegant solution, but is
+	 * simple enough.
+	 */
+	if (unlikely(idx >= MAX_NODES)) {
+		qstat_inc(qstat_lock_no_node, true);
+		while (!queued_spin_trylock(lock))
+			cpu_relax();
+		goto release;
+	}
+
+	node = grab_mcs_node(node, idx);
+
+	/*
+	 * Keep counts of non-zero index values:
+	 */
+	qstat_inc(qstat_lock_use_node2 + idx - 1, idx);

 	/*
 	 * Ensure that we increment the head node->count before initialising
@ -401,12 +452,18 @@ queue:
 		goto release;

 	/*
+	 * Ensure that the initialisation of @node is complete before we
+	 * publish the updated tail via xchg_tail() and potentially link
+	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+	 */
+	smp_wmb();
+
+	/*
+	 * Publish the updated tail.
 	 * We have already touched the queueing cacheline; don't bother with
 	 * pending stuff.
 	 *
 	 * p,*,* -> n,*,*
-	 *
-	 * RELEASE, such that the stores to @node must be complete.
 	 */
 	old = xchg_tail(lock, tail);
 	next = NULL;
@ -418,14 +475,8 @@ queue:
 	if (old & _Q_TAIL_MASK) {
 		prev = decode_tail(old);

-		/*
-		 * We must ensure that the stores to @node are observed before
-		 * the write to prev->next. The address dependency from
-		 * xchg_tail is not sufficient to ensure this because the read
-		 * component of xchg_tail is unordered with respect to the
-		 * initialisation of @node.
-		 */
-		smp_store_release(&prev->next, node);
+		/* Link @node into the waitqueue. */
+		WRITE_ONCE(prev->next, node);

 		pv_wait_node(node, prev);
 		arch_mcs_spin_lock_contended(&node->locked);
@ -454,8 +505,8 @@ queue:
 	 *
 	 * The PV pv_wait_head_or_lock function, if active, will acquire
 	 * the lock and return a non-zero value. So we have to skip the
-	 * smp_cond_load_acquire() call. As the next PV queue head hasn't been
-	 * designated yet, there is no way for the locked value to become
+	 * atomic_cond_read_acquire() call. As the next PV queue head hasn't
+	 * been designated yet, there is no way for the locked value to become
 	 * _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
 	 *
@ -465,7 +516,7 @@ queue:
 	if ((val = pv_wait_head_or_lock(lock, node)))
 		goto locked;

-	val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
+	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));

 locked:
 	/*
@ -479,27 +530,33 @@ locked:
 	 * Otherwise, we only need to grab the lock.
 	 */

-	/* In the PV case we might already have _Q_LOCKED_VAL set */
+	/*
+	 * In the PV case we might already have _Q_LOCKED_VAL set, because
+	 * of lock stealing; therefore we must also allow:
+	 *
+	 * n,0,1 -> 0,0,1
+	 *
+	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+	 *       above wait condition, therefore any concurrent setting of
+	 *       PENDING will make the uncontended transition fail.
+	 */
 	if ((val & _Q_TAIL_MASK) == tail) {
-		/*
-		 * The smp_cond_load_acquire() call above has provided the
-		 * necessary acquire semantics required for locking.
-		 */
-		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
-		if (old == val)
+		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
 			goto release; /* No contention */
 	}

-	/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+	/*
+	 * Either somebody is queued behind us or _Q_PENDING_VAL got set; in
+	 * the latter case the CPU that set it will detect the remaining tail
+	 * and queue behind us, ensuring we'll see a @next.
+	 */
 	set_locked(lock);

 	/*
 	 * contended path; wait for next if not observed yet, release.
 	 */
-	if (!next) {
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
-	}
+	if (!next)
+		next = smp_cond_load_relaxed(&node->next, (VAL));

 	arch_mcs_spin_unlock_contended(&next->locked);
 	pv_kick_node(lock, next);
@ -508,7 +565,7 @@ release:
 	/*
 	 * release the node
 	 */
-	__this_cpu_dec(mcs_nodes[0].count);
+	__this_cpu_dec(qnodes[0].mcs.count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);

--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@ -49,17 +49,10 @@ enum vcpu_state {

 struct pv_node {
 	struct mcs_spinlock	mcs;
-	struct mcs_spinlock	__res[3];
-
 	int			cpu;
 	u8			state;
 };

-/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
 /*
 * By replacing the regular queued_spin_trylock() with the function below,
 * it will be called once when a lock waiter enter the PV slowpath before
@ -257,7 +250,7 @@ static void pv_init_node(struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;

-	BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
+	BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode));

 	pn->cpu = smp_processor_id();
 	pn->state = vcpu_running;
@ -394,7 +387,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	/*
 	 * Tracking # of slowpath locking operations
 	 */
-	qstat_inc(qstat_pv_lock_slowpath, true);
+	qstat_inc(qstat_lock_slowpath, true);

 	for (;; waitcnt++) {
 		/*
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@ -22,13 +22,21 @@
 *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
 *   pv_latency_kick	- average latency (ns) of vCPU kick operation
 *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
- *   pv_lock_slowpath	- # of locking operations via the slowpath
 *   pv_lock_stealing	- # of lock stealing operations
 *   pv_spurious_wakeup	- # of spurious wakeups in non-head vCPUs
 *   pv_wait_again	- # of wait's after a queue head vCPU kick
 *   pv_wait_early	- # of early vCPU wait's
 *   pv_wait_head	- # of vCPU wait's at the queue head
 *   pv_wait_node	- # of vCPU wait's at a non-head queue node
+ *   lock_pending	- # of locking operations via pending code
+ *   lock_slowpath	- # of locking operations via MCS lock queue
+ *   lock_use_node2	- # of locking operations that use 2nd per-CPU node
+ *   lock_use_node3	- # of locking operations that use 3rd per-CPU node
+ *   lock_use_node4	- # of locking operations that use 4th per-CPU node
+ *   lock_no_node	- # of locking operations without using per-CPU node
+ *
+ * Subtracting lock_use_node[234] from lock_slowpath will give you
+ * lock_use_node1.
 *
 * Writing to the "reset_counters" file will reset all the above counter
 * values.
@ -46,13 +54,18 @@ enum qlock_stats {
 	qstat_pv_kick_wake,
 	qstat_pv_latency_kick,
 	qstat_pv_latency_wake,
-	qstat_pv_lock_slowpath,
 	qstat_pv_lock_stealing,
 	qstat_pv_spurious_wakeup,
 	qstat_pv_wait_again,
 	qstat_pv_wait_early,
 	qstat_pv_wait_head,
 	qstat_pv_wait_node,
+	qstat_lock_pending,
+	qstat_lock_slowpath,
+	qstat_lock_use_node2,
+	qstat_lock_use_node3,
+	qstat_lock_use_node4,
+	qstat_lock_no_node,
 	qstat_num,	/* Total number of statistical counters */
 	qstat_reset_cnts = qstat_num,
 };
@ -73,12 +86,17 @@ static const char * const qstat_names[qstat_num + 1] = {
 	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
 	[qstat_pv_latency_kick]	   = "pv_latency_kick",
 	[qstat_pv_latency_wake]    = "pv_latency_wake",
-	[qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
 	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
 	[qstat_pv_wait_again]      = "pv_wait_again",
 	[qstat_pv_wait_early]      = "pv_wait_early",
 	[qstat_pv_wait_head]       = "pv_wait_head",
 	[qstat_pv_wait_node]       = "pv_wait_node",
+	[qstat_lock_pending]       = "lock_pending",
+	[qstat_lock_slowpath]      = "lock_slowpath",
+	[qstat_lock_use_node2]	   = "lock_use_node2",
+	[qstat_lock_use_node3]	   = "lock_use_node3",
+	[qstat_lock_use_node4]	   = "lock_use_node4",
+	[qstat_lock_no_node]	   = "lock_no_node",
 	[qstat_reset_cnts]         = "reset_counters",
 };