From f9b2f0067a178b2ef93c5191ae2bcbeffda9fbeb Mon Sep 17 00:00:00 2001
From: Maria Yu
Date: Thu, 29 Nov 2018 13:57:53 +0800
Subject: [PATCH] locking/rwsem: add prio aware enhancement for rwsem

Make insertion into the rwsem wait list priority aware: a lower prio
value means a higher-priority task, which is queued ahead of, and so
acquires the lock before, lower-priority tasks. Only waiters whose
task priority is higher than DEFAULT_PRIO are allowed to preempt
others in the queue. To avoid starvation, keep a count of how many
waiters have preempted their way into the wait list; once that count
exceeds RWSEM_MAX_PREEMPT_ALLOWED, fall back to simple FIFO queueing
until the wait list is empty.

Change-Id: I4d5fe6a823a16c9762e2e2f416d34bdd701341c4
Signed-off-by: Maria Yu
---
 include/linux/rwsem.h       | 13 ++++++-
 kernel/Kconfig.locks        |  4 +++
 kernel/locking/rwsem-xadd.c | 31 +++++++++--------
 kernel/locking/rwsem.h      | 68 +++++++++++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 16 deletions(-)

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index c427ffaa4904..fa3291f9b38a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -42,6 +42,10 @@ struct rw_semaphore {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+	/* count of waiters that have preempted to queue in the wait list */
+	long m_count;
+#endif
 };
 
 /*
@@ -83,12 +87,19 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 #define __RWSEM_OPT_INIT(lockname)
 #endif
 
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+#define __RWSEM_PRIO_AWARE_INIT(lockname)	.m_count = 0
+#else
+#define __RWSEM_PRIO_AWARE_INIT(lockname)
+#endif
+
 #define __RWSEM_INITIALIZER(name)				\
 	{ __RWSEM_INIT_COUNT(name),				\
 	  .wait_list = LIST_HEAD_INIT((name).wait_list),	\
 	  .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
 	  __RWSEM_OPT_INIT(name)				\
-	  __RWSEM_DEP_MAP_INIT(name) }
+	  __RWSEM_DEP_MAP_INIT(name),				\
+	  __RWSEM_PRIO_AWARE_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..621c296fe8f8 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -248,3 +248,7 @@ config ARCH_USE_QUEUED_RWLOCKS
 config QUEUED_RWLOCKS
 	def_bool y if ARCH_USE_QUEUED_RWLOCKS
 	depends on SMP
+
+config RWSEM_PRIO_AWARE
+	def_bool y
+	depends on RWSEM_XCHGADD_ALGORITHM
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a90336779375..8b41d8883dce 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -90,21 +90,13 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->owner = NULL;
 	osq_lock_init(&sem->osq);
 #endif
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+	sem->m_count = 0;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
 
-enum rwsem_waiter_type {
-	RWSEM_WAITING_FOR_WRITE,
-	RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
-	struct list_head list;
-	struct task_struct *task;
-	enum rwsem_waiter_type type;
-};
-
 enum rwsem_wake_type {
 	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
 	RWSEM_WAKE_READERS,	/* Wake readers only */
@@ -228,6 +220,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
 	struct rwsem_waiter waiter;
 	DEFINE_WAKE_Q(wake_q);
+	bool is_first_waiter = false;
 
 	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_READ;
@@ -235,7 +228,9 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	raw_spin_lock_irq(&sem->wait_lock);
 	if (list_empty(&sem->wait_list))
 		adjustment += RWSEM_WAITING_BIAS;
-	list_add_tail(&waiter.list, &sem->wait_list);
+
+	/* is_first_waiter == true means we are first in the queue */
+	is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
@@ -248,7 +243,8 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	 */
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
-	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
+	     (adjustment != -RWSEM_ACTIVE_READ_BIAS ||
+	      is_first_waiter)))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
@@ -498,6 +494,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 	struct rwsem_waiter waiter;
 	struct rw_semaphore *ret = sem;
 	DEFINE_WAKE_Q(wake_q);
+	bool is_first_waiter = false;
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
@@ -519,7 +516,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 	if (list_empty(&sem->wait_list))
 		waiting = false;
 
-	list_add_tail(&waiter.list, &sem->wait_list);
+	/*
+	 * is_first_waiter == true means we are first in the queue,
+	 * so there are no read locks queued ahead of us.
+	 */
+	is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	if (waiting) {
@@ -530,7 +531,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 		 * no active writers, the lock must be read owned; so we try to
 		 * wake any read locks that were queued ahead of us.
 		 */
-		if (count > RWSEM_WAITING_BIAS) {
+		if (!is_first_waiter && count > RWSEM_WAITING_BIAS) {
 			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
 			 * The wakeup is normally called _after_ the wait_lock
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 410ee7b9ac2c..df2d6ee84d26 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -20,6 +20,17 @@
 #define RWSEM_ANONYMOUSLY_OWNED	(1UL << 0)
 #define RWSEM_READER_OWNED	((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
 
+enum rwsem_waiter_type {
+	RWSEM_WAITING_FOR_WRITE,
+	RWSEM_WAITING_FOR_READ
+};
+
+struct rwsem_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	enum rwsem_waiter_type type;
+};
+
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 /*
  * All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -79,3 +90,60 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
 }
 #endif
+
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+
+#define RWSEM_MAX_PREEMPT_ALLOWED 3000
+
+/*
+ * Return true if the current waiter is added at the front of the rwsem wait list.
+ */
+static inline bool rwsem_list_add_per_prio(struct rwsem_waiter *waiter_in,
+					struct rw_semaphore *sem)
+{
+	struct list_head *pos;
+	struct list_head *head;
+	struct rwsem_waiter *waiter = NULL;
+
+	pos = head = &sem->wait_list;
+	/*
+	 * Rules for task prio aware rwsem wait list queueing:
+	 * 1:	Only try to preempt waiters whose task priority
+	 *	is higher than DEFAULT_PRIO.
+	 * 2:	To avoid starvation, keep a count of how many
+	 *	high priority waiters have preempted to queue in
+	 *	the wait list.
+	 *	If the preempt count exceeds RWSEM_MAX_PREEMPT_ALLOWED,
+	 *	use simple FIFO until the wait list is empty.
+	 */
+	if (list_empty(head)) {
+		list_add_tail(&waiter_in->list, head);
+		sem->m_count = 0;
+		return true;
+	}
+
+	if (waiter_in->task->prio < DEFAULT_PRIO
+		&& sem->m_count < RWSEM_MAX_PREEMPT_ALLOWED) {
+
+		list_for_each(pos, head) {
+			waiter = list_entry(pos, struct rwsem_waiter, list);
+			if (waiter->task->prio > waiter_in->task->prio) {
+				list_add(&waiter_in->list, pos->prev);
+				sem->m_count++;
+				return &waiter_in->list == head->next;
+			}
+		}
+	}
+
+	list_add_tail(&waiter_in->list, head);
+
+	return false;
+}
+#else
+static inline bool rwsem_list_add_per_prio(struct rwsem_waiter *waiter_in,
+					struct rw_semaphore *sem)
+{
+	list_add_tail(&waiter_in->list, &sem->wait_list);
+	return false;
+}
+#endif
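
To make the queueing rules above easier to see in isolation, here is a rough
userspace sketch of the policy that rwsem_list_add_per_prio() implements. The
struct waiter type, the fixed-size queue[] array and the list_add_per_prio()
helper are stand-ins invented for this illustration (no struct list_head, no
wait_lock); only the comparison against DEFAULT_PRIO, the preempt counter and
the FIFO fallback mirror the kernel code in the patch.

/*
 * Standalone userspace sketch of the queueing policy used by
 * rwsem_list_add_per_prio().  The struct waiter type, the queue[] array
 * and list_add_per_prio() below are simplified stand-ins for
 * illustration only; they are not part of the patch.
 */
#include <assert.h>
#include <stdio.h>

#define DEFAULT_PRIO            120     /* same value as the kernel's DEFAULT_PRIO */
#define MAX_PREEMPT_ALLOWED     3000    /* mirrors RWSEM_MAX_PREEMPT_ALLOWED */
#define MAX_WAITERS             16

struct waiter {
        int prio;                       /* lower value == higher priority */
};

static struct waiter queue[MAX_WAITERS];
static int nr_waiters;
static long m_count;                    /* waiters that have preempted so far */

/* Returns 1 if the new waiter ends up at the head of the queue. */
static int list_add_per_prio(struct waiter w)
{
        int i, pos = nr_waiters;        /* default: FIFO, append at the tail */

        assert(nr_waiters < MAX_WAITERS);

        if (nr_waiters == 0) {
                m_count = 0;            /* queue drained: reset the preempt budget */
        } else if (w.prio < DEFAULT_PRIO && m_count < MAX_PREEMPT_ALLOWED) {
                /* Insert ahead of the first waiter with a lower priority. */
                for (i = 0; i < nr_waiters; i++) {
                        if (queue[i].prio > w.prio) {
                                pos = i;
                                m_count++;
                                break;
                        }
                }
        }

        for (i = nr_waiters; i > pos; i--)
                queue[i] = queue[i - 1];
        queue[pos] = w;
        nr_waiters++;

        return pos == 0;
}

int main(void)
{
        int prios[] = { 120, 130, 110, 100, 125 };
        int i, j;

        for (i = 0; i < 5; i++) {
                int first = list_add_per_prio((struct waiter){ .prio = prios[i] });

                printf("added prio %3d (first=%d): queue:", prios[i], first);
                for (j = 0; j < nr_waiters; j++)
                        printf(" %d", queue[j].prio);
                printf("\n");
        }
        return 0;
}

Adding prios 120, 130, 110, 100 and 125 in that order leaves the queue ordered
100 110 120 130 125: the two waiters with priority higher than DEFAULT_PRIO
(prio values 110 and 100) preempt their way forward, while 120, 130 and 125
stay FIFO at the tail.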