diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index c427ffaa4904..fa3291f9b38a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -42,6 +42,10 @@ struct rw_semaphore {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+	/* count of waiters that have preempted their way into the wait list */
+	long m_count;
+#endif
 };
 
 /*
@@ -83,12 +87,19 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 #define __RWSEM_OPT_INIT(lockname)
 #endif
 
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+#define __RWSEM_PRIO_AWARE_INIT(lockname)	.m_count = 0
+#else
+#define __RWSEM_PRIO_AWARE_INIT(lockname)
+#endif
+
 #define __RWSEM_INITIALIZER(name)				\
 	{ __RWSEM_INIT_COUNT(name),				\
 	  .wait_list = LIST_HEAD_INIT((name).wait_list),	\
 	  .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock)	\
 	  __RWSEM_OPT_INIT(name)				\
-	  __RWSEM_DEP_MAP_INIT(name) }
+	  __RWSEM_DEP_MAP_INIT(name),				\
+	  __RWSEM_PRIO_AWARE_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..621c296fe8f8 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -248,3 +248,7 @@ config ARCH_USE_QUEUED_RWLOCKS
 config QUEUED_RWLOCKS
 	def_bool y if ARCH_USE_QUEUED_RWLOCKS
 	depends on SMP
+
+config RWSEM_PRIO_AWARE
+	def_bool y
+	depends on RWSEM_XCHGADD_ALGORITHM
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a90336779375..8b41d8883dce 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -90,21 +90,13 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->owner = NULL;
 	osq_lock_init(&sem->osq);
 #endif
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+	sem->m_count = 0;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
 
-enum rwsem_waiter_type {
-	RWSEM_WAITING_FOR_WRITE,
-	RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
-	struct list_head list;
-	struct task_struct *task;
-	enum rwsem_waiter_type type;
-};
-
 enum rwsem_wake_type {
 	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
 	RWSEM_WAKE_READERS,	/* Wake readers only */
@@ -228,6 +220,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
 	struct rwsem_waiter waiter;
 	DEFINE_WAKE_Q(wake_q);
+	bool is_first_waiter = false;
 
 	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_READ;
@@ -235,7 +228,9 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	raw_spin_lock_irq(&sem->wait_lock);
 	if (list_empty(&sem->wait_list))
 		adjustment += RWSEM_WAITING_BIAS;
-	list_add_tail(&waiter.list, &sem->wait_list);
+
+	/* is_first_waiter == true means we are first in the queue */
+	is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
@@ -248,7 +243,8 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
 	 */
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
-	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
+	     (adjustment != -RWSEM_ACTIVE_READ_BIAS ||
+	      is_first_waiter)))
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
@@ -498,6 +494,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 	struct rwsem_waiter waiter;
 	struct rw_semaphore *ret = sem;
 	DEFINE_WAKE_Q(wake_q);
+	bool is_first_waiter = false;
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
@@ -519,7 +516,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 	if (list_empty(&sem->wait_list))
 		waiting = false;
 
-	list_add_tail(&waiter.list, &sem->wait_list);
+	/*
+	 * is_first_waiter == true means we are first in the queue,
+	 * so there are no read locks that were queued ahead of us.
+	 */
+	is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	if (waiting) {
@@ -530,7 +531,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 		 * no active writers, the lock must be read owned; so we try to
 		 * wake any read locks that were queued ahead of us.
 		 */
-		if (count > RWSEM_WAITING_BIAS) {
+		if (!is_first_waiter && count > RWSEM_WAITING_BIAS) {
 			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
 			 * The wakeup is normally called _after_ the wait_lock
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 410ee7b9ac2c..df2d6ee84d26 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -20,6 +20,17 @@
 #define RWSEM_ANONYMOUSLY_OWNED	(1UL << 0)
 #define RWSEM_READER_OWNED	((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
 
+enum rwsem_waiter_type {
+	RWSEM_WAITING_FOR_WRITE,
+	RWSEM_WAITING_FOR_READ
+};
+
+struct rwsem_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	enum rwsem_waiter_type type;
+};
+
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 /*
  * All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -79,3 +90,60 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
 }
 #endif
+
+#ifdef CONFIG_RWSEM_PRIO_AWARE
+
+#define RWSEM_MAX_PREEMPT_ALLOWED 3000
+
+/*
+ * Return true if the current waiter is added at the front of the rwsem wait list.
+ */
+static inline bool rwsem_list_add_per_prio(struct rwsem_waiter *waiter_in,
+				struct rw_semaphore *sem)
+{
+	struct list_head *pos;
+	struct list_head *head;
+	struct rwsem_waiter *waiter = NULL;
+
+	pos = head = &sem->wait_list;
+	/*
+	 * Rules for task prio aware rwsem wait list queueing:
+	 * 1:	Only waiters whose task priority is higher than
+	 *	DEFAULT_PRIO may preempt their way into the queue.
+	 * 2:	To avoid starvation, m_count records how many high
+	 *	priority waiters have preempted their way into the
+	 *	wait list.
+	 *	If that count exceeds RWSEM_MAX_PREEMPT_ALLOWED,
+	 *	fall back to simple FIFO until the wait list is empty.
+	 */
+	if (list_empty(head)) {
+		list_add_tail(&waiter_in->list, head);
+		sem->m_count = 0;
+		return true;
+	}
+
+	if (waiter_in->task->prio < DEFAULT_PRIO
+		&& sem->m_count < RWSEM_MAX_PREEMPT_ALLOWED) {
+
+		list_for_each(pos, head) {
+			waiter = list_entry(pos, struct rwsem_waiter, list);
+			if (waiter->task->prio > waiter_in->task->prio) {
+				list_add(&waiter_in->list, pos->prev);
+				sem->m_count++;
+				return &waiter_in->list == head->next;
+			}
+		}
+	}
+
+	list_add_tail(&waiter_in->list, head);
+
+	return false;
+}
+#else
+static inline bool rwsem_list_add_per_prio(struct rwsem_waiter *waiter_in,
+				struct rw_semaphore *sem)
+{
+	list_add_tail(&waiter_in->list, &sem->wait_list);
+	return false;
+}
+#endif
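For readers who want to see the queueing policy in isolation, here is a minimal userspace sketch (not part of the patch) that mimics what rwsem_list_add_per_prio() does: a waiter whose priority value is below DEFAULT_PRIO may be inserted ahead of lower-priority waiters, a counter caps how many such preemptions are allowed before falling back to FIFO, and the return value reports whether the new waiter ended up at the head of the queue. The names fake_waiter, SIM_DEFAULT_PRIO and SIM_MAX_PREEMPT_ALLOWED are hypothetical stand-ins for this example only; in the kernel a lower task->prio value means higher priority.

/*
 * Illustrative sketch, not kernel code: simulate the priority-aware
 * wait-list insertion with a plain singly-linked list.
 */
#include <stdio.h>
#include <stdlib.h>

#define SIM_DEFAULT_PRIO        120     /* stand-in for DEFAULT_PRIO */
#define SIM_MAX_PREEMPT_ALLOWED 3000    /* stand-in for RWSEM_MAX_PREEMPT_ALLOWED */

struct fake_waiter {
	int prio;                       /* lower value = higher priority */
	struct fake_waiter *next;
};

static struct fake_waiter *head;        /* the simulated wait list */
static long m_count;                    /* how many waiters have preempted */

/* Returns 1 if the new waiter ends up at the front of the queue. */
static int list_add_per_prio(struct fake_waiter *w)
{
	struct fake_waiter **pp;

	if (!head) {                    /* empty list: we are trivially first */
		w->next = NULL;
		head = w;
		m_count = 0;
		return 1;
	}

	if (w->prio < SIM_DEFAULT_PRIO && m_count < SIM_MAX_PREEMPT_ALLOWED) {
		/* insert before the first waiter with a lower priority */
		for (pp = &head; *pp; pp = &(*pp)->next) {
			if ((*pp)->prio > w->prio) {
				w->next = *pp;
				*pp = w;
				m_count++;
				return pp == &head;
			}
		}
	}

	/* default path: plain FIFO, append at the tail */
	for (pp = &head; *pp; pp = &(*pp)->next)
		;
	w->next = NULL;
	*pp = w;
	return 0;
}

int main(void)
{
	int prios[] = { 120, 120, 100, 110, 90 };
	struct fake_waiter *w;
	size_t i;

	for (i = 0; i < sizeof(prios) / sizeof(prios[0]); i++) {
		w = calloc(1, sizeof(*w));
		w->prio = prios[i];
		printf("enqueue prio %3d -> first=%d\n", w->prio,
		       list_add_per_prio(w));
	}

	printf("final order:");                 /* expect: 90 100 110 120 120 */
	for (w = head; w; w = w->next)
		printf(" %d", w->prio);
	printf("\n");
	return 0;
}

Note that waiters of equal priority keep their FIFO order, because the scan only jumps ahead of strictly lower-priority (numerically larger prio) entries, and the preempt counter bounds how long default-priority waiters can keep being bypassed.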