forked from luck/tmp_suning_uos_patched
4f23dbc1e6
Because of writer lock stealing, it is possible that a constant stream of incoming writers will cause a waiting writer or reader to wait indefinitely leading to lock starvation. This patch implements a lock handoff mechanism to disable lock stealing and force lock handoff to the first waiter or waiters (for readers) in the queue after at least a 4ms waiting period unless it is a RT writer task which doesn't need to wait. The waiting period is used to avoid discouraging lock stealing too much to affect performance. The setting and clearing of the handoff bit is serialized by the wait_lock. So racing is not possible. A rwsem microbenchmark was run for 5 seconds on a 2-socket 40-core 80-thread Skylake system with a v5.1 based kernel and 240 write_lock threads with 5us sleep critical section. Before the patch, the min/mean/max numbers of locking operations for the locking threads were 1/7,792/173,696. After the patch, the figures became 5,842/6,542/7,458. It can be seen that the rwsem became much more fair, though there was a drop of about 16% in the mean locking operations done which was a tradeoff of having better fairness. Making the waiter set the handoff bit right after the first wakeup can impact performance especially with a mixed reader/writer workload. With the same microbenchmark with short critical section and equal number of reader and writer threads (40/40), the reader/writer locking operation counts with the current patch were: 40 readers, Iterations Min/Mean/Max = 1,793/1,794/1,796 40 writers, Iterations Min/Mean/Max = 1,793/34,956/86,081 By making waiter set handoff bit immediately after wakeup: 40 readers, Iterations Min/Mean/Max = 43/44/46 40 writers, Iterations Min/Mean/Max = 43/1,263/3,191 Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Will Deacon <will.deacon@arm.com> Cc: huang ying <huang.ying.caritas@gmail.com> Link: https://lkml.kernel.org/r/20190520205918.22251-8-longman@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
68 lines
3.0 KiB
C
68 lines
3.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* Authors: Waiman Long <longman@redhat.com>
|
|
*/
|
|
|
|
#ifndef LOCK_EVENT
|
|
#define LOCK_EVENT(name) LOCKEVENT_ ## name,
|
|
#endif
|
|
|
|
#ifdef CONFIG_QUEUED_SPINLOCKS
|
|
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
|
/*
|
|
* Locking events for PV qspinlock.
|
|
*/
|
|
LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
|
|
LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
|
|
LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
|
|
LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
|
|
LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
|
|
LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
|
|
LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
|
|
LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */
|
|
LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */
|
|
LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */
|
|
LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */
|
|
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
|
|
|
|
/*
|
|
* Locking events for qspinlock
|
|
*
|
|
* Subtracting lock_use_node[234] from lock_slowpath will give you
|
|
* lock_use_node1.
|
|
*/
|
|
LOCK_EVENT(lock_pending) /* # of locking ops via pending code */
|
|
LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */
|
|
LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */
|
|
LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */
|
|
LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */
|
|
LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */
|
|
#endif /* CONFIG_QUEUED_SPINLOCKS */
|
|
|
|
/*
|
|
* Locking events for rwsem
|
|
*/
|
|
LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
|
|
LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
|
|
LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
|
|
LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
|
|
LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
|
|
LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
|
|
LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
|
|
LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
|
|
LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
|
|
LOCK_EVENT(rwsem_rlock_handoff) /* # of read lock handoffs */
|
|
LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
|
|
LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
|
|
LOCK_EVENT(rwsem_wlock_handoff) /* # of write lock handoffs */
|