kernel_optimize_test/include/linux/percpu-rwsem.h
Waiman Long 5a817641f6 locking/percpu-rwsem: Annotate rwsem ownership transfer by setting RWSEM_OWNER_UNKNOWN
The filesystem freezing code needs to transfer ownership of a rwsem
embedded in a percpu-rwsem from the task that does the freezing to
another one that does the thawing by calling percpu_rwsem_release()
after freezing and percpu_rwsem_acquire() before thawing.

However, the new rwsem debug code runs afoul with this scheme by warning
that the task that releases the rwsem isn't the one that acquires it,
as reported by Amir Goldstein:

  DEBUG_LOCKS_WARN_ON(sem->owner != get_current())
  WARNING: CPU: 1 PID: 1401 at /home/amir/build/src/linux/kernel/locking/rwsem.c:133 up_write+0x59/0x79

  Call Trace:
   percpu_up_write+0x1f/0x28
   thaw_super_locked+0xdf/0x120
   do_vfs_ioctl+0x270/0x5f1
   ksys_ioctl+0x52/0x71
   __x64_sys_ioctl+0x16/0x19
   do_syscall_64+0x5d/0x167
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

To work properly with the rwsem debug code, we need to annotate that the
rwsem ownership is unknown during the tranfer period until a brave soul
comes forward to acquire the ownership. During that period, optimistic
spinning will be disabled.

Reported-by: Amir Goldstein <amir73il@gmail.com>
Tested-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jan Kara <jack@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Theodore Y. Ts'o <tytso@mit.edu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-fsdevel@vger.kernel.org
Link: http://lkml.kernel.org/r/1526420991-21213-3-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-05-16 11:45:16 +02:00

151 lines
4.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PERCPU_RWSEM_H
#define _LINUX_PERCPU_RWSEM_H
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/rcuwait.h>
#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
struct percpu_rw_semaphore {
struct rcu_sync rss;
unsigned int __percpu *read_count;
struct rw_semaphore rw_sem; /* slowpath */
struct rcuwait writer; /* blocked writer */
int readers_block;
};
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
static struct percpu_rw_semaphore name = { \
.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
.read_count = &__percpu_rwsem_rc_##name, \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
.writer = __RCUWAIT_INITIALIZER(name.writer), \
}
extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);
static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
{
might_sleep();
rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
preempt_disable();
/*
* We are in an RCU-sched read-side critical section, so the writer
* cannot both change sem->state from readers_fast and start checking
* counters while we are here. So if we see !sem->state, we know that
* the writer won't be checking until we're past the preempt_enable()
* and that one the synchronize_sched() is done, the writer will see
* anything we did within this RCU-sched read-size critical section.
*/
__this_cpu_inc(*sem->read_count);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
__percpu_down_read(sem, false); /* Unconditional memory barrier */
barrier();
/*
* The barrier() prevents the compiler from
* bleeding the critical section out.
*/
}
static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
{
percpu_down_read_preempt_disable(sem);
preempt_enable();
}
static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
int ret = 1;
preempt_disable();
/*
* Same as in percpu_down_read().
*/
__this_cpu_inc(*sem->read_count);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
preempt_enable();
/*
* The barrier() from preempt_enable() prevents the compiler from
* bleeding the critical section out.
*/
if (ret)
rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
return ret;
}
static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
{
/*
* The barrier() prevents the compiler from
* bleeding the critical section out.
*/
barrier();
/*
* Same as in percpu_down_read().
*/
if (likely(rcu_sync_is_idle(&sem->rss)))
__this_cpu_dec(*sem->read_count);
else
__percpu_up_read(sem); /* Unconditional memory barrier */
preempt_enable();
rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
}
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
preempt_disable();
percpu_up_read_preempt_enable(sem);
}
extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *);
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
const char *, struct lock_class_key *);
extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
#define percpu_init_rwsem(sem) \
({ \
static struct lock_class_key rwsem_key; \
__percpu_init_rwsem(sem, #sem, &rwsem_key); \
})
#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
#define percpu_rwsem_assert_held(sem) \
lockdep_assert_held(&(sem)->rw_sem)
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_release(&sem->rw_sem.dep_map, 1, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
#endif
}
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
sem->rw_sem.owner = current;
#endif
}
#endif