sh: add J2 atomics using the cas.l instruction

Signed-off-by: Rich Felker <dalias@libc.org>
This commit is contained in:
Rich Felker 2016-07-28 19:21:10 +00:00
parent 834da19705
commit 2b47d54ed4
9 changed files with 481 additions and 216 deletions

View File

@ -1,6 +1,12 @@
#ifndef __ASM_SH_ATOMIC_H
#define __ASM_SH_ATOMIC_H
#if defined(CONFIG_CPU_J2)
#include <asm-generic/atomic.h>
#else
/*
* Atomic operations that C can't guarantee us. Useful for
* resource counting etc..
@ -63,4 +69,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
return c;
}
#endif /* CONFIG_CPU_J2 */
#endif /* __ASM_SH_ATOMIC_H */

View File

@ -29,6 +29,11 @@
#define wmb() mb()
#define ctrl_barrier() __icbi(PAGE_OFFSET)
#else
#if defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
#define __smp_mb() do { int tmp = 0; __asm__ __volatile__ ("cas.l %0,%0,@%1" : "+r"(tmp) : "z"(&tmp) : "memory", "t"); } while(0)
#define __smp_rmb() __smp_mb()
#define __smp_wmb() __smp_mb()
#endif
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#endif

View File

@ -0,0 +1,93 @@
#ifndef __ASM_SH_BITOPS_CAS_H
#define __ASM_SH_BITOPS_CAS_H
static inline unsigned __bo_cas(volatile unsigned *p, unsigned old, unsigned new)
{
__asm__ __volatile__("cas.l %1,%0,@r0"
: "+r"(new)
: "r"(old), "z"(p)
: "t", "memory" );
return new;
}
static inline void set_bit(int nr, volatile void *addr)
{
unsigned mask, old;
volatile unsigned *a = addr;
a += nr >> 5;
mask = 1U << (nr & 0x1f);
do old = *a;
while (__bo_cas(a, old, old|mask) != old);
}
static inline void clear_bit(int nr, volatile void *addr)
{
unsigned mask, old;
volatile unsigned *a = addr;
a += nr >> 5;
mask = 1U << (nr & 0x1f);
do old = *a;
while (__bo_cas(a, old, old&~mask) != old);
}
static inline void change_bit(int nr, volatile void *addr)
{
unsigned mask, old;
volatile unsigned *a = addr;
a += nr >> 5;
mask = 1U << (nr & 0x1f);
do old = *a;
while (__bo_cas(a, old, old^mask) != old);
}
static inline int test_and_set_bit(int nr, volatile void *addr)
{
unsigned mask, old;
volatile unsigned *a = addr;
a += nr >> 5;
mask = 1U << (nr & 0x1f);
do old = *a;
while (__bo_cas(a, old, old|mask) != old);
return !!(old & mask);
}
static inline int test_and_clear_bit(int nr, volatile void *addr)
{
unsigned mask, old;
volatile unsigned *a = addr;
a += nr >> 5;
mask = 1U << (nr & 0x1f);
do old = *a;
while (__bo_cas(a, old, old&~mask) != old);
return !!(old & mask);
}
static inline int test_and_change_bit(int nr, volatile void *addr)
{
unsigned mask, old;
volatile unsigned *a = addr;
a += nr >> 5;
mask = 1U << (nr & 0x1f);
do old = *a;
while (__bo_cas(a, old, old^mask) != old);
return !!(old & mask);
}
#include <asm-generic/bitops/non-atomic.h>
#endif /* __ASM_SH_BITOPS_CAS_H */

View File

@ -18,6 +18,8 @@
#include <asm/bitops-op32.h>
#elif defined(CONFIG_CPU_SH4A)
#include <asm/bitops-llsc.h>
#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
#include <asm/bitops-cas.h>
#else
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>

View File

@ -0,0 +1,24 @@
#ifndef __ASM_SH_CMPXCHG_CAS_H
#define __ASM_SH_CMPXCHG_CAS_H
static inline unsigned long
__cmpxchg_u32(volatile u32 *m, unsigned long old, unsigned long new)
{
__asm__ __volatile__("cas.l %1,%0,@r0"
: "+r"(new)
: "r"(old), "z"(m)
: "t", "memory" );
return new;
}
static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
{
unsigned long old;
do old = *m;
while (__cmpxchg_u32(m, old, val) != old);
return old;
}
#include <asm/cmpxchg-xchg.h>
#endif /* __ASM_SH_CMPXCHG_CAS_H */

View File

@ -13,6 +13,8 @@
#include <asm/cmpxchg-grb.h>
#elif defined(CONFIG_CPU_SH4A)
#include <asm/cmpxchg-llsc.h>
#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
#include <asm/cmpxchg-cas.h>
#else
#include <asm/cmpxchg-irq.h>
#endif

View File

@ -0,0 +1,117 @@
/*
* include/asm-sh/spinlock-cas.h
*
* Copyright (C) 2015 SEI
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#ifndef __ASM_SH_SPINLOCK_CAS_H
#define __ASM_SH_SPINLOCK_CAS_H
#include <asm/barrier.h>
#include <asm/processor.h>
static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new)
{
__asm__ __volatile__("cas.l %1,%0,@r0"
: "+r"(new)
: "r"(old), "z"(p)
: "t", "memory" );
return new;
}
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*/
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
smp_cond_load_acquire(&lock->lock, VAL > 0);
}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
while (!__sl_cas(&lock->lock, 1, 0));
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
__sl_cas(&lock->lock, 0, 1);
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
return __sl_cas(&lock->lock, 1, 0);
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
*
* NOTE! it is quite common to have readers in interrupts but no interrupt
* writers. For those circumstances we can "mix" irq-safe locks - any writer
* needs to get a irq-safe write-lock, but readers can get non-irqsafe
* read-locks.
*/
/**
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_read_can_lock(x) ((x)->lock > 0)
/**
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned old;
do old = rw->lock;
while (!old || __sl_cas(&rw->lock, old, old-1) != old);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned old;
do old = rw->lock;
while (__sl_cas(&rw->lock, old, old+1) != old);
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
while (__sl_cas(&rw->lock, RW_LOCK_BIAS, 0) != RW_LOCK_BIAS);
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
__sl_cas(&rw->lock, 0, RW_LOCK_BIAS);
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned old;
do old = rw->lock;
while (old && __sl_cas(&rw->lock, old, old-1) != old);
return !!old;
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS;
}
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#define arch_spin_relax(lock) cpu_relax()
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
#endif /* __ASM_SH_SPINLOCK_CAS_H */

View File

@ -0,0 +1,224 @@
/*
* include/asm-sh/spinlock-llsc.h
*
* Copyright (C) 2002, 2003 Paul Mundt
* Copyright (C) 2006, 2007 Akio Idehara
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#ifndef __ASM_SH_SPINLOCK_LLSC_H
#define __ASM_SH_SPINLOCK_LLSC_H
#include <asm/barrier.h>
#include <asm/processor.h>
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*/
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
smp_cond_load_acquire(&lock->lock, VAL > 0);
}
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
* on the local processor, one does not.
*
* We make no fairness assumptions. They have a cost.
*/
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned long tmp;
unsigned long oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_spin_lock \n\t"
"mov %0, %1 \n\t"
"mov #0, %0 \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"cmp/pl %1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&lock->lock)
: "t", "memory"
);
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
unsigned long tmp;
__asm__ __volatile__ (
"mov #1, %0 ! arch_spin_unlock \n\t"
"mov.l %0, @%1 \n\t"
: "=&z" (tmp)
: "r" (&lock->lock)
: "t", "memory"
);
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned long tmp, oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_spin_trylock \n\t"
"mov %0, %1 \n\t"
"mov #0, %0 \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"synco \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&lock->lock)
: "t", "memory"
);
return oldval;
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
*
* NOTE! it is quite common to have readers in interrupts but no interrupt
* writers. For those circumstances we can "mix" irq-safe locks - any writer
* needs to get a irq-safe write-lock, but readers can get non-irqsafe
* read-locks.
*/
/**
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_read_can_lock(x) ((x)->lock > 0)
/**
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%1, %0 ! arch_read_lock \n\t"
"cmp/pl %0 \n\t"
"bf 1b \n\t"
"add #-1, %0 \n\t"
"movco.l %0, @%1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp)
: "r" (&rw->lock)
: "t", "memory"
);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%1, %0 ! arch_read_unlock \n\t"
"add #1, %0 \n\t"
"movco.l %0, @%1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp)
: "r" (&rw->lock)
: "t", "memory"
);
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%1, %0 ! arch_write_lock \n\t"
"cmp/hs %2, %0 \n\t"
"bf 1b \n\t"
"sub %2, %0 \n\t"
"movco.l %0, @%1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp)
: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
: "t", "memory"
);
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
__asm__ __volatile__ (
"mov.l %1, @%0 ! arch_write_unlock \n\t"
:
: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
: "t", "memory"
);
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned long tmp, oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_read_trylock \n\t"
"mov %0, %1 \n\t"
"cmp/pl %0 \n\t"
"bf 2f \n\t"
"add #-1, %0 \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"2: \n\t"
"synco \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&rw->lock)
: "t", "memory"
);
return (oldval > 0);
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned long tmp, oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_write_trylock \n\t"
"mov %0, %1 \n\t"
"cmp/hs %3, %0 \n\t"
"bf 2f \n\t"
"sub %3, %0 \n\t"
"2: \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"synco \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
: "t", "memory"
);
return (oldval > (RW_LOCK_BIAS - 1));
}
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#define arch_spin_relax(lock) cpu_relax()
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
#endif /* __ASM_SH_SPINLOCK_LLSC_H */

View File

@ -11,222 +11,12 @@
#ifndef __ASM_SH_SPINLOCK_H
#define __ASM_SH_SPINLOCK_H
/*
* The only locking implemented here uses SH-4A opcodes. For others,
* split this out as per atomic-*.h.
*/
#ifndef CONFIG_CPU_SH4A
#error "Need movli.l/movco.l for spinlocks"
#if defined(CONFIG_CPU_SH4A)
#include <asm/spinlock-llsc.h>
#elif defined(CONFIG_CPU_J2)
#include <asm/spinlock-cas.h>
#else
#error "The configured cpu type does not support spinlocks"
#endif
#include <asm/barrier.h>
#include <asm/processor.h>
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*/
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
smp_cond_load_acquire(&lock->lock, VAL > 0);
}
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
* on the local processor, one does not.
*
* We make no fairness assumptions. They have a cost.
*/
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned long tmp;
unsigned long oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_spin_lock \n\t"
"mov %0, %1 \n\t"
"mov #0, %0 \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"cmp/pl %1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&lock->lock)
: "t", "memory"
);
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
unsigned long tmp;
__asm__ __volatile__ (
"mov #1, %0 ! arch_spin_unlock \n\t"
"mov.l %0, @%1 \n\t"
: "=&z" (tmp)
: "r" (&lock->lock)
: "t", "memory"
);
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned long tmp, oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_spin_trylock \n\t"
"mov %0, %1 \n\t"
"mov #0, %0 \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"synco \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&lock->lock)
: "t", "memory"
);
return oldval;
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
*
* NOTE! it is quite common to have readers in interrupts but no interrupt
* writers. For those circumstances we can "mix" irq-safe locks - any writer
* needs to get a irq-safe write-lock, but readers can get non-irqsafe
* read-locks.
*/
/**
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_read_can_lock(x) ((x)->lock > 0)
/**
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%1, %0 ! arch_read_lock \n\t"
"cmp/pl %0 \n\t"
"bf 1b \n\t"
"add #-1, %0 \n\t"
"movco.l %0, @%1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp)
: "r" (&rw->lock)
: "t", "memory"
);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%1, %0 ! arch_read_unlock \n\t"
"add #1, %0 \n\t"
"movco.l %0, @%1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp)
: "r" (&rw->lock)
: "t", "memory"
);
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%1, %0 ! arch_write_lock \n\t"
"cmp/hs %2, %0 \n\t"
"bf 1b \n\t"
"sub %2, %0 \n\t"
"movco.l %0, @%1 \n\t"
"bf 1b \n\t"
: "=&z" (tmp)
: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
: "t", "memory"
);
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
__asm__ __volatile__ (
"mov.l %1, @%0 ! arch_write_unlock \n\t"
:
: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
: "t", "memory"
);
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned long tmp, oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_read_trylock \n\t"
"mov %0, %1 \n\t"
"cmp/pl %0 \n\t"
"bf 2f \n\t"
"add #-1, %0 \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"2: \n\t"
"synco \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&rw->lock)
: "t", "memory"
);
return (oldval > 0);
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned long tmp, oldval;
__asm__ __volatile__ (
"1: \n\t"
"movli.l @%2, %0 ! arch_write_trylock \n\t"
"mov %0, %1 \n\t"
"cmp/hs %3, %0 \n\t"
"bf 2f \n\t"
"sub %3, %0 \n\t"
"2: \n\t"
"movco.l %0, @%2 \n\t"
"bf 1b \n\t"
"synco \n\t"
: "=&z" (tmp), "=&r" (oldval)
: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
: "t", "memory"
);
return (oldval > (RW_LOCK_BIAS - 1));
}
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#define arch_spin_relax(lock) cpu_relax()
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
#endif /* __ASM_SH_SPINLOCK_H */