kernel_optimize_test/arch/alpha/lib/memmove.S
Linus Torvalds 1da177e4c3 Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
2005-04-16 15:20:36 -07:00

182 lines
2.6 KiB
ArmAsm

/*
* arch/alpha/lib/memmove.S
*
* Barely optimized memmove routine for Alpha EV5.
*
* This is hand-massaged output from the original memcpy.c. We defer to
* memcpy whenever possible; the backwards copy loops are not unrolled.
*/
/*
 * memmove: copy $18 bytes from $17 to $16, allowing the two regions
 * to overlap.
 *
 * Register roles, as used by the code below:
 *   $16 = dest, $17 = src, $18 = n (byte count, counted down while copying)
 *   $0  = copy of the original dest; set once before any branch and
 *         never modified afterwards
 *   $4  = dest cursor, $5 = src cursor
 *   $1, $2, $3, $6 = scratch
 *   $26 = return address used by every "ret"
 *   $27 = base for the ldgp that sets $29
 *
 * Strategy:
 *   1. If the regions do not overlap, branch to memcpy.
 *   2. Otherwise copy ascending when dest <= src ($memmove_up) and
 *      descending, starting from the ends, when dest > src (the "_dn"
 *      labels, reached by fall-through).
 *   3. In either direction, when src and dest share the same low three
 *      address bits: copy single bytes until the dest cursor is 8-byte
 *      aligned, then whole quadwords, then the leftover bytes.  When
 *      they do not, copy everything one byte at a time.
 *
 * A single byte is copied by read-modify-write of its enclosing
 * quadword: ldq_u fetches the aligned quadwords holding the source and
 * destination bytes, extbl pulls the source byte out, insbl shifts it
 * to the destination byte lane, mskbl clears that lane in the
 * destination quadword, bis merges the two and stq_u writes it back.
 *
 * The unop/nop/fnop instructions do no work.  NOTE(review): they look
 * like padding that keeps the hand-scheduled instruction groups in
 * place (see the file header); confirm before moving or removing any.
 */
/* Do not let the assembler use the $at temporary or reorder instructions. */
.set noat
.set noreorder
.text
.align 4
.globl memmove
.ent memmove
memmove:
/* Set up the global pointer $29 from $27. */
ldgp $29, 0($27)
unop
nop
.prologue 1
/* $4 = dest + n and $5 = src + n: one past the end of each region. */
addq $16,$18,$4
addq $17,$18,$5
/* $1 becomes nonzero when the regions are disjoint (unsigned compares). */
cmpule $4,$17,$1 /* dest + n <= src */
cmpule $5,$16,$2 /* dest >= src + n */
bis $1,$2,$1
/* $0 = dest.  Done before the branch so it also holds on the memcpy path. */
mov $16,$0
/* $2 = dest ^ src; its low three bits are examined after the branch. */
xor $16,$17,$2
/*
 * Disjoint regions: let memcpy do the copy.  This is a branch, not a
 * call, so $26 is untouched and memcpy returns to our caller.
 * NOTE(review): !samegp presumably marks memcpy as sharing our GP.
 */
bne $1,memcpy !samegp
/* $2 != 0 from here on means src and dest are not co-aligned. */
and $2,7,$2 /* Test for src/dest co-alignment. */
/* $1 = dest & 7, consumed by the ascending path. */
and $16,7,$1
/* $3 = (dest <= src), unsigned. */
cmpule $16,$17,$3
bne $3,$memmove_up /* dest <= src: copy ascending */
/*
 * Descending copy (dest > src).  $4 and $5 still point one past the end
 * of dest and src.  $1 is recomputed as the alignment of the dest end.
 */
and $4,7,$1
bne $2,$misaligned_dn
unop
/* Dest end already 8-byte aligned: go straight to the quadword loop. */
beq $1,$skip_aligned_byte_loop_head_dn
/*
 * Head loop, descending: step both cursors back one byte and copy it,
 * until the dest cursor is 8-byte aligned or no bytes remain.
 */
$aligned_byte_loop_head_dn:
lda $4,-1($4)
lda $5,-1($5)
unop
/* Leave when the remaining count is <= 0 (signed test). */
ble $18,$egress
ldq_u $3,0($5)
ldq_u $2,0($4)
lda $18,-1($18)
extbl $3,$5,$1
insbl $1,$4,$1
mskbl $2,$4,$2
bis $1,$2,$1
/* $6 = alignment of the dest cursor; zero ends the head loop. */
and $4,7,$6
stq_u $1,0($4)
bne $6,$aligned_byte_loop_head_dn
/*
 * Quadword loop, descending.  The count is biased by -8 so that its
 * sign tells whether a full quadword is still left to copy.
 */
$skip_aligned_byte_loop_head_dn:
lda $18,-8($18)
blt $18,$skip_aligned_word_loop_dn
$aligned_word_loop_dn:
/* Copy the quadword just below each cursor, then move the cursors down. */
ldq $1,-8($5)
nop
lda $5,-8($5)
lda $18,-8($18)
stq $1,-8($4)
nop
lda $4,-8($4)
bge $18,$aligned_word_loop_dn
$skip_aligned_word_loop_dn:
/* Remove the -8 bias: $18 is now the leftover count, below 8. */
lda $18,8($18)
/* Leftover bytes go through the byte tail loop; otherwise return. */
bgt $18,$byte_loop_tail_dn
unop
ret $31,($26),1
/* Align the next label to a 2^4 = 16-byte boundary. */
.align 4
/* Descending copy when src and dest are not co-aligned: bytes only. */
$misaligned_dn:
nop
fnop
unop
/* Nothing to copy when n == 0. */
beq $18,$egress
/*
 * Byte loop, descending.  The cursors point one past the next byte to
 * copy, so the quadword loads use offset -1 before the cursors are
 * stepped back.
 */
$byte_loop_tail_dn:
ldq_u $3,-1($5)
ldq_u $2,-1($4)
lda $5,-1($5)
lda $4,-1($4)
lda $18,-1($18)
extbl $3,$5,$1
insbl $1,$4,$1
mskbl $2,$4,$2
bis $1,$2,$1
stq_u $1,0($4)
bgt $18,$byte_loop_tail_dn
br $egress
/*
 * Ascending copy (dest <= src).  On entry $1 = dest & 7 and $2 is
 * nonzero when src and dest are not co-aligned.
 */
$memmove_up:
/* Cursors start at the beginning of each region. */
mov $16,$4
mov $17,$5
bne $2,$misaligned_up
/* Dest already 8-byte aligned: go straight to the quadword loop. */
beq $1,$skip_aligned_byte_loop_head_up
/*
 * Head loop, ascending: copy one byte and advance, until the dest
 * cursor is 8-byte aligned or no bytes remain.
 */
$aligned_byte_loop_head_up:
unop
/* Leave when the remaining count is <= 0 (signed test). */
ble $18,$egress
ldq_u $3,0($5)
ldq_u $2,0($4)
lda $18,-1($18)
extbl $3,$5,$1
insbl $1,$4,$1
mskbl $2,$4,$2
bis $1,$2,$1
lda $5,1($5)
stq_u $1,0($4)
lda $4,1($4)
/* $6 = alignment of the advanced dest cursor; zero ends the head loop. */
and $4,7,$6
bne $6,$aligned_byte_loop_head_up
/*
 * Quadword loop, ascending.  The count is biased by -8 so that its
 * sign tells whether a full quadword is still left to copy.
 */
$skip_aligned_byte_loop_head_up:
lda $18,-8($18)
blt $18,$skip_aligned_word_loop_up
$aligned_word_loop_up:
/* Copy the quadword at each cursor, then move the cursors up. */
ldq $1,0($5)
nop
lda $5,8($5)
lda $18,-8($18)
stq $1,0($4)
nop
lda $4,8($4)
bge $18,$aligned_word_loop_up
$skip_aligned_word_loop_up:
/* Remove the -8 bias: $18 is now the leftover count, below 8. */
lda $18,8($18)
/* Leftover bytes go through the byte tail loop; otherwise return. */
bgt $18,$byte_loop_tail_up
unop
ret $31,($26),1
/* Align the next label to a 2^4 = 16-byte boundary. */
.align 4
/* Ascending copy when src and dest are not co-aligned: bytes only. */
$misaligned_up:
nop
fnop
unop
/* Nothing to copy when n == 0. */
beq $18,$egress
/* Byte loop, ascending: copy the byte at each cursor, then advance. */
$byte_loop_tail_up:
ldq_u $3,0($5)
ldq_u $2,0($4)
lda $18,-1($18)
extbl $3,$5,$1
insbl $1,$4,$1
mskbl $2,$4,$2
bis $1,$2,$1
stq_u $1,0($4)
lda $5,1($5)
lda $4,1($4)
nop
bgt $18,$byte_loop_tail_up
/* Shared exit: return through $26 with $0 still holding the original dest. */
$egress:
ret $31,($26),1
nop
nop
nop
.end memmove