kernel_optimize_test/arch/powerpc/boot/string.S
Paul Mackerras b4e7de0f35 powerpc: Avoid unaligned loads and stores in boot memcpy code
The 601 processor will generate an alignment exception for accesses
which cross a page boundary.  In the boot wrapper code, OF is still
handling all exceptions, and it doesn't have an alignment exception
handler that emulates the instruction and continues.

This changes the memcpy and memmove routines in the boot wrapper to
avoid doing unaligned accesses.  If the source and destination are
misaligned with respect to each other, we just copy one byte at a
time.

Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-01-14 15:06:51 +11:00

231 lines
3.3 KiB
ArmAsm

/*
* Copyright (C) Paul Mackerras 1997.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* NOTE: this code runs in 32 bit mode and is packaged as ELF32.
*/
#include "ppc_asm.h"
.text
.globl strcpy
strcpy:
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r5)
bne 1b
blr
.globl strncpy
strncpy:
cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r6)
bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
blr
.globl strcat
strcat:
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r0,1(r5)
cmpwi 0,r0,0
bne 1b
addi r5,r5,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r5)
bne 1b
blr
.globl strcmp
strcmp:
addi r5,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r5)
cmpwi 1,r3,0
lbzu r0,1(r4)
subf. r3,r0,r3
beqlr 1
beq 1b
blr
.globl strlen
strlen:
addi r4,r3,-1
1: lbzu r0,1(r4)
cmpwi 0,r0,0
bne 1b
subf r3,r3,r4
blr
.globl memset
memset:
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
addi r6,r3,-4
cmplwi 0,r5,4
blt 7f
stwu r4,4(r6)
beqlr
andi. r0,r6,3
add r5,r0,r5
subf r6,r0,r6
rlwinm r0,r5,32-2,2,31
mtctr r0
bdz 6f
1: stwu r4,4(r6)
bdnz 1b
6: andi. r5,r5,3
7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3
8: stbu r4,1(r6)
bdnz 8b
blr
.globl memmove
memmove:
cmplw 0,r3,r4
bgt backwards_memcpy
/* fall through */
.globl memcpy
memcpy:
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
addi r6,r3,-4
addi r4,r4,-4
beq 3f /* if less than 8 bytes to do */
andi. r0,r6,3 /* get dest word aligned */
mtctr r7
bne 5f
andi. r0,r4,3 /* check src word aligned too */
bne 3f
1: lwz r7,4(r4)
lwzu r8,8(r4)
stw r7,4(r6)
stwu r8,8(r6)
bdnz 1b
andi. r5,r5,7
2: cmplwi 0,r5,4
blt 3f
lwzu r0,4(r4)
addi r5,r5,-4
stwu r0,4(r6)
3: cmpwi 0,r5,0
beqlr
mtctr r5
addi r4,r4,3
addi r6,r6,3
4: lbzu r0,1(r4)
stbu r0,1(r6)
bdnz 4b
blr
5: subfic r0,r0,4
cmpw cr1,r0,r5
add r7,r0,r4
andi. r7,r7,3 /* will source be word-aligned too? */
ble cr1,3b
bne 3b /* do byte-by-byte if not */
mtctr r0
6: lbz r7,4(r4)
addi r4,r4,1
stb r7,4(r6)
addi r6,r6,1
bdnz 6b
subf r5,r0,r5
rlwinm. r7,r5,32-3,3,31
beq 2b
mtctr r7
b 1b
.globl backwards_memcpy
backwards_memcpy:
rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
add r6,r3,r5
add r4,r4,r5
beq 3f
andi. r0,r6,3
mtctr r7
bne 5f
andi. r0,r4,3
bne 3f
1: lwz r7,-4(r4)
lwzu r8,-8(r4)
stw r7,-4(r6)
stwu r8,-8(r6)
bdnz 1b
andi. r5,r5,7
2: cmplwi 0,r5,4
blt 3f
lwzu r0,-4(r4)
subi r5,r5,4
stwu r0,-4(r6)
3: cmpwi 0,r5,0
beqlr
mtctr r5
4: lbzu r0,-1(r4)
stbu r0,-1(r6)
bdnz 4b
blr
5: cmpw cr1,r0,r5
subf r7,r0,r4
andi. r7,r7,3
ble cr1,3b
bne 3b
mtctr r0
6: lbzu r7,-1(r4)
stbu r7,-1(r6)
bdnz 6b
subf r5,r0,r5
rlwinm. r7,r5,32-3,3,31
beq 2b
mtctr r7
b 1b
.globl memcmp
memcmp:
cmpwi 0,r5,0
blelr
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
1: lbzu r3,1(r6)
lbzu r0,1(r4)
subf. r3,r0,r3
bdnzt 2,1b
blr
/*
* Flush the dcache and invalidate the icache for a range of addresses.
*
* flush_cache(addr, len)
*/
.global flush_cache
flush_cache:
addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */
rlwinm. 4,4,27,5,31
mtctr 4
beqlr
1: dcbf 0,3
icbi 0,3
addi 3,3,0x20
bdnz 1b
sync
isync
blr