sparc: Minor tweaks to Niagara page copy/clear.

Don't use floating point on Niagara2, use the traditional
plain Niagara code instead.

Unroll Niagara loops to 128 bytes for copy, and 256 bytes
for clear.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2011-08-01 18:18:57 -07:00
parent ac85fe8b21
commit e95ade0839
4 changed files with 77 additions and 102 deletions

View File

@ -559,7 +559,7 @@ niagara2_patch:
nop
call niagara_patch_bzero
nop
call niagara2_patch_pageops
call niagara_patch_pageops
nop
ba,a,pt %xcc, 80f

View File

@ -31,7 +31,7 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o
lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
lib-$(CONFIG_SPARC64) += NG2patch.o NG2page.o
lib-$(CONFIG_SPARC64) += NG2patch.o
lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o

View File

@ -1,61 +0,0 @@
/* NG2page.S: Niagara-2 optimized clear and copy page.
*
* Copyright (C) 2007 (davem@davemloft.net)
*/
#include <asm/asi.h>
#include <asm/page.h>
#include <asm/visasm.h>
.text
.align 32
/* This is heavily simplified from the sun4u variants
* because Niagara-2 does not have any D-cache aliasing issues.
*/
NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
/* Copies PAGE_SIZE bytes from src to dest, 64 bytes per loop pass, moving
 * the data through the floating-point registers starting at %f0.
 * %o2 (vaddr) is not referenced.  Scratch registers: %g3 = dest - src,
 * %g7 = bytes still to copy.
 */
/* Start fetching the first two 64-byte chunks of the source. */
prefetch [%o1 + 0x00], #one_read
prefetch [%o1 + 0x40], #one_read
/* VISEntryHalf/VISExitHalf come from <asm/visasm.h> (not shown here).
 * NOTE(review): presumably they bracket the use of the FP registers
 * below -- confirm against that header.
 */
VISEntryHalf
set PAGE_SIZE, %g7
/* %g3 = dest - src, so [%o1 + %g3] addresses the destination while only
 * the source pointer %o1 has to be advanced.
 */
sub %o0, %o1, %g3
/* NOTE(review): the zero store through ASI_BLK_INIT_QUAD_LDD_P presumably
 * initializes the destination line without reading it from memory first --
 * confirm against the CPU documentation.
 */
1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P
/* Count down here; the condition codes set by subcc are consumed by the
 * bne below (the instructions in between are not the cc-setting forms).
 */
subcc %g7, 64, %g7
/* Block load from src into %f0 and block store of the same registers to dest. */
ldda [%o1] ASI_BLK_P, %f0
stda %f0, [%o1 + %g3] ASI_BLK_P
add %o1, 64, %o1
bne,pt %xcc, 1b
/* Branch delay slot: prefetch 0x40 past the already-advanced source pointer. */
prefetch [%o1 + 0x40], #one_read
membar #Sync
VISExitHalf
/* Leaf return; the nop fills the delay slot of retl. */
retl
nop
/* Instruction words written by NG_DO_PATCH.
 * BRANCH_ALWAYS is the opcode part of a branch whose low bits receive the
 * displacement (it matches the encoding of "ba,pt %xcc" with a zero
 * displacement); NOP is the word stored into the following slot.
 */
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
/* NG_DO_PATCH(OLD, NEW): overwrite the first two instructions at OLD so that
 * execution branches to NEW.
 *   - %g1 = address of NEW, %g2 = address of OLD, %g1 = NEW - OLD (bytes).
 *   - "sll 11" followed by "srl 11 + 2" keeps bits 20..2 of that byte
 *     distance, i.e. a 19-bit displacement counted in instruction words.
 *   - BRANCH_ALWAYS | displacement is stored at OLD, NOP at OLD + 4, and
 *     "flush %g2" is issued on the patched address.
 * Clobbers %g1, %g2 and %g3.  Both symbols are materialized with
 * sethi %hi / or %lo only, so only the low 32 bits of each address are used.
 */
#define NG_DO_PATCH(OLD, NEW) \
sethi %hi(NEW), %g1; \
or %g1, %lo(NEW), %g1; \
sethi %hi(OLD), %g2; \
or %g2, %lo(OLD), %g2; \
sub %g1, %g2, %g1; \
sethi %hi(BRANCH_ALWAYS), %g3; \
sll %g1, 11, %g1; \
srl %g1, 11 + 2, %g1; \
or %g3, %lo(BRANCH_ALWAYS), %g3; \
or %g3, %g1, %g3; \
stw %g3, [%g2]; \
sethi %hi(NOP), %g3; \
or %g3, %lo(NOP), %g3; \
stw %g3, [%g2 + 0x4]; \
flush %g2;
/* niagara2_patch_pageops: redirect the generic page routines by patching a
 * branch into the start of each one (see NG_DO_PATCH):
 *   copy_user_page  -> NG2copy_user_page
 *   _clear_page     -> NGclear_page
 *   clear_user_page -> NGclear_user_page
 * Takes no arguments; each NG_DO_PATCH expansion uses %g1, %g2 and %g3.
 */
.globl niagara2_patch_pageops
.type niagara2_patch_pageops,#function
niagara2_patch_pageops:
NG_DO_PATCH(copy_user_page, NG2copy_user_page)
NG_DO_PATCH(_clear_page, NGclear_page)
NG_DO_PATCH(clear_user_page, NGclear_user_page)
/* Leaf return; the nop fills the delay slot of retl. */
retl
nop
.size niagara2_patch_pageops,.-niagara2_patch_pageops

View File

@ -16,55 +16,91 @@
*/
NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
/* NOTE(review): this listing is a diff hunk whose +/- markers were stripped,
 * so lines of the previous 64-bytes-per-pass loop and of its
 * 128-bytes-per-pass replacement appear together.  Judging by the commit
 * message and the register usage, lines that address through %o0/%o1 with
 * an explicit ASI_BLK_INIT_QUAD_LDD_P and %g1/%g2/%g3 offsets look like the
 * removed version, and lines that address through %i0/%i1 with %asi look
 * like the added version -- confirm against the real diff before relying
 * on any of the annotations below.
 */
/* Apparent old prologue: %g1/%g2/%g3 hold the byte offsets 8/16/24. */
prefetch [%o1 + 0x00], #one_read
mov 8, %g1
mov 16, %g2
mov 24, %g3
/* Apparent new prologue: open a register window (the arguments are then
 * addressed as %i0/%i1), keep the current %asi in %g3 and switch %asi to
 * ASI_BLK_INIT_QUAD_LDD_P for the loads and stores that follow.
 */
save %sp, -192, %sp
rd %asi, %g3
wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
/* %g7 = bytes still to copy. */
set PAGE_SIZE, %g7
prefetch [%i1 + 0x00], #one_read
prefetch [%i1 + 0x40], #one_read
/* Apparent old loop body: 64 bytes per pass, handled as two 32-byte halves. */
1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
prefetch [%o1 + 0x40], #one_read
add %o1, 32, %o1
stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
add %o1, 32, %o1
add %o0, 32, %o0
stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
subcc %g7, 64, %g7
/* Apparent new loop body: 128 bytes per pass, as two 64-byte groups.
 * Each ldda names an even register (%o2, %o4, %l2, %l4); the stxa lines
 * then store that register and its odd neighbour 8 bytes apart, so each
 * ldda evidently fills a 16-byte register pair.
 */
1: prefetch [%i1 + 0x80], #one_read
prefetch [%i1 + 0xc0], #one_read
ldda [%i1 + 0x00] %asi, %o2
ldda [%i1 + 0x10] %asi, %o4
ldda [%i1 + 0x20] %asi, %l2
ldda [%i1 + 0x30] %asi, %l4
stxa %o2, [%i0 + 0x00] %asi
stxa %o3, [%i0 + 0x08] %asi
stxa %o4, [%i0 + 0x10] %asi
stxa %o5, [%i0 + 0x18] %asi
stxa %l2, [%i0 + 0x20] %asi
stxa %l3, [%i0 + 0x28] %asi
stxa %l4, [%i0 + 0x30] %asi
stxa %l5, [%i0 + 0x38] %asi
ldda [%i1 + 0x40] %asi, %o2
ldda [%i1 + 0x50] %asi, %o4
ldda [%i1 + 0x60] %asi, %l2
ldda [%i1 + 0x70] %asi, %l4
stxa %o2, [%i0 + 0x40] %asi
stxa %o3, [%i0 + 0x48] %asi
stxa %o4, [%i0 + 0x50] %asi
stxa %o5, [%i0 + 0x58] %asi
stxa %l2, [%i0 + 0x60] %asi
stxa %l3, [%i0 + 0x68] %asi
stxa %l4, [%i0 + 0x70] %asi
stxa %l5, [%i0 + 0x78] %asi
add %i1, 128, %i1
subcc %g7, 128, %g7
/* Loop while bytes remain.  The two add lines after the branch are the
 * delay-slot instruction of the apparent old (dest += 32) and new
 * (dest += 128) versions respectively.
 */
bne,pt %xcc, 1b
add %o0, 32, %o0
add %i0, 128, %i0
/* Put back the %asi value that was saved in %g3 by the new prologue. */
wr %g3, 0x0, %asi
membar #Sync
/* retl/nop is the leaf return of the apparent old version; ret/restore is
 * the return of the apparent new version, which also pops the register
 * window opened by save.
 */
retl
nop
ret
restore
/* NGclear_page / NGclear_user_page: store zeros (%g0) over PAGE_SIZE bytes
 * starting at %o0.  The two labels are adjacent, so both names enter the
 * same code; %o1 (vaddr) is not referenced.
 *
 * NOTE(review): this listing is a diff hunk whose +/- markers were stripped,
 * so lines of the previous 64-bytes-per-pass loop and of its
 * 256-bytes-per-pass replacement appear together.  Judging by the commit
 * message, the stores with an explicit ASI_BLK_INIT_QUAD_LDD_P and
 * %g1/%g2/%g3 offsets look like the removed version and the stores using
 * %asi with immediate offsets look like the added version -- confirm
 * against the real diff before relying on the annotations below.
 */
.globl NGclear_page, NGclear_user_page
.align 32
NGclear_page: /* %o0=dest */
NGclear_user_page: /* %o0=dest, %o1=vaddr */
/* Apparent old prologue: %g1/%g2/%g3 hold the byte offsets 8/16/24. */
mov 8, %g1
mov 16, %g2
mov 24, %g3
/* Apparent new prologue: keep the current %asi in %g3 and switch %asi to
 * ASI_BLK_INIT_QUAD_LDD_P for the stores that follow.
 */
rd %asi, %g3
wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
/* %g7 = bytes still to clear. */
set PAGE_SIZE, %g7
/* Apparent old loop body: 64 bytes per pass, as two groups of four 8-byte stores. */
1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 32, %o0
stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
subcc %g7, 64, %g7
/* Apparent new loop body: thirty-two 8-byte stores at offsets 0x00..0xf8,
 * i.e. 256 bytes per pass.
 */
1: stxa %g0, [%o0 + 0x00] %asi
stxa %g0, [%o0 + 0x08] %asi
stxa %g0, [%o0 + 0x10] %asi
stxa %g0, [%o0 + 0x18] %asi
stxa %g0, [%o0 + 0x20] %asi
stxa %g0, [%o0 + 0x28] %asi
stxa %g0, [%o0 + 0x30] %asi
stxa %g0, [%o0 + 0x38] %asi
stxa %g0, [%o0 + 0x40] %asi
stxa %g0, [%o0 + 0x48] %asi
stxa %g0, [%o0 + 0x50] %asi
stxa %g0, [%o0 + 0x58] %asi
stxa %g0, [%o0 + 0x60] %asi
stxa %g0, [%o0 + 0x68] %asi
stxa %g0, [%o0 + 0x70] %asi
stxa %g0, [%o0 + 0x78] %asi
stxa %g0, [%o0 + 0x80] %asi
stxa %g0, [%o0 + 0x88] %asi
stxa %g0, [%o0 + 0x90] %asi
stxa %g0, [%o0 + 0x98] %asi
stxa %g0, [%o0 + 0xa0] %asi
stxa %g0, [%o0 + 0xa8] %asi
stxa %g0, [%o0 + 0xb0] %asi
stxa %g0, [%o0 + 0xb8] %asi
stxa %g0, [%o0 + 0xc0] %asi
stxa %g0, [%o0 + 0xc8] %asi
stxa %g0, [%o0 + 0xd0] %asi
stxa %g0, [%o0 + 0xd8] %asi
stxa %g0, [%o0 + 0xe0] %asi
stxa %g0, [%o0 + 0xe8] %asi
stxa %g0, [%o0 + 0xf0] %asi
stxa %g0, [%o0 + 0xf8] %asi
subcc %g7, 256, %g7
/* Loop while bytes remain.  The two add lines after the branch are the
 * delay-slot instruction of the apparent old (dest += 32) and new
 * (dest += 256) versions respectively.
 */
bne,pt %xcc, 1b
add %o0, 32, %o0
add %o0, 256, %o0
/* Put back the %asi value that was saved in %g3 by the new prologue. */
wr %g3, 0x0, %asi
membar #Sync
/* Leaf return; the nop fills the delay slot of retl. */
retl
nop