Commit d28984e8 authored by Laurent Dufour, committed by Pavel Emelyanov

ppc64: Use optimized memcmp

Instead of relying on the common C memcmp() function, rely on the
optimized one stolen from the kernel.
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 16ad1941
@@ -6,7 +6,7 @@ SYS-ASM := syscalls.S
 syscalls-asm-y += $(SYS-ASM:.S=).o
 crtools-obj-y += crtools.o
 crtools-obj-y += cpu.o
-crtools-asm-y += memcpy_power7.o
+crtools-asm-y += memcpy_power7.o memcmp_64.o
 SYS-DEF := syscall-ppc64.def
 SYS-ASM-COMMON := syscall-common-ppc64.S
@@ -4,6 +4,7 @@
 #include "compiler.h"
 #define HAS_BUILTIN_MEMCPY
+#define HAS_BUILTIN_MEMCMP
 #include "asm-generic/string.h"
@@ -15,4 +16,6 @@ static inline void *builtin_memcpy(void *to, const void *from, unsigned long n)
 	return to;
 }
+
+extern int builtin_memcmp(const void *cs, const void *ct, size_t count);
 #endif /* __CR_ASM_STRING_H__ */
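With HAS_BUILTIN_MEMCMP defined, the generic string header is expected to drop its portable C fallback so that the extern declaration above resolves to the assembly routine added below. A minimal sketch of that guard pattern, assuming (it is not shown in this diff) that asm-generic/string.h provides a byte-wise builtin_memcmp() only when the architecture does not:

/*
 * Hypothetical fallback in asm-generic/string.h (an assumption for
 * illustration, not part of this commit). One byte per iteration; the
 * ppc64 routine below compares 8 bytes per load instead.
 */
#include <stddef.h>	/* size_t */

#ifndef HAS_BUILTIN_MEMCMP
static inline int builtin_memcmp(const void *cs, const void *ct, size_t count)
{
	const unsigned char *s1 = cs, *s2 = ct;

	for (; count; count--, s1++, s2++) {
		if (*s1 != *s2)
			return *s1 < *s2 ? -1 : 1;
	}
	return 0;
}
#endif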
/*
* Author: Anton Blanchard <anton@au.ibm.com>
* Copyright 2015 IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* --
 * Copied from the Linux file arch/powerpc/lib/memcmp_64.S
*/
#include "asm/linkage.h"
/* Offset and scratch register aliases used by the 32-bytes-per-pass loop */
#define off8 r6
#define off16 r7
#define off24 r8
#define rA r9
#define rB r10
#define rC r11
#define rD r27
#define rE r28
#define rF r29
#define rG r30
#define rH r31
/*
 * memcmp() must weight the byte at the lowest address most heavily. On
 * little-endian a raw 64-bit load puts that byte in the least significant
 * position, so load with byte reversal (ldbrx) there; big-endian can use a
 * plain ldx.
 */
#ifdef __LITTLE_ENDIAN__
#define LD ldbrx
#else
#define LD ldx
#endif
ENTRY(builtin_memcmp)
/* cr1: is the length zero? */
cmpdi cr1,r5,0
/* Use the short loop if both strings are not 8B aligned */
or r6,r3,r4
andi. r6,r6,7
/* Use the short loop if length is less than 32B */
cmpdi cr6,r5,31
beq cr1,.Lzero
bne .Lshort
bgt cr6,.Llong
/* Byte-at-a-time compare, unrolled four times; CTR holds the byte count */
.Lshort:
mtctr r5
1: lbz rA,0(r3)
lbz rB,0(r4)
subf. rC,rB,rA
bne .Lnon_zero
bdz .Lzero
lbz rA,1(r3)
lbz rB,1(r4)
subf. rC,rB,rA
bne .Lnon_zero
bdz .Lzero
lbz rA,2(r3)
lbz rB,2(r4)
subf. rC,rB,rA
bne .Lnon_zero
bdz .Lzero
lbz rA,3(r3)
lbz rB,3(r4)
subf. rC,rB,rA
bne .Lnon_zero
addi r3,r3,4
addi r4,r4,4
bdnz 1b
.Lzero:
li r3,0
blr
.Lnon_zero:
mr r3,rC
blr
/*
 * Both buffers are 8-byte aligned and the length is at least 32: compare
 * 32 bytes per iteration with four pairs of 8-byte loads. Non-volatile
 * registers r27-r31 are spilled below the stack pointer, CTR gets
 * count/32 and r5 keeps the trailing count%32 bytes.
 */
.Llong:
li off8,8
li off16,16
li off24,24
std r31,-8(r1)
std r30,-16(r1)
std r29,-24(r1)
std r28,-32(r1)
std r27,-40(r1)
srdi r0,r5,5
mtctr r0
andi. r5,r5,31
LD rA,0,r3
LD rB,0,r4
LD rC,off8,r3
LD rD,off8,r4
LD rE,off16,r3
LD rF,off16,r4
LD rG,off24,r3
LD rH,off24,r4
cmpld cr0,rA,rB
addi r3,r3,32
addi r4,r4,32
bdz .Lfirst32
LD rA,0,r3
LD rB,0,r4
cmpld cr1,rC,rD
LD rC,off8,r3
LD rD,off8,r4
cmpld cr6,rE,rF
LD rE,off16,r3
LD rF,off16,r4
cmpld cr7,rG,rH
bne cr0,.LcmpAB
LD rG,off24,r3
LD rH,off24,r4
cmpld cr0,rA,rB
bne cr1,.LcmpCD
addi r3,r3,32
addi r4,r4,32
bdz .Lsecond32
/*
 * Main loop, software pipelined: test the compares issued for the previous
 * 32 bytes while the next 32 bytes are being loaded.
 */
.balign 16
1: LD rA,0,r3
LD rB,0,r4
cmpld cr1,rC,rD
bne cr6,.LcmpEF
LD rC,off8,r3
LD rD,off8,r4
cmpld cr6,rE,rF
bne cr7,.LcmpGH
LD rE,off16,r3
LD rF,off16,r4
cmpld cr7,rG,rH
bne cr0,.LcmpAB
LD rG,off24,r3
LD rH,off24,r4
cmpld cr0,rA,rB
bne cr1,.LcmpCD
addi r3,r3,32
addi r4,r4,32
bdnz 1b
/* Counter expired after the second block of loads: drain the pending compares */
.Lsecond32:
cmpld cr1,rC,rD
bne cr6,.LcmpEF
cmpld cr6,rE,rF
bne cr7,.LcmpGH
cmpld cr7,rG,rH
bne cr0,.LcmpAB
bne cr1,.LcmpCD
bne cr6,.LcmpEF
bne cr7,.LcmpGH
/* Restore the saved registers; any trailing count%32 bytes go through the byte loop */
.Ltail:
ld r31,-8(r1)
ld r30,-16(r1)
ld r29,-24(r1)
ld r28,-32(r1)
ld r27,-40(r1)
cmpdi r5,0
beq .Lzero
b .Lshort
/* Only one 32-byte block was read: finish its compares, then fall into the tail */
.Lfirst32:
cmpld cr1,rC,rD
cmpld cr6,rE,rF
cmpld cr7,rG,rH
bne cr0,.LcmpAB
bne cr1,.LcmpCD
bne cr6,.LcmpEF
bne cr7,.LcmpGH
b .Ltail
/* A doubleword differed: turn the unsigned compare into memcmp()'s -1/+1 result */
.LcmpAB:
li r3,1
bgt cr0,.Lout
li r3,-1
b .Lout
.LcmpCD:
li r3,1
bgt cr1,.Lout
li r3,-1
b .Lout
.LcmpEF:
li r3,1
bgt cr6,.Lout
li r3,-1
b .Lout
.LcmpGH:
li r3,1
bgt cr7,.Lout
li r3,-1
/* Restore the non-volatile registers and return the result in r3 */
.Lout:
ld r31,-8(r1)
ld r30,-16(r1)
ld r29,-24(r1)
ld r28,-32(r1)
ld r27,-40(r1)
blr
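The routine keeps memcmp()'s contract: the sign of the result reflects the first differing byte, with the aligned path returning exactly -1 or 1 and the byte path returning the raw byte difference. Below is a small cross-check against libc of the kind one might run, assuming memcmp_64.o can be assembled and linked into an ordinary userspace test binary (a hypothetical harness, not part of the commit):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Matches the declaration added to the arch string.h; the symbol comes from memcmp_64.o */
extern int builtin_memcmp(const void *cs, const void *ct, size_t count);

/* Reduce a result to its sign so -1/+1 and raw byte differences compare equal */
static int sign(int v) { return (v > 0) - (v < 0); }

int main(void)
{
	unsigned char a[128], b[128];
	size_t len, i;

	srand(1);
	for (i = 0; i < sizeof(a); i++)
		a[i] = b[i] = rand() & 0xff;

	/*
	 * Flip the last byte of each length and compare both aligned and
	 * unaligned starts, so both the byte loop and the 32-byte loop are hit.
	 */
	for (len = 1; len <= sizeof(a); len++) {
		b[len - 1] ^= 0x5a;
		if (sign(builtin_memcmp(a, b, len)) != sign(memcmp(a, b, len)) ||
		    sign(builtin_memcmp(a + 1, b + 1, len - 1)) !=
		    sign(memcmp(a + 1, b + 1, len - 1))) {
			printf("mismatch at len %zu\n", len);
			return 1;
		}
		b[len - 1] ^= 0x5a;
	}
	printf("builtin_memcmp agrees with memcmp\n");
	return 0;
}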
@@ -13,6 +13,7 @@ endif
 ifeq ($(SRCARCH), ppc64)
 asm-e += $(ARCH_DIR)/vdso-trampoline.o
 asm-e += $(ARCH_DIR)/memcpy_power7.o
+asm-e += $(ARCH_DIR)/memcmp_64.o
 endif
 endif