## diffname alphapc/memmove.s 1999/0422
## diff -e /dev/null /n/emeliedump/1999/0422/sys/src/brazil/alphapc/memmove.s
0a
#define QUAD 8
#define ALIGN 64
#define BLOCK 64
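/*
 * memmove(to, from, n): R0 holds to (and doubles as the return
 * value), R7 holds from, R10 holds n.  If from >= to, the buffers
 * can be copied front to back; otherwise copy from the top down
 * so overlapping source bytes are read before they are overwritten.
 */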
TEXT memmove(SB), $0
MOVL from+4(FP), R7
MOVL n+8(FP), R10
MOVQ R0, R6 /* to: R0 is also the return value */
CMPUGE R7, R0, R5 /* from >= to? */
BNE R5, _forward /* then copying up is safe */
MOVQ R6, R8 /* end to address */
ADDL R10, R6, R6 /* to+n */
ADDL R10, R7, R7 /* from+n */
CMPUGE $ALIGN, R10, R1 /* need at least ALIGN bytes */
BNE R1, _b1tail
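/*
 * Backward copy: R6 and R7 point one past the ends of the
 * buffers and R8 is where the copy stops.  First copy single
 * bytes down until the destination is ALIGN-aligned.
 */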
_balign:
AND $(ALIGN-1), R6, R1
BEQ R1, _baligned
MOVBU -1(R7), R2
ADDL $-1, R6, R6
MOVB R2, (R6)
ADDL $-1, R7, R7
JMP _balign
_baligned:
AND $(QUAD-1), R7, R1 /* is the source quad-aligned */
BNE R1, _bunaligned
ADDL $(BLOCK-1), R8, R9
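/*
 * Source and destination both quad-aligned: copy BLOCK bytes
 * per iteration, issuing all eight loads before the stores so
 * overlapping buffers are read before they are overwritten.
 */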
_bblock:
CMPUGE R9, R6, R1
BNE R1, _b8tail
MOVQ -64(R7), R22
MOVQ -56(R7), R23
MOVQ -48(R7), R24
MOVQ -40(R7), R25
MOVQ -32(R7), R2
MOVQ -24(R7), R3
MOVQ -16(R7), R4
MOVQ -8(R7), R5
SUBL $64, R6, R6
SUBL $64, R7, R7
MOVQ R22, (R6)
MOVQ R23, 8(R6)
MOVQ R24, 16(R6)
MOVQ R25, 24(R6)
MOVQ R2, 32(R6)
MOVQ R3, 40(R6)
MOVQ R4, 48(R6)
MOVQ R5, 56(R6)
JMP _bblock
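/*
 * Fewer than BLOCK bytes remain: copy a quad at a time.
 */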
_b8tail:
ADDL $(QUAD-1), R8, R9
_b8block:
CMPUGE R9, R6, R1
BNE R1, _b1tail
MOVQ -8(R7), R2
SUBL $8, R6
MOVQ R2, (R6)
SUBL $8, R7
JMP _b8block
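/*
 * Byte loop for whatever remains; short moves come straight here.
 */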
_b1tail:
CMPUGE R8, R6, R1
BNE R1, _ret
MOVBU -1(R7), R2
SUBL $1, R6, R6
MOVB R2, (R6)
SUBL $1, R7, R7
JMP _b1tail
_ret:
RET
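/*
 * Source not quad-aligned: MOVQU loads the aligned quad
 * containing the address, then EXTQL/EXTQH shift a pair of
 * adjacent quads so OR can merge them into one aligned
 * destination quad; 16 bytes move per iteration.
 */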
_bunaligned:
ADDL $(16-1), R8, R9
_bu8block:
CMPUGE R9, R6, R1
BNE R1, _b1tail
MOVQU -16(R7), R4
MOVQU -8(R7), R3
MOVQU (R7), R2
SUBL $16, R6
EXTQH R7, R2, R2
EXTQL R7, R3, R5
OR R5, R2, R11
EXTQH R7, R3, R3
EXTQL R7, R4, R4
OR R3, R4, R13
MOVQ R11, 8(R6)
MOVQ R13, (R6)
SUBL $16, R7
JMP _bu8block
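/*
 * Forward copy (from >= to): the same scheme run from low
 * addresses to high.
 */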
_forward:
ADDL R10, R6, R8 /* end to address */
CMPUGE $ALIGN, R10, R1 /* need at least ALIGN bytes */
BNE R1, _f1tail
_falign:
AND $(ALIGN-1), R6, R1
BEQ R1, _faligned
MOVBU (R7), R2
ADDL $1, R6, R6
ADDL $1, R7, R7
MOVB R2, -1(R6)
JMP _falign
_faligned:
AND $(QUAD-1), R7, R1 /* is the source quad-aligned */
BNE R1, _funaligned
SUBL $(BLOCK-1), R8, R9
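/*
 * Unrolled forward loop: eight loads, then eight stores.
 */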
_fblock:
CMPUGT R9, R6, R1
BEQ R1, _f8tail
MOVQ (R7), R2
MOVQ 8(R7), R3
MOVQ 16(R7), R4
MOVQ 24(R7), R5
MOVQ 32(R7), R22
MOVQ 40(R7), R23
MOVQ 48(R7), R24
MOVQ 56(R7), R25
ADDL $64, R6, R6
ADDL $64, R7, R7
MOVQ R2, -64(R6)
MOVQ R3, -56(R6)
MOVQ R4, -48(R6)
MOVQ R5, -40(R6)
MOVQ R22, -32(R6)
MOVQ R23, -24(R6)
MOVQ R24, -16(R6)
MOVQ R25, -8(R6)
JMP _fblock
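/*
 * Quad-at-a-time forward tail.
 */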
_f8tail:
SUBL $(QUAD-1), R8, R9
_f8block:
CMPUGT R9, R6, R1
BEQ R1, _f1tail
MOVQ (R7), R2
ADDL $8, R6
ADDL $8, R7
MOVQ R2, -8(R6)
JMP _f8block
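/*
 * Forward byte loop for the remainder and for short moves.
 */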
_f1tail:
CMPUGT R8, R6, R1
BEQ R1, _fret
MOVBU (R7), R2
ADDL $1, R6, R6
ADDL $1, R7, R7
MOVB R2, -1(R6)
JMP _f1tail
_fret:
RET
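/*
 * Unaligned source, forward: merge adjacent quads with
 * EXTQL/EXTQH as in the backward case.
 */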
_funaligned:
SUBL $(16-1), R8, R9
_fu8block:
CMPUGT R9, R6, R1
BEQ R1, _f1tail
MOVQU (R7), R2
MOVQU 8(R7), R3
MOVQU 16(R7), R4
EXTQL R7, R2, R2
EXTQH R7, R3, R5
OR R5, R2, R11
EXTQL R7, R3, R3
MOVQ R11, (R6)
EXTQH R7, R4, R4
OR R3, R4, R11
MOVQ R11, 8(R6)
ADDL $16, R6
ADDL $16, R7
JMP _fu8block
.
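
The EXTQL/EXTQH merging in _bu8block and _fu8block is the standard Alpha unaligned-copy idiom, and a rough C sketch of the forward variant may make it easier to follow. This is an illustration, not code from the tree: copyquads is a hypothetical helper, to is assumed quad-aligned and from misaligned (so 0 < s < 64 and both shifts are well defined), the machine is little-endian as the Alpha is, and, like MOVQU, the aligned loads may touch a few bytes just outside the source range.

	#include <stdint.h>

	typedef unsigned long long uvlong;

	/* copy nquads quads to an aligned destination from a misaligned source */
	static void
	copyquads(uvlong *to, const char *from, int nquads)
	{
		const uvlong *p = (const uvlong *)((uintptr_t)from & ~(uintptr_t)7);
		int s = (int)((uintptr_t)from & 7) * 8;	/* bit offset of first byte */
		uvlong lo = *p++;	/* quad holding the first source bytes */

		while(nquads-- > 0){
			uvlong hi = *p++;
			/* low half from lo (EXTQL), high half from hi (EXTQH) */
			*to++ = (lo >> s) | (hi << (64 - s));
			lo = hi;
		}
	}

Keeping the previous quad in lo means each iteration issues one new load and one store, mirroring how the assembly reuses a register across the 16-byte passes instead of reloading both quads.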