#define BDNZ BC 16,0,
/*
* 64/64 division adapted from powerpc compiler writer's handbook
*
* (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
* quo dvd dvs
*
* Remainder is left in R7:R8
*
* Code comment notation:
* msw = most-significant (high-order) word, i.e. bits 0..31
* lsw = least-significant (low-order) word, i.e. bits 32..63
* LZ = Leading Zeroes
* SD = Significant Digits
*
* R3:R4 = dvd (input dividend); quo (output quotient)
* R5:R6 = dvs (input divisor)
*
* R7:R8 = tmp; rem (output remainder)
*/
TEXT _divu64(SB), $0
MOVW a+0(FP), R3
MOVW a+4(FP), R4
MOVW b+8(FP), R5
MOVW b+12(FP), R6
/* count the number of leading 0s in the dividend */
CMP R3, $0 /* dvd.msw == 0? R3, */
CNTLZW R3, R11 /* R11 = dvd.msw.LZ */
CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */
BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
ADD $32, R9, R11 /* dvd.LZ = dvd.lsw.LZ + 32 */
lab1:
/* count the number of leading 0s in the divisor */
CMP R5, $0 /* dvd.msw == 0? */
CNTLZW R5, R9 /* R9 = dvs.msw.LZ */
CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */
BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
ADD $32, R10, R9 /* dvs.LZ = dvs.lsw.LZ + 32 */
lab2:
/* determine shift amounts to minimize the number of iterations */
CMP R11, R9 /* compare dvd.LZ to dvs.LZ */
SUBC R11, $64, R10 /* R10 = dvd.SD */
BGT lab9 /* if(dvs > dvd) quotient = 0 */
ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */
SUBC R9, $64, R9 /* R9 = dvs.SD */
ADD R9, R11 /* (dvd.LZ + dvs.SD) = left shift of dvd for */
/* initial dvd */
SUB R9, R10, R9 /* (dvd.SD - dvs.SD) = right shift of dvd for */
/* initial tmp */
MOVW R9, CTR /* number of iterations = dvd.SD - dvs.SD */
/* R7:R8 = R3:R4 >> R9 */
CMP R9, $32
ADD $-32, R9, R7
BLT lab3 /* if(R9 < 32) jump to lab3 */
SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */
MOVW $0, R7 /* tmp.msw = 0 */
BR lab4
lab3:
SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */
SUBC R9, $32, R7
SLW R7, R3, R7 /* R7 = dvd.msw << 32 - R9 */
OR R7, R8 /* tmp.lsw = R8 | R7 */
SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */
lab4:
/* R3:R4 = R3:R4 << R11 */
CMP R11,$32
ADDC $-32, R11, R9
BLT lab5 /* (R11 < 32)? */
SLW R9, R4, R3 /* dvd.msw = dvs.lsw << R9 */
MOVW $0, R4 /* dvd.lsw = 0 */
BR lab6
lab5:
SLW R11, R3 /* R3 = dvd.msw << R11 */
SUBC R11, $32, R9
SRW R9, R4, R9 /* R9 = dvd.lsw >> 32 - R11 */
OR R9, R3 /* dvd.msw = R3 | R9 */
SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */
lab6:
/* restoring division shift and subtract loop */
MOVW $-1, R10
ADDC $0, R7 /* clear carry bit before loop starts */
lab7:
/* tmp:dvd is considered one large register */
/* each portion is shifted left 1 bit by adding it to itself */
/* adde sums the carry from the previous and creates a new carry */
ADDE R4,R4 /* shift dvd.lsw left 1 bit */
ADDE R3,R3 /* shift dvd.msw to left 1 bit */
ADDE R8,R8 /* shift tmp.lsw to left 1 bit */
ADDE R7,R7 /* shift tmp.msw to left 1 bit */
SUBC R6, R8, R11 /* tmp.lsw - dvs.lsw */
SUBECC R5, R7, R9 /* tmp.msw - dvs.msw */
BLT lab8 /* if(result < 0) clear carry bit */
MOVW R11, R8 /* move lsw */
MOVW R9, R7 /* move msw */
ADDC $1, R10, R11 /* set carry bit */
lab8:
BDNZ lab7
ADDE R4,R4 /* quo.lsw (lsb = CA) */
ADDE R3,R3 /* quo.msw (lsb from lsw) */
lab10:
MOVW qp+16(FP), R9
MOVW rp+20(FP), R10
CMP R9, $0
BEQ lab11
MOVW R3, 0(R9)
MOVW R4, 4(R9)
lab11:
CMP R10, $0
BEQ lab12
MOVW R7, 0(R10)
MOVW R8, 4(R10)
lab12:
RETURN
lab9:
/* Quotient is 0 (dvs > dvd) */
MOVW R4, R8 /* rmd.lsw = dvd.lsw */
MOVW R3, R7 /* rmd.msw = dvd.msw */
MOVW $0, R4 /* dvd.lsw = 0 */
MOVW $0, R3 /* dvd.msw = 0 */
BR lab10
|