
88
Efficient 64-Bit Integer Arithmetic
AMD Athlon™ Processor x86 Code Optimization
22007E/0—November 1999
Example 7 (Division):
;_ulldiv divides two unsigned 64-bit integers, and returns
; the quotient.
;
;INPUT:    [ESP+8]:[ESP+4]   dividend
;          [ESP+16]:[ESP+12] divisor
;
;OUTPUT:   EDX:EAX quotient of division
;
;DESTROYS: EAX,ECX,EDX,EFlags
_ulldiv PROC
PUSH EBX
;save EBX as per calling convention
MOV ECX, [ESP+20]
;divisor_hi
MOV EBX, [ESP+16]
;divisor_lo
MOV EDX, [ESP+12]
;dividend_hi
MOV EAX, [ESP+8]
;dividend_lo
TEST ECX, ECX
;divisor > 2^32–1?
JNZ $big_divisor
;yes, divisor > 2^32–1
CMP EDX, EBX
;only one division needed? (ECX = 0)
JAE $two_divs
;need two divisions
DIV EBX
;EAX = quotient_lo
MOV EDX, ECX
;EDX = quotient_hi = 0 (quotient in
; EDX:EAX)
POP EBX
;restore EBX as per calling convention
RET
;done, return to caller
$two_divs:
MOV ECX, EAX
;save dividend_lo in ECX
MOV EAX, EDX
;get dividend_hi
XOR EDX, EDX
;zero extend it into EDX:EAX
DIV EBX
;quotient_hi in EAX
XCHG EAX, ECX
;ECX = quotient_hi, EAX = dividend_lo
DIV EBX
;EAX = quotient_lo
MOV EDX, ECX
;EDX = quotient_hi (quotient in EDX:EAX)
POP EBX
;restore EBX as per calling convention
RET
;done, return to caller
$big_divisor:
PUSH EDI
;save EDI as per calling convention
MOV EDI, ECX
;save divisor_hi
SHR EDX, 1
;shift both divisor and dividend right
RCR EAX, 1
; by 1 bit
ROR EDI, 1
RCR EBX, 1
BSR ECX, ECX
;ECX = number of remaining shifts
SHRD EBX, EDI, CL ;scale down divisor and dividend
SHRD EAX, EDX, CL ; such that divisor is
SHR EDX, CL
; less than 2^32 (i.e. fits in EBX)
ROL EDI, 1
;restore original divisor_hi
DIV EBX
;compute quotient
MOV EBX, [ESP+12] ;dividend_lo
Содержание Athlon Processor x86
Страница 1: ...AMD Athlon Processor x86 Code Optimization Guide TM...
Страница 12: ...xii List of Figures AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 16: ...xvi Revision History AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 60: ...44 Code Padding Using Neutral Code Fillers AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 92: ...76 Push Memory Data Carefully AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 122: ...106 Take Advantage of the FSINCOS Instruction AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 156: ...140 AMD Athlon Processor Microarchitecture AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 176: ...160 Write Combining Operations AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 202: ...186 Page Attribute Table PAT AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 252: ...236 VectorPath Instructions AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...
Страница 256: ...240 Index AMD Athlon Processor x86 Code Optimization 22007E 0 November 1999...