; Software Pipelining (page 6-42)
; Example 6–26. Assembly Code for Fixed-Point Dot Product (Software Pipelined)
; Prolog, first execute packet: issue the first pair of word loads and,
; in parallel (||), set up the loop counter and clear both accumulators.
        LDW     .D1     *A4++,A2        ; A2 = ai & ai+1, then advance A4
||      LDW     .D2     *B4++,B2        ; B2 = bi & bi+1, then advance B4
||      MVK     .S1     50,A1           ; A1 = loop counter, starts at 50
||      ZERO    .L1     A7              ; A7 = sum0 accumulator, cleared
||      ZERO    .L2     B7              ; B7 = sum1 accumulator, cleared
; Prolog, next four execute packets: a fresh pair of loads is issued in
; every packet while the loop counter starts counting down. The first of
; these packets has no branch; the other three each issue one branch to
; LOOP. Blank lines separate execute packets.
; The run of '*' after each ';' is kept from the original listing: it
; grows by one each time the same instruction reappears in a later packet
; (presumably marking which loop iteration that copy serves).

 [A1]   SUB     .S1     A1,1,A1         ; count down while A1 != 0
||      LDW     .D1     *A4++,A2        ;* next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;* next bi & bi+1 word

 [A1]   SUB     .S1     A1,1,A1         ;* count down while A1 != 0
||[A1]  B       .S2     LOOP            ; go to LOOP if A1 != 0
||      LDW     .D1     *A4++,A2        ;** next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;** next bi & bi+1 word

 [A1]   SUB     .S1     A1,1,A1         ;** count down while A1 != 0
||[A1]  B       .S2     LOOP            ;* go to LOOP if A1 != 0
||      LDW     .D1     *A4++,A2        ;*** next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;*** next bi & bi+1 word

 [A1]   SUB     .S1     A1,1,A1         ;*** count down while A1 != 0
||[A1]  B       .S2     LOOP            ;** go to LOOP if A1 != 0
||      LDW     .D1     *A4++,A2        ;**** next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;**** next bi & bi+1 word
; Prolog, last two execute packets before LOOP: the loads, countdown and
; branch continue as in the preceding packets, and the two multiplies are
; now issued as well. MPY produces ai * bi in A6 and MPYH produces
; ai+1 * bi+1 in B6; the X on .M1X/.M2X marks an operand taken from the
; other register file. Blank lines separate execute packets; the '*'
; runs after ';' are kept from the original listing.

        MPY     .M1X    A2,B2,A6        ; A6 = ai * bi
||      MPYH    .M2X    A2,B2,B6        ; B6 = ai+1 * bi+1
||[A1]  SUB     .S1     A1,1,A1         ;**** count down while A1 != 0
||[A1]  B       .S2     LOOP            ;*** go to LOOP if A1 != 0
||      LDW     .D1     *A4++,A2        ;***** next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;***** next bi & bi+1 word

        MPY     .M1X    A2,B2,A6        ;* A6 = ai * bi
||      MPYH    .M2X    A2,B2,B6        ;* B6 = ai+1 * bi+1
||[A1]  SUB     .S1     A1,1,A1         ;***** count down while A1 != 0
||[A1]  B       .S2     LOOP            ;**** go to LOOP if A1 != 0
||      LDW     .D1     *A4++,A2        ;****** next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;****** next bi & bi+1 word
; Loop kernel: one execute packet that is also its own branch target.
; All eight instructions are joined by ||, so a single pass accumulates
; the two products already in A6/B6, starts the next two multiplies,
; counts A1 down, conditionally branches back to LOOP and issues the next
; pair of loads. The '*' runs after ';' are kept from the original
; listing.
; NOTE(review): the two LDWs carry no [A1] condition, so the loop appears
; to read a few words beyond the last pair that is accumulated -- confirm
; that the memory following both input arrays is safe to read.
LOOP:
        ADD     .L1     A6,A7,A7        ; sum0 += ai * bi
||      ADD     .L2     B6,B7,B7        ; sum1 += ai+1 * bi+1
||      MPY     .M1X    A2,B2,A6        ;** A6 = ai * bi
||      MPYH    .M2X    A2,B2,B6        ;** B6 = ai+1 * bi+1
||[A1]  SUB     .S1     A1,1,A1         ;****** count down while A1 != 0
||[A1]  B       .S2     LOOP            ;***** repeat LOOP if A1 != 0
||      LDW     .D1     *A4++,A2        ;******* next ai & ai+1 word
||      LDW     .D2     *B4++,B2        ;******* next bi & bi+1 word
; Branch occurs here. Once the branch to LOOP is no longer taken,
; execution falls through to this point and the two partial sums are
; combined. A4, used earlier as the load pointer for the ai words, is
; overwritten with the result.
        ADD     .L1X    A7,B7,A4        ; A4 = sum = sum0 + sum1