Software Pipelining
6-52
Example 6–30. Assembly Code for Fixed-Point Dot Product (Software Pipelined With Removal of Prolog and Epilog)
;-----------------------------------------------------------------------
; Setup for the software-pipelined fixed-point dot-product loop.
;
; Registers written here:
;   A1      loop counter: loaded with 57, then decremented once per
;           execute packet while it is nonzero ([A1] predicate)
;   A7, B7  sum0 / sum1 accumulators, cleared
;   A6, B6  ADD source operands used in LOOP, cleared
;   A2, B2  MPY/MPYH source operands used in LOOP, cleared
;
; A conditional branch to LOOP is issued in each of the last five
; execute packets below, ahead of the loop body itself.
;
; NOTE(review): the leading '*' marks in the comments are kept from the
; original listing; they look like per-iteration tags of the pipelined
; schedule -- confirm before relying on them.
;-----------------------------------------------------------------------
        MVK     .S1     57,A1           ; A1 = 57, the loop counter

   [A1] SUB     .S1     A1,1,A1         ; counter -= 1
||      ZERO    .L1     A7              ; sum0 accumulator = 0
||      ZERO    .L2     B7              ; sum1 accumulator = 0

   [A1] SUB     .S1     A1,1,A1         ;* counter -= 1
|| [A1] B       .S2     LOOP            ; 1st pre-issued branch to LOOP
||      ZERO    .L1     A6              ; A-side add input = 0
||      ZERO    .L2     B6              ; B-side add input = 0

   [A1] SUB     .S1     A1,1,A1         ;** counter -= 1
|| [A1] B       .S2     LOOP            ;* 2nd pre-issued branch to LOOP
||      ZERO    .L1     A2              ; A-side mpy input = 0
||      ZERO    .L2     B2              ; B-side mpy input = 0

   [A1] SUB     .S1     A1,1,A1         ;*** counter -= 1
|| [A1] B       .S2     LOOP            ;** 3rd pre-issued branch to LOOP

   [A1] SUB     .S1     A1,1,A1         ;**** counter -= 1
|| [A1] B       .S2     LOOP            ;*** 4th pre-issued branch to LOOP

   [A1] SUB     .S1     A1,1,A1         ;***** counter -= 1
|| [A1] B       .S2     LOOP            ;**** 5th pre-issued branch to LOOP

;-----------------------------------------------------------------------
; LOOP: single-execute-packet kernel of the fixed-point dot product,
; followed by the final reduction.
;
; Every instruction in the packet runs in parallel (|| bars):
;   ADD  .L1 / .L2    accumulate A6 into A7 (sum0) and B6 into B7 (sum1)
;   MPY  .M1X         A6 = product written from A2 and B2 (ai * bi)
;   MPYH .M2X         B6 = product written from A2 and B2 (ai+1 * bi+1)
;   SUB / B           predicated on A1: decrement counter, branch to LOOP
;   LDW  .D1 / .D2    load the next word through A4 and B4, each with
;                     post-increment, into A2 and B2
;
; After the loop the two partial sums are combined into A4.
;
; NOTE(review): the leading '*' marks in the comments are kept from the
; original listing; they look like per-iteration tags of the pipelined
; schedule -- confirm before relying on them.
;-----------------------------------------------------------------------
LOOP:   ADD     .L1     A6,A7,A7        ; sum0 += (ai * bi)
||      ADD     .L2     B6,B7,B7        ; sum1 += (ai+1 * bi+1)
||      MPY     .M1X    A2,B2,A6        ;** A6 = ai * bi
||      MPYH    .M2X    A2,B2,B6        ;** B6 = ai+1 * bi+1
|| [A1] SUB     .S1     A1,1,A1         ;****** counter -= 1
|| [A1] B       .S2     LOOP            ;***** branch back to LOOP
||      LDW     .D1     *A4++,A2        ;******* A2 = ai & ai+1 from memory
||      LDW     .D2     *B4++,B2        ;******* B2 = bi & bi+1 from memory

        ; Branch occurs here

        ADD     .L1X    A7,B7,A4        ; A4 = sum = sum0 + sum1