Outer Loop Conditionally Executed With Inner Loop
6-147
Optimizing Assembly Code via Linear Assembly
Example 6–78. Final Assembly Code for FIR Filter
;-----------------------------------------------------------------------
; FIR filter -- setup and pipeline-priming execute packets
; (TMS320C6000 assembly syntax).
;
; Layout: one instruction per line; a leading "||" means the instruction
; issues in the same execute packet as the line above it.  A blank line
; separates execute packets.  "[reg]" / "[!reg]" are predicates.
;
; Registers read before being written in this section (roles taken from
; the annotations of the original listing -- confirm against the caller):
;   A4 = pointer into x[]      B4 = pointer to h[]      A6 = pointer to y[]
;
; Constants established here:
;   B0      = 200   loop counter, (32/8)*(100/2)
;   A1      = 4     pointer-reset loop counter
;   A2      = 5     store loop counter
;   A3, B14 = 60    amount subtracted to reset the x pointers (16*4-4)
;   A5, B5  = 64    amount subtracted to reset the h pointers (16*4)
;
; NOTE(review): the instruction order and packet grouping are part of
; the hand schedule; do not reorder without re-checking the pipeline.
;-----------------------------------------------------------------------

; --- packet 1: derive the four data pointers, set the loop counter ----
        MV      .L1X    B4,A0           ; A0 -> h[0] & h[1]
||      ADD     .D2     B4,4,B2         ; B2 -> h[2] & h[3]
||      MV      .L2X    A4,B1           ; B1 -> x[j] & x[j+1]
||      ADD     .D1     A4,4,A4         ; A4 -> x[j+2] & x[j+3]
||      MVK     .S2     200,B0          ; loop counter = (32/8)*(100/2)

; --- packet 2: first x loads, pointer-reset counter --------------------
        LDW     .D1     *A4++[2],B9     ; load x[j+i+2] & x[j+i+3]
||      LDW     .D2     *B1++[2],A10    ; load x[j+i+0] & x[j+i+1]
||      MVK     .S1     4,A1            ; pointer-reset loop counter = 4

; --- packet 3: first h loads, x-pointer reset amounts ------------------
        LDW     .D2     *B2++[2],B7     ; load h[i+2] & h[i+3]
||      LDW     .D1     *A0++[2],A8     ; load h[i+0] & h[i+1]
||      MVK     .S1     60,A3           ; x ptr reset amount (16*4-4)
||      MVK     .S2     60,B14          ; x ptr reset amount (16*4-4)

; --- packet 4: more x loads, h-pointer reset amounts, y[j+1] pointer ---
        LDW     .D2     *B1++[2],A11    ; load x[j+i+4] & x[j+i+5]
||      LDW     .D1     *A4++[2],B10    ; load x[j+i+6] & x[j+i+7]
||[A1]  SUB     .L1     A1,1,A1         ; count down pointer-reset counter
||      MVK     .S1     64,A5           ; h ptr reset amount (16*4)
||      MVK     .S2     64,B5           ; h ptr reset amount (16*4)
||      ADD     .L2X    A6,2,B6         ; B6 -> y[j+1]

; --- packet 5: more h loads, conditional x-pointer reset ---------------
        LDW     .D1     *A0++[2],A9     ; load h[i+4] & h[i+5]
||      LDW     .D2     *B2++[2],B8     ; load h[i+6] & h[i+7]
||[!A1] SUB     .S1     A4,A3,A4        ; reset x ptr when A1 == 0

; --- packet 6: conditional pointer resets, single halfword x load ------
  [!A1] SUB     .S2     B1,B14,B1       ; reset x ptr when A1 == 0
||[!A1] SUB     .S1     A0,A5,A0        ; reset h ptr when A1 == 0
||      LDH     .D2     *B1,A8          ; load x[j+i+8]

; --- packet 7: cross-file copy, store counter ---------------------------
        ADD     .S2X    A10,0,B8        ; copy A10 into the B register file
||      MVK     .S1     5,A2            ; store loop counter = 5

; --- packet 8: first products, last conditional h-pointer reset --------
        MPYLH   .M2X    A8,B8,B4        ; p10 = h[i+0]*x[j+i+1]
||[!A1] SUB     .S2     B2,B5,B2        ; reset h ptr when A1 == 0
||      MPYHL   .M1X    A8,B9,A14       ; p11 = h[i+1]*x[j+i+2]

; --- packet 9: more products, clear the B-side accumulator -------------
        MPY     .M1     A8,A10,A7       ; p00 = h[i+0]*x[j+i+0]
||      MPYLH   .M2     B7,B9,B13       ; p12 = h[i+2]*x[j+i+3]
||[A2]  SUB     .S1     A2,1,A2         ; count down store loop counter
||      ZERO    .L2     B11             ; clear initial accumulator (B11)

; --- packet 10: accumulate/scale, next x loads, clear A-side accum -----
; The ";*" comment prefix on the two loads is retained from the original
; listing.
  [!A2] SHR     .S2     B11,15,B11      ; (Bsum1 >> 15)
||      MPY     .M2     B7,B9,B9        ; p02 = h[i+2]*x[j+i+2]
||      MPYH    .M1     A8,A10,A10      ; p01 = h[i+1]*x[j+i+1]
||[A2]  ADD     .L2     B4,B11,B4       ; sum1(p10) = p10 + sum1
||      LDW     .D1     *A4++[2],B9     ;* load x[j+i+2] & x[j+i+3]
||      LDW     .D2     *B1++[2],A10    ;* load x[j+i+0] & x[j+i+1]
||      ZERO    .L1     A10             ; clear initial accumulator (A10)