Outer Loop Conditionally Executed With Inner Loop
6-143
Optimizing Assembly Code via Linear Assembly
Example 6–77. Linear Assembly for FIR With Outer Loop Conditionally Executed With Inner Loop (With Functional Units)
;-----------------------------------------------------------------------
; _fir -- linear-assembly FIR procedure: entry, declarations, and setup.
;
; Arguments (via .cproc): x, h, y
;   x, h are read with word loads in the loop; y receives halfword
;   stores.  The "point to x[2]" / "point to y[1]" comments below imply
;   16-bit elements.
;
; Setup performed here, before the loop:
;   x_1 = x + 4, h_1 = h + 4, y_1 = y + 2   second pointer of each pair
;   rstx1/rstx2 = 60, rsth1/rsth2 = 64      pointer-reset amounts
;   octr = 201, pctr = 4, sctr = 5          loop / reset / store counters
;   sum07 = sum17 = 0                       the two running sums
;
; NOTE(review): the counter comments suggest 100 outputs and 32 taps;
; the code that consumes octr, pctr and the rst* values is not part of
; this setup section -- confirm against the loop body.
;-----------------------------------------------------------------------
        .global _fir

_fir:   .cproc  x, h, y

        .reg    x_1, h_1, y_1, octr, pctr, sctr
        .reg    sum01, sum02, sum03, sum04, sum05, sum06, sum07
        .reg    sum11, sum12, sum13, sum14, sum15, sum16, sum17
        .reg    p00, p01, p02, p03, p04, p05, p06, p07
        .reg    p10, p11, p12, p13, p14, p15, p16, p17
        .reg    x01b, x01, x23, x45, x67, x8, h01, h23, h45, h67
        .reg    y0, y1, rstx1, rstx2, rsth1, rsth2

        ADD     x,4,x_1         ; point to x[2]
        ADD     h,4,h_1         ; point to h[2]
        ADD     y,2,y_1         ; point to y[1]
        MVK     60,rstx1        ; used to rst x pointer each outer loop
        MVK     60,rstx2        ; used to rst x pointer each outer loop
        MVK     64,rsth1        ; used to rst h pointer each outer loop
        MVK     64,rsth2        ; used to rst h pointer each outer loop
        MVK     201,octr        ; loop ctr = 201 = (100/2) * (32/8) + 1
        MVK     4,pctr          ; pointer reset lp cntr = 32/8
        MVK     5,sctr          ; reset store lp cntr = 32/8 + 1
        ZERO    sum07           ; sum07 = 0
        ZERO    sum17           ; sum17 = 0

        ; Memory-bank hints for the assembly optimizer: each pointer is
        ; described relative to the base of the array it walks, the
        ; second pointer of each pair sitting 4 bytes past the first.
        .mptr   x, x+0
        .mptr   x_1, x+4
        .mptr   h, h+0
        .mptr   h_1, h+4
;-----------------------------------------------------------------------
; LOOP -- first part of the loop body: loads and the conditional store.
;
; Each pass loads eight coefficients (h01..h67) and nine samples
; (x01..x67 plus x8).  The h/h_1 and x/x_1 pointer pairs alternate:
; every LDW post-increments its pointer by two words, so each pointer
; skips over the word its partner reads.
;
; sctr is decremented while non-zero; the shift and store of the two
; sums are predicated on sctr == 0, so y is written only on the pass
; where the counter reaches zero.
;-----------------------------------------------------------------------
LOOP:   .trip 8

        LDW     .D1T1   *h++[2],h01     ; h[i+0] & h[i+1]
        LDW     .D2T2   *h_1++[2],h23   ; h[i+2] & h[i+3]
        LDW     .D1T1   *h++[2],h45     ; h[i+4] & h[i+5]
        LDW     .D2T2   *h_1++[2],h67   ; h[i+6] & h[i+7]

        LDW     .D2T1   *x++[2],x01     ; x[j+i+0] & x[j+i+1]
        LDW     .D1T2   *x_1++[2],x23   ; x[j+i+2] & x[j+i+3]
        LDW     .D2T1   *x++[2],x45     ; x[j+i+4] & x[j+i+5]
        LDW     .D1T2   *x_1++[2],x67   ; x[j+i+6] & x[j+i+7]
        LDH     .D2T1   *x,x8           ; x[j+i+8]

 [sctr]  SUB    .S1     sctr,1,sctr     ; dec store lp cntr
 [!sctr] SHR    .S1     sum07,15,y0     ; (sum0 >> 15)
 [!sctr] SHR    .S2     sum17,15,y1     ; (sum1 >> 15)
 [!sctr] STH    .D1     y0,*y++[2]      ; y[j] = (sum0 >> 15)
 [!sctr] STH    .D2     y1,*y_1++[2]    ; y[j+1] = (sum1 >> 15)