MMACF32 MR3, MR2, MRd, MRe, MRf ||MMOV32 MRa, mem32
(continued)
32-Bit Floating-Point Multiply and Accumulate with Parallel Move
Example 1
; Perform 5 multiply and accumulate operations:
;
; X and Y are 32-bit floating point arrays
;
; 1st multiply: A = X0 * Y0
; 2nd multiply: B = X1 * Y1
; 3rd multiply: C = X2 * Y2
; 4th multiply: D = X3 * Y3
; 5th multiply: E = X3 * Y3
;
; Result = A + B + C + D + E
;
_Cla1Task1:
MMOVI16 MAR0, #_X ; MAR0 points to X array
MMOVI16 MAR1, #_Y ; MAR1 points to Y array
MNOP ; Delay for MAR0, MAR1 load
MNOP ; Delay for MAR0, MAR1 load
; <-- MAR0 valid
MMOV32 MR0, *MAR0[2]++ ; MR0 = X0, MAR0 += 2
; <-- MAR1 valid
MMOV32 MR1, *MAR1[2]++ ; MR1 = Y0, MAR1 += 2
MMPYF32 MR2, MR0, MR1 ; MR2 = A = X0 * Y0
|| MMOV32 MR0, *MAR0[2]++ ; In parallel MR0 = X1, MAR0 += 2
MMOV32 MR1, *MAR1[2]++ ; MR1 = Y1, MAR1 += 2
MMPYF32 MR3, MR0, MR1 ; MR3 = B = X1 * Y1
|| MMOV32 MR0, *MAR0[2]++ ; In parallel MR0 = X2, MAR0 += 2
MMOV32 MR1, *MAR1[2]++ ; MR1 = Y2, MAR2 += 2
MMACF32 MR3, MR2, MR2, MR0, MR1 ; MR3 = A + B, MR2 = C = X2 * Y2
|| MMOV32 MR0, *MAR0[2]++ ; In parallel MR0 = X3
MMOV32 MR1, *MAR1[2]++ ; MR1 = Y3 M
MACF32 MR3, MR2, MR2, MR0, MR1 ; MR3 = (A + B) + C, MR2 = D = X3 * Y3
|| MMOV32 MR0, *MAR0 ; In parallel MR0 = X4
MMOV32 MR1, *MAR1 ; MR1 = Y4
MMPYF32 MR2, MR0, MR1 ; MR2 = E = X4 * Y4
|| MADDF32 MR3, MR3, MR2 ; in parallel MR3 = (A + B + C) + D
MADDF32 MR3, MR3, MR2 ; MR3 = (A + B + C + D) + E
MMOV32 @_Result, MR3 ; Store the result
MSTOP ; end of task
Example 2
; sum = X0*B0 + X1*B1 + X2*B2 + Y1*A1 + Y2*B2
;
; X2 = X1
; X1 = X0
; Y2 = Y1 ; Y1 = sum
;
_ClaTask2:
MMOV32 MR0, @_B2 ; MR0 = B2
MMOV32 MR1, @_X2 ; MR1 = X2
MMPYF32 MR2, MR1, MR0 ; MR2 = X2*B2
|| MMOV32 MR0, @_B1 ; MR0 = B1
MMOVD32 MR1, @_X1 ; MR1 = X1, X2 = X1
MMPYF32 MR3, MR1, MR0 ; MR3 = X1*B1
|| MMOV32 MR0, @_B0 ; MR0 = B0
MMOVD32 MR1, @_X0 ; MR1 = X0, X1 = X0
; MR3 = X1*B1 + X2*B2, MR2 = X0*B0
; MR0 = A2
MMACF32 MR3, MR2, MR2, MR1, MR0
|| MMOV32 MR0, @_A2 M
MOV32 MR1, @_Y2 ; MR1 = Y2
; MR3 = X0*B0 + X1*B1 + X2*B2, MR2 = Y2*A2
; MR0 = A1
MMACF32 MR3, MR2, MR2, MR1, MR0
|| MMOV32 MR0, @_A1
MMOVD32 MR1,@_Y1 ; MR1 = Y1, Y2 = Y1
MADDF32 MR3, MR3, MR2 ; MR3 = Y2*A2 + X0*B0 + X1*B1 + X2*B2
|| MMPYF32 MR2, MR1, MR0 ; MR2 = Y1*A1
MADDF32 MR3, MR3, MR2 ; MR3 = Y1*A1 + Y2*A2 + X0*B0 + X1*B1 + X2*B2
Control Law Accelerator (CLA)
SPRUH18I – JANUARY 2011 – REVISED JUNE 2022
TMS320x2806x Microcontrollers
639
Copyright © 2022 Texas Instruments Incorporated
Содержание TMS320 2806 Series
Страница 2: ......