Refining C/C++ Code
3-32
Example 3–16. Float Dot Product With Peak Performance
/*
 * FHI(a) / FLO(a): yield a float taken from one half of the double `a`.
 * FHI goes through _hi(), FLO through _lo(); in both cases the result is
 * passed through _itof().
 *
 * NOTE(review): _hi, _lo and _itof are not defined here.  They are
 * presumably the TI C6000 compiler intrinsics (high/low word of a double,
 * and reinterpretation of an integer bit pattern as a float) -- confirm
 * against the toolchain in use.
 *
 * The argument is expanded exactly once by each macro.
 */
#define FHI(a) _itof(_hi(a))
#define FLO(a) _itof(_lo(a))
/*
 * dotp3 - dot product of two float vectors that are accessed as doublewords.
 *
 * a, b    Each argument is indexed as an array of 512 doubles (elements
 *         0..511).  Every double is used only as a container: the FHI()
 *         and FLO() macros extract one float from its high half and one
 *         from its low half, so two float products are formed per double
 *         and 1024 products are accumulated in total.  The restrict
 *         qualifiers state that the two arrays do not overlap.
 *
 * Returns the sum of all the products, as a float.
 *
 * The loop handles four doubles (eight float products) per iteration and
 * gives every product its own accumulator, so no addition depends on the
 * result of another addition within the same iteration.  The partial sums
 * are combined pairwise after the loop.  Because float addition is not
 * associative, the result may differ in the last bits from a strictly
 * sequential summation.
 */
float dotp3(const double a[restrict], const double b[restrict])
{
    int i;
    float sum0 = 0;
    float sum1 = 0;
    float sum2 = 0;
    float sum3 = 0;
    float sum4 = 0;
    float sum5 = 0;
    float sum6 = 0;
    float sum7 = 0;

    /* 512 doubles per input, consumed four at a time. */
    for (i = 0; i < 512; i += 4)
    {
        sum0 += FHI(a[i])     * FHI(b[i]);
        sum1 += FLO(a[i])     * FLO(b[i]);
        sum2 += FHI(a[i + 1]) * FHI(b[i + 1]);
        sum3 += FLO(a[i + 1]) * FLO(b[i + 1]);
        sum4 += FHI(a[i + 2]) * FHI(b[i + 2]);
        sum5 += FLO(a[i + 2]) * FLO(b[i + 2]);
        sum6 += FHI(a[i + 3]) * FHI(b[i + 3]);
        sum7 += FLO(a[i + 3]) * FLO(b[i + 3]);
    }

    /* Pairwise reduction of the eight partial sums: 8 -> 4 -> 2 -> 1. */
    sum0 += sum1;
    sum2 += sum3;
    sum4 += sum5;
    sum6 += sum7;

    sum0 += sum2;
    sum4 += sum6;

    return sum0 + sum4;
}
/*
 * Example driver: defines two float arrays and passes them to dotp3(),
 * reinterpreted as arrays of double.  The returned value is stored in
 * ret_val and not used any further.
 *
 * NOTE(review): dotp3() indexes 512 doubles through each argument, so
 * SIZE_A and SIZE_B (defined outside this function) must each provide at
 * least that much storage -- presumably 1024 floats; confirm.
 */
void main()
{
    /* Using 0 as the bank parameter for the DATA_MEM_BANK pragma aligns */
    /* the variable to a double word boundary for the C62xx, C64xx, and  */
    /* C67xx.  That alignment is what allows the float arrays to be read */
    /* as doubles.  The pragmas are kept directly ahead of the           */
    /* definition of the variables they name.                            */
    /* Both arrays are zero-filled so that dotp3() never reads           */
    /* indeterminate values.                                             */
#pragma DATA_MEM_BANK(a, 0);
#pragma DATA_MEM_BANK (b, 0);
    float ret_val, a[SIZE_A] = {0}, b[SIZE_B] = {0};

    ret_val = dotp3((double *)a, (double *)b);
}