Packed-Data Processing on the ’C64x
8-44
Example 8–17. Clear Below Threshold Kernel, Using _cmpgtu4 and _xpnd4 Intrinsics
/*
 * clear_below_thresh
 *
 * Walks the image eight pixels at a time and zeroes every pixel that is
 * not strictly greater than the threshold.  Pixels greater than the
 * threshold are written back unchanged.  The image is modified in place.
 *
 *   image     - pixel buffer, one unsigned byte per pixel
 *   count     - number of pixels to process
 *   threshold - pixels less than or equal to this value are cleared
 *
 * NOTE(review): the loop always handles a full group of 8 pixels, so a
 * count that is not a multiple of 8 causes the bytes up to the next
 * multiple of 8 to be read and rewritten as well.  The buffer is also
 * accessed through a double pointer, which presumably requires image to
 * be double-word aligned -- confirm both against the callers.
 */
void clear_below_thresh(unsigned char *restrict image, int count,
                        unsigned char threshold)
{
    int pos;

    /* Build a word holding the threshold in each of its four bytes. */
    unsigned thresh_x2 = _pack2(threshold, threshold);
    unsigned thresh_x4 = _packl4(thresh_x2, thresh_x2);

    for (pos = 0; pos < count; pos += 8)
    {
        /* Fetch one double-word (8 pixels) and split it into halves. */
        double   pixels8 = *(double *) &image[pos];
        unsigned pix_hi  = _hi(pixels8);    /* pixels 7..4 */
        unsigned pix_lo  = _lo(pixels8);    /* pixels 3..0 */

        /* Per-pixel compare against the threshold, then widen each   */
        /* comparison result into a full byte of mask: all 1s where   */
        /* the pixel is greater than the threshold, all 0s elsewhere. */
        unsigned keep_hi = _xpnd4(_cmpgtu4(pix_hi, thresh_x4));
        unsigned keep_lo = _xpnd4(_cmpgtu4(pix_lo, thresh_x4));

        /* Masking leaves the "greater" pixels intact and forces the  */
        /* remaining ones to 0.                                       */
        pix_hi &= keep_hi;
        pix_lo &= keep_lo;

        /* Recombine the halves and write the 8 pixels back in place. */
        *(double *) &image[pos] = _itod(pix_hi, pix_lo);
    }
}