IA-32 Intel® Architecture Optimization
5-16
// Start deswizzling here
//
// Converts four structure-of-arrays registers into four array-of-structures
// vectors and stores them to [edx] .. [edx+48].
//
// Lane notation in the comments below lists the LOWEST 32-bit element first.
//
// Expected on entry (per the lane comments of this listing; the loads that
// set these up are outside this excerpt -- confirm against the code above):
//   xmm1 = r1 r2 r3 r4      xmm2 = g1 g2 g3 g4
//   xmm3 = b1 b2 b3 b4      xmm4 = a1 a2 a3 a4
//   edx  = destination pointer (movaps faults unless 16-byte aligned)
//
// shufps dst, src, imm8 picks dst[0..1] from dst and dst[2..3] from src:
//   0x88 -> elements 0 and 2 of each operand (the odd-numbered vertex)
//   0xDD -> elements 1 and 3 of each operand (the even-numbered vertex)

// Step 1: regroup into half-vertex pairs.
movaps  xmm7, xmm4          // xmm7 = a1 a2 a3 a4
movhlps xmm7, xmm3          // xmm7 = b3 b4 a3 a4
movaps  xmm6, xmm2          // xmm6 = g1 g2 g3 g4
movlhps xmm3, xmm4          // xmm3 = b1 b2 a1 a2
movhlps xmm2, xmm1          // xmm2 = r3 r4 g3 g4
movlhps xmm1, xmm6          // xmm1 = r1 r2 g1 g2
movaps  xmm6, xmm2          // xmm6 = r3 r4 g3 g4
movaps  xmm5, xmm1          // xmm5 = r1 r2 g1 g2

// Step 2: pick one vertex out of each pair of registers.
shufps  xmm2, xmm7, 0xDD    // xmm2 = r4 g4 b4 a4
shufps  xmm1, xmm3, 0x88    // xmm1 = r1 g1 b1 a1
shufps  xmm5, xmm3, 0xDD    // xmm5 = r2 g2 b2 a2  (0x88 would repeat vertex 1)
shufps  xmm6, xmm7, 0x88    // xmm6 = r3 g3 b3 a3  (0xDD would repeat vertex 4)

// Step 3: write the four vertices out in order.
movaps  [edx], xmm1         // v1 = r1 g1 b1 a1  (xmm4 still holds a1 a2 a3 a4)
movaps  [edx+16], xmm5      // v2 = r2 g2 b2 a2
movaps  [edx+32], xmm6      // v3 = r3 g3 b3 a3
movaps  [edx+48], xmm2      // v4 = r4 g4 b4 a4
// DESWIZZLING ENDS HERE
}
}
Example 5-7
Deswizzling 64-bit Integer SIMD Data
void mmx_deswizzle(IVertex_soa *in, IVertex_aos *out)
{
__asm {
mov ebx, in
mov edx, out
movq mm0, [ebx]
// mm0= u1 u2
}
continued
Example 5-6
Deswizzling Data Using the movlhps and shuffle
Instructions
(continued)
Summary of Contents for ARCHITECTURE IA-32
Page 1: ...IA-32 Intel Architecture Optimization Reference Manual, Order Number 248966-013US, April 2006...
Page 220: ...IA-32 Intel Architecture Optimization 3-40...
Page 434: ...IA-32 Intel Architecture Optimization 9-20...
Page 514: ...IA-32 Intel Architecture Optimization B-60...
Page 536: ...IA-32 Intel Architecture Optimization C-22...