v 0. Pasted by Zeux as cpp at 2009-03-14 17:07:12 MSK and set expiration to never.

Paste will expire never. Expiration is locked.

  1. #include <stdbool.h>
  2. #include <spu_intrinsics.h>
  3.  
  // Selector words for si_shufb control vectors. Each constant lists the four
  // source bytes of one 32-bit element: Ln takes element n of the first
  // operand, Rn takes element n of the second operand.
  #define L0 0x00010203
  #define L1 0x04050607
  #define L2 0x08090a0b
  #define L3 0x0c0d0e0f

  #define R0 0x10111213
  #define R1 0x14151617
  #define R2 0x18191a1b
  #define R3 0x1c1d1e1f

  // Selector bytes of the form 10xxxxxx make shufb emit 0x00, so this word
  // yields a zero element (see the SPU shufb encoding).
  #define ZERO 0x80808080

  // Builds a vector from l and r; x, y, z, w are the selector words (L*/R*/ZERO)
  // for output elements 0..3.
  #define SHUFFLE(l, r, x, y, z, w) \
      si_shufb((l), (r), ((qword)(vec_uint4){ (x), (y), (z), (w) }))

  // Replicates element idx of v into all four elements. idx is token-pasted
  // onto "L", so it must be a literal 0..3, not an expression or variable.
  #define SPLAT(v, idx) si_shufb((v), (v), (qword)(vec_uint4)(L ## idx))
  21.  
  22. struct matrix_t
  23. {
  24.     vec_float4 row0;
  25.     vec_float4 row1;
  26.     vec_float4 row2;
  27.     vec_float4 row3;
  28. };
  29.  
  30. struct aabb_t
  31. {
  32.     vec_float4 min;
  33.     vec_float4 max;
  34. };
  35.  
  36. static inline void transform_points_4(qword* dest, qword x, qword y, qword z, const struct matrix_t* mat)
  37. {
  38. #define COMP(c) \
  39.     qword res_ ## c = SPLAT((qword)mat->row3, c); \
  40.     res_ ## c = si_fma(z, SPLAT((qword)mat->row2, c), res_ ## c); \
  41.     res_ ## c = si_fma(y, SPLAT((qword)mat->row1, c), res_ ## c); \
  42.     res_ ## c = si_fma(x, SPLAT((qword)mat->row0, c), res_ ## c); \
  43.     dest[c] = res_ ## c;
  44.  
  45.     COMP(0);
  46.     COMP(1);
  47.     COMP(2);
  48.     COMP(3);
  49.    
  50. #undef COMP
  51. }
  52.  
  53. static inline void transform_matrix(struct matrix_t* dest, const struct matrix_t* lhs, const struct matrix_t* rhs)
  54. {
  55. #define COMP_0(c) \
  56.     qword res_ ## c = si_fm((qword)lhs->row2, SPLAT((qword)rhs->row ## c, 2)); \
  57.     res_ ## c = si_fma((qword)lhs->row1, SPLAT((qword)rhs->row ## c, 1), res_ ## c); \
  58.     res_ ## c = si_fma((qword)lhs->row0, SPLAT((qword)rhs->row ## c, 0), res_ ## c); \
  59.     dest->row ## c = (vec_float4)res_ ## c;
  60.  
  61. #define COMP_1(c) \
  62.     qword res_ ## c = si_fma((qword)lhs->row2, SPLAT((qword)rhs->row ## c, 2), (qword)lhs->row3); \
  63.     res_ ## c = si_fma((qword)lhs->row1, SPLAT((qword)rhs->row ## c, 1), res_ ## c); \
  64.     res_ ## c = si_fma((qword)lhs->row0, SPLAT((qword)rhs->row ## c, 0), res_ ## c); \
  65.     dest->row ## c = (vec_float4)res_ ## c;
  66.  
  67.     COMP_0(0);
  68.     COMP_0(1);
  69.     COMP_0(2);
  70.     COMP_1(3);
  71.  
  72. #undef COMP_0
  73. #undef COMP_1
  74. }
  75.  
  76. __attribute__((noinline)) unsigned int is_visible(const struct matrix_t* transform, const struct aabb_t* aabb, const struct matrix_t* frustum)
  77. {
  78.     qword min = (qword)aabb->min;
  79.     qword max = (qword)aabb->max;
  80.  
  81.     // get aabb points (SoA)
  82.     qword minmax_x = SHUFFLE(min, max, L0, R0, L0, R0); // x X x X
  83.     qword minmax_y = SHUFFLE(min, max, L1, L1, R1, R1); // y y Y Y
  84.     qword minmax_z_0 = SPLAT(min, 2); // z z z z
  85.     qword minmax_z_1 = SPLAT(max, 2); // Z Z Z Z
  86.  
  87.     // get clipping matrix
  88.     struct matrix_t clip;
  89.  
  90.     transform_matrix(&clip, frustum, transform);
  91.  
  92.     // transform points to clip space
  93.     qword points_cs_0[4];
  94.     qword points_cs_1[4];
  95.  
  96.     transform_points_4(points_cs_0, minmax_x, minmax_y, minmax_z_0, &clip);
  97.     transform_points_4(points_cs_1, minmax_x, minmax_y, minmax_z_1, &clip);
  98.  
  99.     // calculate -w
  100.     qword points_cs_0_negw = si_xor(points_cs_0[3], (qword)(vec_uint4)(0x80000000));
  101.     qword points_cs_1_negw = si_xor(points_cs_1[3], (qword)(vec_uint4)(0x80000000));
  102.  
  103.     // for each plane...
  104.     #define NOUT(a, b, c, d) si_orx(si_or(si_fcgt(a, b), si_fcgt(c, d)))
  105.  
  106.     qword nout0 = NOUT(points_cs_0[0], points_cs_0_negw, points_cs_1[0], points_cs_1_negw);
  107.     qword nout1 = NOUT(points_cs_0[3], points_cs_0[0], points_cs_1[3], points_cs_1[0]);
  108.     qword nout2 = NOUT(points_cs_0[1], points_cs_0_negw, points_cs_1[1], points_cs_1_negw);
  109.     qword nout3 = NOUT(points_cs_0[3], points_cs_0[1], points_cs_1[3], points_cs_1[1]);
  110.     qword nout4 = NOUT(points_cs_0[2], (qword)(0), points_cs_1[2], (qword)(0));
  111.     qword nout5 = NOUT(points_cs_0[3], points_cs_0[2], points_cs_1[3], points_cs_1[2]);
  112.  
  113.     #undef NOUT
  114.  
  115.     // merge "not outside" flags
  116.     qword nout01 = si_and(nout0, nout1);
  117.     qword nout012 = si_and(nout01, nout2);
  118.  
  119.     qword nout34 = si_and(nout3, nout4);
  120.     qword nout345 = si_and(nout34, nout5);
  121.  
  122.     qword nout = si_and(nout012, nout345);
  123.  
  124.     return si_to_uint(nout);
  125. }
  126.  
  127. // simple ortho frustum
  128. struct matrix_t frustum =
  129. {
  130.     { 0.1f, 0, 0, 0 },
  131.     { 0, 0.1f, 0, 0 },
  132.     { 0, 0, 0.1f, 0 },
  133.     { 0, 0, 0, 1 }
  134. };
  135.  
  136. // small box
  137. struct aabb_t aabb =
  138. {
  139.     { -1, -2, -3 },
  140.     { 1, 2, 3 }
  141. };
  142.  
  143. // and some weird matrix
  144. struct matrix_t transform =
  145. {
  146.     { 0.123f, 0.456f, 0.789f },
  147.     { 0.456f, 0.123f, 0.789f },
  148.     { 0.789f, 0.123f, 0.456f },
  149.     { 1.f, -1.f, 1.f }
  150. };
  151.  
  152. int main()
  153. {
  154.     is_visible(&transform, &aabb, &frustum);
  155.     si_stop(0);
  156. }


Editing is locked.