v 0. Pasted by Zeux as cpp at 2009-02-08 15:15:40 MSK and set expiration to never.

This paste will never expire. Expiration is locked.

  1. #include <stdbool.h>
  2. #include <spu_intrinsics.h>
  3.  
  4. // shuffle helpers
  5. #define L0 0x00010203
  6. #define L1 0x04050607
  7. #define L2 0x08090a0b
  8. #define L3 0x0c0d0e0f
  9.  
  10. #define R0 0x10111213
  11. #define R1 0x14151617
  12. #define R2 0x18191a1b
  13. #define R3 0x1c1d1e1f
  14.  
  15. #define ZERO 0x80808080
  16.  
  17. #define SHUFFLE(l, r, x, y, z, w) si_shufb(l, r, ((qword)(vec_uint4){x, y, z, w}))
  18.  
  19. // splat helper
  20. #define SPLAT(v, idx) si_shufb(v, v, (qword)(vec_uint4)(L ## idx))
  21.  
  22. struct matrix43_t
  23. {
  24.     vec_float4 row0;
  25.     vec_float4 row1;
  26.     vec_float4 row2;
  27.     vec_float4 row3;
  28. };
  29.  
  30. struct aabb_t
  31. {
  32.     vec_float4 min;
  33.     vec_float4 max;
  34. };
  35.  
  36. struct frustum_t
  37. {
  38.     vec_float4 planes[6];
  39. };
  40.  
  41. static inline void transform_points_4(qword* dest, qword x, qword y, qword z, const struct matrix43_t* mat)
  42. {
  43. #define COMP(c) \
  44.     qword res_ ## c = SPLAT((qword)mat->row3, c); \
  45.     res_ ## c = si_fma(z, SPLAT((qword)mat->row2, c), res_ ## c); \
  46.     res_ ## c = si_fma(y, SPLAT((qword)mat->row1, c), res_ ## c); \
  47.     res_ ## c = si_fma(x, SPLAT((qword)mat->row0, c), res_ ## c); \
  48.     dest[c] = res_ ## c;
  49.  
  50.     COMP(0);
  51.     COMP(1);
  52.     COMP(2);
  53.    
  54. #undef COMP
  55. }
  56.  
  57. static inline qword dot4(qword v, qword x, qword y, qword z)
  58. {
  59.     qword result = SPLAT(v, 3);
  60.  
  61.     result = si_fma(SPLAT(v, 2), z, result);
  62.     result = si_fma(SPLAT(v, 1), y, result);
  63.     result = si_fma(SPLAT(v, 0), x, result);
  64.  
  65.     return result;
  66. }
  67.  
  68. static inline qword is_not_outside(qword plane, const qword* points_ws_0, const qword* points_ws_1)
  69. {
  70.     qword dp0 = dot4(plane, points_ws_0[0], points_ws_0[1], points_ws_0[2]);
  71.     qword dp1 = dot4(plane, points_ws_1[0], points_ws_1[1], points_ws_1[2]);
  72.  
  73.     qword dp0pos = si_fcgt(dp0, (qword)(0));
  74.     qword dp1pos = si_fcgt(dp1, (qword)(0));
  75.  
  76.     return si_orx(si_or(dp0pos, dp1pos));
  77. }
  78.  
  79. __attribute__((noinline)) unsigned int is_visible(const struct matrix43_t* transform, const struct aabb_t* aabb, const struct frustum_t* frustum)
  80. {
  81.     qword min = (qword)aabb->min;
  82.     qword max = (qword)aabb->max;
  83.  
  84.     // get aabb points (SoA)
  85.     qword minmax_x = SHUFFLE(min, max, L0, R0, L0, R0); // x X x X
  86.     qword minmax_y = SHUFFLE(min, max, L1, L1, R1, R1); // y y Y Y
  87.     qword minmax_z_0 = SPLAT(min, 2); // z z z z
  88.     qword minmax_z_1 = SPLAT(max, 2); // Z Z Z Z
  89.  
  90.     // transform points to world space
  91.     qword points_ws_0[3];
  92.     qword points_ws_1[3];
  93.  
  94.     transform_points_4(points_ws_0, minmax_x, minmax_y, minmax_z_0, transform);
  95.     transform_points_4(points_ws_1, minmax_x, minmax_y, minmax_z_1, transform);
  96.  
  97.     // for each plane...
  98.     qword nout0 = is_not_outside((qword)frustum->planes[0], points_ws_0, points_ws_1);
  99.     qword nout1 = is_not_outside((qword)frustum->planes[1], points_ws_0, points_ws_1);
  100.     qword nout2 = is_not_outside((qword)frustum->planes[2], points_ws_0, points_ws_1);
  101.     qword nout3 = is_not_outside((qword)frustum->planes[3], points_ws_0, points_ws_1);
  102.     qword nout4 = is_not_outside((qword)frustum->planes[4], points_ws_0, points_ws_1);
  103.     qword nout5 = is_not_outside((qword)frustum->planes[5], points_ws_0, points_ws_1);
  104.  
  105.     // merge "not outside" flags
  106.     qword nout01 = si_and(nout0, nout1);
  107.     qword nout012 = si_and(nout01, nout2);
  108.  
  109.     qword nout34 = si_and(nout3, nout4);
  110.     qword nout345 = si_and(nout34, nout5);
  111.  
  112.     qword nout = si_and(nout012, nout345);
  113.  
  114.     return si_to_uint(nout);
  115. }
  116.  
  117. // simple ortho frustum
  118. struct frustum_t frustum =
  119. {
  120.     {
  121.         { 1, 0, 0, 10 },
  122.         { -1, 0, 0, 10 },
  123.         { 0, 1, 0, 10 },
  124.         { 0, -1, 0, 10 },
  125.         { 0, 0, 1, 10 },
  126.         { 0, 0, -1, 10 }
  127.     }
  128. };
  129.  
  130. // small box
  131. struct aabb_t aabb =
  132. {
  133.     { -1, -2, -3 },
  134.     { 1, 2, 3 }
  135. };
  136.  
  137. // and some weird matrix
  138. struct matrix43_t transform =
  139. {
  140.     { 0.123f, 0.456f, 0.789f },
  141.     { 0.456f, 0.123f, 0.789f },
  142.     { 0.789f, 0.123f, 0.456f },
  143.     { 1.f, -1.f, 1.f }
  144. };
  145.  
  146. void _start()
  147. {
  148.     is_visible(&transform, &aabb, &frustum);
  149.     si_stop(0);
  150. }


Editing is locked.