25#include "cuda_runtime.h"
// Short names for the unsigned integer types used by the vector helpers below.
using uint = unsigned int;
using ushort = unsigned short;
// Host-only scalar min/max helpers.
//
// These definitions carry no __host__/__device__ qualifiers and reuse names
// that the CUDA compiler provides itself, so they are compiled only when the
// translation unit is NOT being built by the CUDA compiler.
// NOTE(review): the function bodies were absent from the reviewed text; the
// plain comparisons below are the conventional implementations -- confirm
// against the upstream header.
#ifndef __CUDACC__

// Returns a when a < b, otherwise b.
inline float fminf(float a, float b)
{
    return a < b ? a : b;
}

// Returns a when a > b, otherwise b.
inline float fmaxf(float a, float b)
{
    return a > b ? a : b;
}

// Returns the larger of two ints.
inline int max(int a, int b)
{
    return a > b ? a : b;
}

// Returns the smaller of two ints.
inline int min(int a, int b)
{
    return a < b ? a : b;
}

#endif // __CUDACC__
// Host-only reciprocal square root: returns 1 / sqrt(x) in single precision.
//
// The definition has no __host__/__device__ qualifiers and the CUDA compiler
// declares rsqrtf itself, so it is compiled only for non-CUDA host builds.
#ifndef __CUDACC__
inline float rsqrtf(float x)
{
    return 1.0f / sqrtf(x);
}
#endif // __CUDACC__
// Extra float2 constructors. Each forwards to the two-argument
// make_float2(x, y), which is declared outside this file.

// Replicates s into both components.
inline __host__ __device__ float2 make_float2(float s)
{
    return make_float2(s, s);
}

// Keeps x and y of a float3; z is dropped.
inline __host__ __device__ float2 make_float2(float3 a)
{
    return make_float2(a.x, a.y);
}

// Converts each component of an int2 to float.
inline __host__ __device__ float2 make_float2(int2 a)
{
    return make_float2(static_cast<float>(a.x), static_cast<float>(a.y));
}

// Converts each component of a uint2 to float.
inline __host__ __device__ float2 make_float2(uint2 a)
{
    return make_float2(static_cast<float>(a.x), static_cast<float>(a.y));
}
// Extra int2 constructors. Each forwards to the two-argument make_int2(x, y),
// which is declared outside this file.

// Replicates s into both components.
inline __host__ __device__ int2 make_int2(int s)
{
    return make_int2(s, s);
}

// Keeps x and y of an int3; z is dropped.
inline __host__ __device__ int2 make_int2(int3 a)
{
    return make_int2(a.x, a.y);
}

// Converts each component of a uint2 to int.
inline __host__ __device__ int2 make_int2(uint2 a)
{
    return make_int2(static_cast<int>(a.x), static_cast<int>(a.y));
}

// Converts each component of a float2 to int (C++ float-to-int conversion).
inline __host__ __device__ int2 make_int2(float2 a)
{
    return make_int2(static_cast<int>(a.x), static_cast<int>(a.y));
}
// Extra uint2 constructors. Each forwards to the two-argument
// make_uint2(x, y), which is declared outside this file.

// Replicates s into both components.
inline __host__ __device__ uint2 make_uint2(uint s)
{
    return make_uint2(s, s);
}

// Keeps x and y of a uint3; z is dropped.
inline __host__ __device__ uint2 make_uint2(uint3 a)
{
    return make_uint2(a.x, a.y);
}

// Converts each component of an int2 to uint.
inline __host__ __device__ uint2 make_uint2(int2 a)
{
    return make_uint2(static_cast<uint>(a.x), static_cast<uint>(a.y));
}
// Extra float3 constructors. Each forwards to the three-argument
// make_float3(x, y, z), which is declared outside this file.

// Replicates s into all three components.
inline __host__ __device__ float3 make_float3(float s)
{
    return make_float3(s, s, s);
}

// Extends a float2 with z = 0.
inline __host__ __device__ float3 make_float3(float2 a)
{
    return make_float3(a.x, a.y, 0.0f);
}

// Extends a float2 with z = s.
inline __host__ __device__ float3 make_float3(float2 a, float s)
{
    return make_float3(a.x, a.y, s);
}

// Keeps x, y and z of a float4; w is dropped.
inline __host__ __device__ float3 make_float3(float4 a)
{
    return make_float3(a.x, a.y, a.z);
}

// Converts each component of an int3 to float.
inline __host__ __device__ float3 make_float3(int3 a)
{
    return make_float3(static_cast<float>(a.x),
                       static_cast<float>(a.y),
                       static_cast<float>(a.z));
}

// Converts each component of a uint3 to float.
inline __host__ __device__ float3 make_float3(uint3 a)
{
    return make_float3(static_cast<float>(a.x),
                       static_cast<float>(a.y),
                       static_cast<float>(a.z));
}
// Extra int3 constructors. Each forwards to the three-argument
// make_int3(x, y, z), which is declared outside this file.

// Replicates s into all three components.
inline __host__ __device__ int3 make_int3(int s)
{
    return make_int3(s, s, s);
}

// Extends an int2 with z = 0.
inline __host__ __device__ int3 make_int3(int2 a)
{
    return make_int3(a.x, a.y, 0);
}

// Extends an int2 with z = s.
inline __host__ __device__ int3 make_int3(int2 a, int s)
{
    return make_int3(a.x, a.y, s);
}

// Converts each component of a uint3 to int.
inline __host__ __device__ int3 make_int3(uint3 a)
{
    return make_int3(static_cast<int>(a.x),
                     static_cast<int>(a.y),
                     static_cast<int>(a.z));
}

// Converts each component of a float3 to int (C++ float-to-int conversion).
inline __host__ __device__ int3 make_int3(float3 a)
{
    return make_int3(static_cast<int>(a.x),
                     static_cast<int>(a.y),
                     static_cast<int>(a.z));
}
// Extra uint3 constructors. Each forwards to the three-argument
// make_uint3(x, y, z), which is declared outside this file.

// Replicates s into all three components.
inline __host__ __device__ uint3 make_uint3(uint s)
{
    return make_uint3(s, s, s);
}

// Extends a uint2 with z = 0.
inline __host__ __device__ uint3 make_uint3(uint2 a)
{
    return make_uint3(a.x, a.y, 0);
}

// Extends a uint2 with z = s.
inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
{
    return make_uint3(a.x, a.y, s);
}

// Keeps x, y and z of a uint4; w is dropped.
inline __host__ __device__ uint3 make_uint3(uint4 a)
{
    return make_uint3(a.x, a.y, a.z);
}

// Converts each component of an int3 to uint.
inline __host__ __device__ uint3 make_uint3(int3 a)
{
    return make_uint3(static_cast<uint>(a.x),
                      static_cast<uint>(a.y),
                      static_cast<uint>(a.z));
}
// Extra float4 constructors. Each forwards to the four-argument
// make_float4(x, y, z, w), which is declared outside this file.

// Replicates s into all four components.
inline __host__ __device__ float4 make_float4(float s)
{
    return make_float4(s, s, s, s);
}

// Extends a float3 with w = 0.
inline __host__ __device__ float4 make_float4(float3 a)
{
    return make_float4(a.x, a.y, a.z, 0.0f);
}

// Extends a float3 with the given w.
inline __host__ __device__ float4 make_float4(float3 a, float w)
{
    return make_float4(a.x, a.y, a.z, w);
}

// Converts each component of an int4 to float.
inline __host__ __device__ float4 make_float4(int4 a)
{
    return make_float4(static_cast<float>(a.x),
                       static_cast<float>(a.y),
                       static_cast<float>(a.z),
                       static_cast<float>(a.w));
}

// Converts each component of a uint4 to float.
inline __host__ __device__ float4 make_float4(uint4 a)
{
    return make_float4(static_cast<float>(a.x),
                       static_cast<float>(a.y),
                       static_cast<float>(a.z),
                       static_cast<float>(a.w));
}
// Extra int4 constructors. Each forwards to the four-argument
// make_int4(x, y, z, w), which is declared outside this file.

// Replicates s into all four components.
inline __host__ __device__ int4 make_int4(int s)
{
    return make_int4(s, s, s, s);
}

// Extends an int3 with w = 0.
inline __host__ __device__ int4 make_int4(int3 a)
{
    return make_int4(a.x, a.y, a.z, 0);
}

// Extends an int3 with the given w.
inline __host__ __device__ int4 make_int4(int3 a, int w)
{
    return make_int4(a.x, a.y, a.z, w);
}

// Converts each component of a uint4 to int.
inline __host__ __device__ int4 make_int4(uint4 a)
{
    return make_int4(static_cast<int>(a.x),
                     static_cast<int>(a.y),
                     static_cast<int>(a.z),
                     static_cast<int>(a.w));
}

// Converts each component of a float4 to int (C++ float-to-int conversion).
inline __host__ __device__ int4 make_int4(float4 a)
{
    return make_int4(static_cast<int>(a.x),
                     static_cast<int>(a.y),
                     static_cast<int>(a.z),
                     static_cast<int>(a.w));
}
// Extra uint4 constructors. Each forwards to the four-argument
// make_uint4(x, y, z, w), which is declared outside this file.

// Replicates s into all four components.
inline __host__ __device__ uint4 make_uint4(uint s)
{
    return make_uint4(s, s, s, s);
}

// Extends a uint3 with w = 0.
inline __host__ __device__ uint4 make_uint4(uint3 a)
{
    return make_uint4(a.x, a.y, a.z, 0);
}

// Extends a uint3 with the given w.
inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
{
    return make_uint4(a.x, a.y, a.z, w);
}

// Converts each component of an int4 to uint.
inline __host__ __device__ uint4 make_uint4(int4 a)
{
    return make_uint4(static_cast<uint>(a.x),
                      static_cast<uint>(a.y),
                      static_cast<uint>(a.z),
                      static_cast<uint>(a.w));
}
// Unary negation: returns a new vector with every component negated.
//
// The operand is taken by const reference so that temporaries and const
// vectors can be negated as well (e.g. -(a + b)); a non-const lvalue
// reference would reject both. Existing callers passing modifiable lvalues
// are unaffected.

inline __host__ __device__ float2 operator-(const float2 &a)
{
    return make_float2(-a.x, -a.y);
}

inline __host__ __device__ int2 operator-(const int2 &a)
{
    return make_int2(-a.x, -a.y);
}

inline __host__ __device__ float3 operator-(const float3 &a)
{
    return make_float3(-a.x, -a.y, -a.z);
}

inline __host__ __device__ int3 operator-(const int3 &a)
{
    return make_int3(-a.x, -a.y, -a.z);
}

inline __host__ __device__ float4 operator-(const float4 &a)
{
    return make_float4(-a.x, -a.y, -a.z, -a.w);
}

inline __host__ __device__ int4 operator-(const int4 &a)
{
    return make_int4(-a.x, -a.y, -a.z, -a.w);
}
// Component-wise addition for float2.

// vector + vector
inline __host__ __device__ float2 operator+(float2 a, float2 b)
{
    return make_float2(a.x + b.x, a.y + b.y);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(float2 &a, float2 b)
{
    a.x += b.x;
    a.y += b.y;
}

// vector + scalar: b is added to every component
inline __host__ __device__ float2 operator+(float2 a, float b)
{
    return make_float2(a.x + b, a.y + b);
}

// scalar + vector
inline __host__ __device__ float2 operator+(float b, float2 a)
{
    return make_float2(a.x + b, a.y + b);
}

// vector += scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator+=(float2 &a, float b)
{
    a.x += b;
    a.y += b;
}
// Component-wise addition for int2.

// vector + vector
inline __host__ __device__ int2 operator+(int2 a, int2 b)
{
    return make_int2(a.x + b.x, a.y + b.y);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(int2 &a, int2 b)
{
    a.x += b.x;
    a.y += b.y;
}

// vector + scalar: b is added to every component
inline __host__ __device__ int2 operator+(int2 a, int b)
{
    return make_int2(a.x + b, a.y + b);
}

// scalar + vector
inline __host__ __device__ int2 operator+(int b, int2 a)
{
    return make_int2(a.x + b, a.y + b);
}

// vector += scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator+=(int2 &a, int b)
{
    a.x += b;
    a.y += b;
}
// Component-wise addition for uint2.

// vector + vector
inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
{
    return make_uint2(a.x + b.x, a.y + b.y);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
{
    a.x += b.x;
    a.y += b.y;
}

// vector + scalar: b is added to every component
inline __host__ __device__ uint2 operator+(uint2 a, uint b)
{
    return make_uint2(a.x + b, a.y + b);
}

// scalar + vector
inline __host__ __device__ uint2 operator+(uint b, uint2 a)
{
    return make_uint2(a.x + b, a.y + b);
}

// vector += scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator+=(uint2 &a, uint b)
{
    a.x += b;
    a.y += b;
}
// Component-wise addition for float3.

// vector + vector
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(float3 &a, float3 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// vector + scalar: b is added to every component
inline __host__ __device__ float3 operator+(float3 a, float b)
{
    return make_float3(a.x + b, a.y + b, a.z + b);
}

// vector += scalar (modifies a in place)
inline __host__ __device__ void operator+=(float3 &a, float b)
{
    a.x += b;
    a.y += b;
    a.z += b;
}
// Component-wise addition for int3.

// vector + vector
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
    return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(int3 &a, int3 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// vector + scalar: b is added to every component
inline __host__ __device__ int3 operator+(int3 a, int b)
{
    return make_int3(a.x + b, a.y + b, a.z + b);
}

// vector += scalar (modifies a in place)
inline __host__ __device__ void operator+=(int3 &a, int b)
{
    a.x += b;
    a.y += b;
    a.z += b;
}
// Component-wise addition for uint3.

// vector + vector
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
    return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
}

// vector + scalar: b is added to every component
inline __host__ __device__ uint3 operator+(uint3 a, uint b)
{
    return make_uint3(a.x + b, a.y + b, a.z + b);
}

// vector += scalar (modifies a in place)
inline __host__ __device__ void operator+=(uint3 &a, uint b)
{
    a.x += b;
    a.y += b;
    a.z += b;
}
// scalar + 3-component vector: the scalar b is added to every component of a.

inline __host__ __device__ int3 operator+(int b, int3 a)
{
    return make_int3(a.x + b, a.y + b, a.z + b);
}

inline __host__ __device__ uint3 operator+(uint b, uint3 a)
{
    return make_uint3(a.x + b, a.y + b, a.z + b);
}

inline __host__ __device__ float3 operator+(float b, float3 a)
{
    return make_float3(a.x + b, a.y + b, a.z + b);
}
// Component-wise addition for float4.

// vector + vector
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(float4 &a, float4 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    a.w += b.w;
}

// vector + scalar: b is added to every component
inline __host__ __device__ float4 operator+(float4 a, float b)
{
    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
}

// scalar + vector
inline __host__ __device__ float4 operator+(float b, float4 a)
{
    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
}

// vector += scalar (modifies a in place)
inline __host__ __device__ void operator+=(float4 &a, float b)
{
    a.x += b;
    a.y += b;
    a.z += b;
    a.w += b;
}
// Component-wise addition for int4.

// vector + vector
inline __host__ __device__ int4 operator+(int4 a, int4 b)
{
    return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(int4 &a, int4 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    a.w += b.w;
}

// vector + scalar: b is added to every component
inline __host__ __device__ int4 operator+(int4 a, int b)
{
    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
}

// scalar + vector
inline __host__ __device__ int4 operator+(int b, int4 a)
{
    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
}

// vector += scalar (modifies a in place)
inline __host__ __device__ void operator+=(int4 &a, int b)
{
    a.x += b;
    a.y += b;
    a.z += b;
    a.w += b;
}
// Component-wise addition for uint4.

// vector + vector
inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
{
    return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}

// vector += vector (modifies a in place)
inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
{
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    a.w += b.w;
}

// vector + scalar: b is added to every component
inline __host__ __device__ uint4 operator+(uint4 a, uint b)
{
    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
}

// scalar + vector
inline __host__ __device__ uint4 operator+(uint b, uint4 a)
{
    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
}

// vector += scalar (modifies a in place)
inline __host__ __device__ void operator+=(uint4 &a, uint b)
{
    a.x += b;
    a.y += b;
    a.z += b;
    a.w += b;
}
// Component-wise subtraction for float2.

// vector - vector
inline __host__ __device__ float2 operator-(float2 a, float2 b)
{
    return make_float2(a.x - b.x, a.y - b.y);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(float2 &a, float2 b)
{
    a.x -= b.x;
    a.y -= b.y;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ float2 operator-(float2 a, float b)
{
    return make_float2(a.x - b, a.y - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ float2 operator-(float b, float2 a)
{
    return make_float2(b - a.x, b - a.y);
}

// vector -= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator-=(float2 &a, float b)
{
    a.x -= b;
    a.y -= b;
}
// Component-wise subtraction for int2.

// vector - vector
inline __host__ __device__ int2 operator-(int2 a, int2 b)
{
    return make_int2(a.x - b.x, a.y - b.y);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(int2 &a, int2 b)
{
    a.x -= b.x;
    a.y -= b.y;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ int2 operator-(int2 a, int b)
{
    return make_int2(a.x - b, a.y - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ int2 operator-(int b, int2 a)
{
    return make_int2(b - a.x, b - a.y);
}

// vector -= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator-=(int2 &a, int b)
{
    a.x -= b;
    a.y -= b;
}
// Component-wise subtraction for uint2 (ordinary unsigned arithmetic).

// vector - vector
inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
{
    return make_uint2(a.x - b.x, a.y - b.y);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
{
    a.x -= b.x;
    a.y -= b.y;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ uint2 operator-(uint2 a, uint b)
{
    return make_uint2(a.x - b, a.y - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ uint2 operator-(uint b, uint2 a)
{
    return make_uint2(b - a.x, b - a.y);
}

// vector -= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator-=(uint2 &a, uint b)
{
    a.x -= b;
    a.y -= b;
}
// Component-wise subtraction for float3.

// vector - vector
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(float3 &a, float3 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ float3 operator-(float3 a, float b)
{
    return make_float3(a.x - b, a.y - b, a.z - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ float3 operator-(float b, float3 a)
{
    return make_float3(b - a.x, b - a.y, b - a.z);
}

// vector -= scalar (modifies a in place)
inline __host__ __device__ void operator-=(float3 &a, float b)
{
    a.x -= b;
    a.y -= b;
    a.z -= b;
}
// Component-wise subtraction for int3.

// vector - vector
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
    return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(int3 &a, int3 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ int3 operator-(int3 a, int b)
{
    return make_int3(a.x - b, a.y - b, a.z - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ int3 operator-(int b, int3 a)
{
    return make_int3(b - a.x, b - a.y, b - a.z);
}

// vector -= scalar (modifies a in place)
inline __host__ __device__ void operator-=(int3 &a, int b)
{
    a.x -= b;
    a.y -= b;
    a.z -= b;
}
// Component-wise subtraction for uint3 (ordinary unsigned arithmetic).

// vector - vector
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
    return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ uint3 operator-(uint3 a, uint b)
{
    return make_uint3(a.x - b, a.y - b, a.z - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ uint3 operator-(uint b, uint3 a)
{
    return make_uint3(b - a.x, b - a.y, b - a.z);
}

// vector -= scalar (modifies a in place)
inline __host__ __device__ void operator-=(uint3 &a, uint b)
{
    a.x -= b;
    a.y -= b;
    a.z -= b;
}
// Component-wise subtraction for float4.

// vector - vector
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(float4 &a, float4 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
    a.w -= b.w;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ float4 operator-(float4 a, float b)
{
    return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
}

// vector -= scalar (modifies a in place)
inline __host__ __device__ void operator-=(float4 &a, float b)
{
    a.x -= b;
    a.y -= b;
    a.z -= b;
    a.w -= b;
}
// Component-wise subtraction for int4.

// vector - vector
inline __host__ __device__ int4 operator-(int4 a, int4 b)
{
    return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(int4 &a, int4 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
    a.w -= b.w;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ int4 operator-(int4 a, int b)
{
    return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ int4 operator-(int b, int4 a)
{
    return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
}

// vector -= scalar (modifies a in place)
inline __host__ __device__ void operator-=(int4 &a, int b)
{
    a.x -= b;
    a.y -= b;
    a.z -= b;
    a.w -= b;
}
// Component-wise subtraction for uint4 (ordinary unsigned arithmetic).

// vector - vector
inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
{
    return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}

// vector -= vector (modifies a in place)
inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
{
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
    a.w -= b.w;
}

// vector - scalar: b is subtracted from every component
inline __host__ __device__ uint4 operator-(uint4 a, uint b)
{
    return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
}

// scalar - vector: every component of a is subtracted from b
inline __host__ __device__ uint4 operator-(uint b, uint4 a)
{
    return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
}

// vector -= scalar (modifies a in place)
inline __host__ __device__ void operator-=(uint4 &a, uint b)
{
    a.x -= b;
    a.y -= b;
    a.z -= b;
    a.w -= b;
}
// Component-wise multiplication for float2.

// vector * vector
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
    return make_float2(a.x * b.x, a.y * b.y);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(float2 &a, float2 b)
{
    a.x *= b.x;
    a.y *= b.y;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ float2 operator*(float2 a, float b)
{
    return make_float2(a.x * b, a.y * b);
}

// scalar * vector
inline __host__ __device__ float2 operator*(float b, float2 a)
{
    return make_float2(b * a.x, b * a.y);
}

// vector *= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator*=(float2 &a, float b)
{
    a.x *= b;
    a.y *= b;
}
// Component-wise multiplication for int2.

// vector * vector
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
    return make_int2(a.x * b.x, a.y * b.y);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(int2 &a, int2 b)
{
    a.x *= b.x;
    a.y *= b.y;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ int2 operator*(int2 a, int b)
{
    return make_int2(a.x * b, a.y * b);
}

// scalar * vector
inline __host__ __device__ int2 operator*(int b, int2 a)
{
    return make_int2(b * a.x, b * a.y);
}

// vector *= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator*=(int2 &a, int b)
{
    a.x *= b;
    a.y *= b;
}
// Component-wise multiplication for uint2.

// vector * vector
inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
{
    return make_uint2(a.x * b.x, a.y * b.y);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
{
    a.x *= b.x;
    a.y *= b.y;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ uint2 operator*(uint2 a, uint b)
{
    return make_uint2(a.x * b, a.y * b);
}

// scalar * vector
inline __host__ __device__ uint2 operator*(uint b, uint2 a)
{
    return make_uint2(b * a.x, b * a.y);
}

// vector *= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator*=(uint2 &a, uint b)
{
    a.x *= b;
    a.y *= b;
}
// Component-wise multiplication for float3.

// vector * vector
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(float3 &a, float3 b)
{
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ float3 operator*(float3 a, float b)
{
    return make_float3(a.x * b, a.y * b, a.z * b);
}

// scalar * vector
inline __host__ __device__ float3 operator*(float b, float3 a)
{
    return make_float3(b * a.x, b * a.y, b * a.z);
}

// vector *= scalar (modifies a in place)
inline __host__ __device__ void operator*=(float3 &a, float b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
}
// Component-wise multiplication for int3.

// vector * vector
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
    return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(int3 &a, int3 b)
{
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ int3 operator*(int3 a, int b)
{
    return make_int3(a.x * b, a.y * b, a.z * b);
}

// scalar * vector
inline __host__ __device__ int3 operator*(int b, int3 a)
{
    return make_int3(b * a.x, b * a.y, b * a.z);
}

// vector *= scalar (modifies a in place)
inline __host__ __device__ void operator*=(int3 &a, int b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
}
// Component-wise multiplication for uint3.

// vector * vector
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
{
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ uint3 operator*(uint3 a, uint b)
{
    return make_uint3(a.x * b, a.y * b, a.z * b);
}

// scalar * vector
inline __host__ __device__ uint3 operator*(uint b, uint3 a)
{
    return make_uint3(b * a.x, b * a.y, b * a.z);
}

// vector *= scalar (modifies a in place)
inline __host__ __device__ void operator*=(uint3 &a, uint b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
}
// Component-wise multiplication for float4.

// vector * vector
inline __host__ __device__ float4 operator*(float4 a, float4 b)
{
    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(float4 &a, float4 b)
{
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ float4 operator*(float4 a, float b)
{
    return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
}

// scalar * vector
inline __host__ __device__ float4 operator*(float b, float4 a)
{
    return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
}

// vector *= scalar (modifies a in place)
inline __host__ __device__ void operator*=(float4 &a, float b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
}
// Component-wise multiplication for int4.

// vector * vector
inline __host__ __device__ int4 operator*(int4 a, int4 b)
{
    return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(int4 &a, int4 b)
{
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ int4 operator*(int4 a, int b)
{
    return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
}

// scalar * vector
inline __host__ __device__ int4 operator*(int b, int4 a)
{
    return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
}

// vector *= scalar (modifies a in place)
inline __host__ __device__ void operator*=(int4 &a, int b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
}
// Component-wise multiplication for uint4.

// vector * vector
inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
{
    return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}

// vector *= vector (modifies a in place)
inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
{
    a.x *= b.x;
    a.y *= b.y;
    a.z *= b.z;
    a.w *= b.w;
}

// vector * scalar: every component is scaled by b
inline __host__ __device__ uint4 operator*(uint4 a, uint b)
{
    return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
}

// scalar * vector
inline __host__ __device__ uint4 operator*(uint b, uint4 a)
{
    return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
}

// vector *= scalar (modifies a in place)
inline __host__ __device__ void operator*=(uint4 &a, uint b)
{
    a.x *= b;
    a.y *= b;
    a.z *= b;
    a.w *= b;
}
// Component-wise division for float2. No divisor is checked for zero.

// vector / vector
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
    return make_float2(a.x / b.x, a.y / b.y);
}

// vector /= vector (modifies a in place)
inline __host__ __device__ void operator/=(float2 &a, float2 b)
{
    a.x /= b.x;
    a.y /= b.y;
}

// vector / scalar: every component is divided by b
inline __host__ __device__ float2 operator/(float2 a, float b)
{
    return make_float2(a.x / b, a.y / b);
}

// vector /= scalar (modifies a in place)
// NOTE(review): this body was missing from the reviewed text; it is restored
// to mirror the 3- and 4-component overloads of the same operator.
inline __host__ __device__ void operator/=(float2 &a, float b)
{
    a.x /= b;
    a.y /= b;
}

// scalar / vector: b is divided by every component of a
inline __host__ __device__ float2 operator/(float b, float2 a)
{
    return make_float2(b / a.x, b / a.y);
}
// Component-wise division for float3. No divisor is checked for zero.

// vector / vector
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
}

// vector /= vector (modifies a in place)
inline __host__ __device__ void operator/=(float3 &a, float3 b)
{
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
}

// vector / scalar: every component is divided by b
inline __host__ __device__ float3 operator/(float3 a, float b)
{
    return make_float3(a.x / b, a.y / b, a.z / b);
}

// vector /= scalar (modifies a in place)
inline __host__ __device__ void operator/=(float3 &a, float b)
{
    a.x /= b;
    a.y /= b;
    a.z /= b;
}

// scalar / vector: b is divided by every component of a
inline __host__ __device__ float3 operator/(float b, float3 a)
{
    return make_float3(b / a.x, b / a.y, b / a.z);
}
// Component-wise division for float4. No divisor is checked for zero.

// vector / vector
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
    return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}

// vector /= vector (modifies a in place)
inline __host__ __device__ void operator/=(float4 &a, float4 b)
{
    a.x /= b.x;
    a.y /= b.y;
    a.z /= b.z;
    a.w /= b.w;
}

// vector / scalar: every component is divided by b
inline __host__ __device__ float4 operator/(float4 a, float b)
{
    return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
}

// vector /= scalar (modifies a in place)
inline __host__ __device__ void operator/=(float4 &a, float b)
{
    a.x /= b;
    a.y /= b;
    a.z /= b;
    a.w /= b;
}

// scalar / vector: b is divided by every component of a
inline __host__ __device__ float4 operator/(float b, float4 a)
{
    return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
}
// Component-wise minimum of two float vectors; each component is computed
// with the scalar fminf.

inline __host__ __device__ float2 fminf(float2 a, float2 b)
{
    return make_float2(fminf(a.x, b.x),
                       fminf(a.y, b.y));
}

inline __host__ __device__ float3 fminf(float3 a, float3 b)
{
    return make_float3(fminf(a.x, b.x),
                       fminf(a.y, b.y),
                       fminf(a.z, b.z));
}

inline __host__ __device__ float4 fminf(float4 a, float4 b)
{
    return make_float4(fminf(a.x, b.x),
                       fminf(a.y, b.y),
                       fminf(a.z, b.z),
                       fminf(a.w, b.w));
}
// Component-wise minimum of two int vectors; each component is computed with
// the scalar min.

inline __host__ __device__ int2 min(int2 a, int2 b)
{
    return make_int2(min(a.x, b.x),
                     min(a.y, b.y));
}

inline __host__ __device__ int3 min(int3 a, int3 b)
{
    return make_int3(min(a.x, b.x),
                     min(a.y, b.y),
                     min(a.z, b.z));
}

inline __host__ __device__ int4 min(int4 a, int4 b)
{
    return make_int4(min(a.x, b.x),
                     min(a.y, b.y),
                     min(a.z, b.z),
                     min(a.w, b.w));
}
// Component-wise minimum of two uint vectors.
//
// The comparison is written out directly on the unsigned components instead
// of calling a scalar min(): the only scalar min defined in this file takes
// ints, so on a host build that relies on it, values above INT_MAX would be
// converted to negative ints and ordered incorrectly.

inline __host__ __device__ uint2 min(uint2 a, uint2 b)
{
    return make_uint2(a.x < b.x ? a.x : b.x,
                      a.y < b.y ? a.y : b.y);
}

inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
    return make_uint3(a.x < b.x ? a.x : b.x,
                      a.y < b.y ? a.y : b.y,
                      a.z < b.z ? a.z : b.z);
}

inline __host__ __device__ uint4 min(uint4 a, uint4 b)
{
    return make_uint4(a.x < b.x ? a.x : b.x,
                      a.y < b.y ? a.y : b.y,
                      a.z < b.z ? a.z : b.z,
                      a.w < b.w ? a.w : b.w);
}
// Component-wise maximum of two float vectors; each component is computed
// with the scalar fmaxf.

inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
{
    return make_float2(fmaxf(a.x, b.x),
                       fmaxf(a.y, b.y));
}

inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
    return make_float3(fmaxf(a.x, b.x),
                       fmaxf(a.y, b.y),
                       fmaxf(a.z, b.z));
}

inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
    return make_float4(fmaxf(a.x, b.x),
                       fmaxf(a.y, b.y),
                       fmaxf(a.z, b.z),
                       fmaxf(a.w, b.w));
}
// Component-wise maximum of two int vectors; each component is computed with
// the scalar max.

inline __host__ __device__ int2 max(int2 a, int2 b)
{
    return make_int2(max(a.x, b.x),
                     max(a.y, b.y));
}

inline __host__ __device__ int3 max(int3 a, int3 b)
{
    return make_int3(max(a.x, b.x),
                     max(a.y, b.y),
                     max(a.z, b.z));
}

inline __host__ __device__ int4 max(int4 a, int4 b)
{
    return make_int4(max(a.x, b.x),
                     max(a.y, b.y),
                     max(a.z, b.z),
                     max(a.w, b.w));
}
// Component-wise maximum of two uint vectors.
//
// The comparison is written out directly on the unsigned components instead
// of calling a scalar max(): the only scalar max defined in this file takes
// ints, so on a host build that relies on it, values above INT_MAX would be
// converted to negative ints and ordered incorrectly.

inline __host__ __device__ uint2 max(uint2 a, uint2 b)
{
    return make_uint2(a.x > b.x ? a.x : b.x,
                      a.y > b.y ? a.y : b.y);
}

inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
    return make_uint3(a.x > b.x ? a.x : b.x,
                      a.y > b.y ? a.y : b.y,
                      a.z > b.z ? a.z : b.z);
}

inline __host__ __device__ uint4 max(uint4 a, uint4 b)
{
    return make_uint4(a.x > b.x ? a.x : b.x,
                      a.y > b.y ? a.y : b.y,
                      a.z > b.z ? a.z : b.z,
                      a.w > b.w ? a.w : b.w);
}
// lerp: blend between a and b by the factor t.
//
// NOTE(review): the bodies of all four overloads and the #endif matching the
// #if below were absent from the reviewed text. The bodies are restored as
// the conventional a + t * (b - a) (t = 0 gives a, t = 1 gives b), and the
// #endif is placed so that only the scalar overload is conditional --
// presumably it is excluded from C++20 on because of std::lerp. Confirm both
// against the upstream header.

#if !defined(__cplusplus) || (__cplusplus < 202002L)
inline __device__ __host__ float lerp(float a, float b, float t)
{
    return a + t * (b - a);
}
#endif

inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
    return a + t * (b - a);
}

inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
    return a + t * (b - a);
}

inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
    return a + t * (b - a);
}
// Scalar clamp: limits f to the range [a, b].
// Computed as max(a, min(f, b)), so when a > b the result is a.

inline __device__ __host__ float clamp(float f, float a, float b)
{
    const float capped = fminf(f, b);
    return fmaxf(a, capped);
}

inline __device__ __host__ int clamp(int f, int a, int b)
{
    const int capped = min(f, b);
    return max(a, capped);
}
// Unsigned scalar clamp: limits f to the range [a, b].
//
// Evaluates max(a, min(f, b)) with direct unsigned comparisons (so a wins
// when a > b). Calling the scalar min/max helpers is avoided because the
// only scalar overloads defined in this file take ints; a host build that
// relies on them would convert values above INT_MAX to negative ints and
// clamp them incorrectly.
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
    const uint capped = f < b ? f : b; // min(f, b)
    return a > capped ? a : capped;    // max(a, capped)
}
// clamp for float vectors. Every component is clamped with the scalar
// clamp(), either against shared scalar bounds [a, b] or against the
// matching components of the bound vectors a and b.

inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
    return make_float2(clamp(v.x, a, b),
                       clamp(v.y, a, b));
}

inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
    return make_float2(clamp(v.x, a.x, b.x),
                       clamp(v.y, a.y, b.y));
}

inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
    return make_float3(clamp(v.x, a, b),
                       clamp(v.y, a, b),
                       clamp(v.z, a, b));
}

inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
    return make_float3(clamp(v.x, a.x, b.x),
                       clamp(v.y, a.y, b.y),
                       clamp(v.z, a.z, b.z));
}

inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
    return make_float4(clamp(v.x, a, b),
                       clamp(v.y, a, b),
                       clamp(v.z, a, b),
                       clamp(v.w, a, b));
}

inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
    return make_float4(clamp(v.x, a.x, b.x),
                       clamp(v.y, a.y, b.y),
                       clamp(v.z, a.z, b.z),
                       clamp(v.w, a.w, b.w));
}
// clamp for int vectors. Every component is clamped with the scalar
// clamp(), either against shared scalar bounds [a, b] or against the
// matching components of the bound vectors a and b.

inline __device__ __host__ int2 clamp(int2 v, int a, int b)
{
    return make_int2(clamp(v.x, a, b),
                     clamp(v.y, a, b));
}

inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
{
    return make_int2(clamp(v.x, a.x, b.x),
                     clamp(v.y, a.y, b.y));
}

inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
    return make_int3(clamp(v.x, a, b),
                     clamp(v.y, a, b),
                     clamp(v.z, a, b));
}

inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
    return make_int3(clamp(v.x, a.x, b.x),
                     clamp(v.y, a.y, b.y),
                     clamp(v.z, a.z, b.z));
}

inline __device__ __host__ int4 clamp(int4 v, int a, int b)
{
    return make_int4(clamp(v.x, a, b),
                     clamp(v.y, a, b),
                     clamp(v.z, a, b),
                     clamp(v.w, a, b));
}

inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
{
    return make_int4(clamp(v.x, a.x, b.x),
                     clamp(v.y, a.y, b.y),
                     clamp(v.z, a.z, b.z),
                     clamp(v.w, a.w, b.w));
}
// clamp for uint vectors. Every component is clamped with the scalar
// clamp(), either against shared scalar bounds [a, b] or against the
// matching components of the bound vectors a and b.

inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
{
    return make_uint2(clamp(v.x, a, b),
                      clamp(v.y, a, b));
}

inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
{
    return make_uint2(clamp(v.x, a.x, b.x),
                      clamp(v.y, a.y, b.y));
}

inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
    return make_uint3(clamp(v.x, a, b),
                      clamp(v.y, a, b),
                      clamp(v.z, a, b));
}

inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
    return make_uint3(clamp(v.x, a.x, b.x),
                      clamp(v.y, a.y, b.y),
                      clamp(v.z, a.z, b.z));
}

inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
{
    return make_uint4(clamp(v.x, a, b),
                      clamp(v.y, a, b),
                      clamp(v.z, a, b),
                      clamp(v.w, a, b));
}

inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
{
    return make_uint4(clamp(v.x, a.x, b.x),
                      clamp(v.y, a.y, b.y),
                      clamp(v.z, a.z, b.z),
                      clamp(v.w, a.w, b.w));
}
// Dot product of two float vectors: the sum of the products of matching
// components, accumulated left to right.

inline __host__ __device__ float dot(float2 a, float2 b)
{
    return a.x * b.x + a.y * b.y;
}

inline __host__ __device__ float dot(float3 a, float3 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

inline __host__ __device__ float dot(float4 a, float4 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// Dot product of two int vectors: the sum of the products of matching
// components.

inline __host__ __device__ int dot(int2 a, int2 b)
{
    return a.x * b.x + a.y * b.y;
}

inline __host__ __device__ int dot(int3 a, int3 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

inline __host__ __device__ int dot(int4 a, int4 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// Dot product of two uint vectors: the sum of the products of matching
// components (ordinary unsigned arithmetic).

inline __host__ __device__ uint dot(uint2 a, uint2 b)
{
    return a.x * b.x + a.y * b.y;
}

inline __host__ __device__ uint dot(uint3 a, uint3 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

inline __host__ __device__ uint dot(uint4 a, uint4 b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// Euclidean length of a float vector: the square root of its dot product
// with itself.

inline __host__ __device__ float length(float2 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}

inline __host__ __device__ float length(float3 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}

inline __host__ __device__ float length(float4 v)
{
    const float lenSq = dot(v, v);
    return sqrtf(lenSq);
}
// normalize: scales v by the reciprocal of its length, 1 / sqrt(dot(v, v)).
// A zero-length v is not special-cased.
//
// NOTE(review): the return statements were missing from the reviewed text;
// "v * invLen" is restored as the only use consistent with the computed
// reciprocal length and the declared return type.

inline __host__ __device__ float2 normalize(float2 v)
{
    float invLen = rsqrtf(dot(v, v));
    return v * invLen;
}

inline __host__ __device__ float3 normalize(float3 v)
{
    float invLen = rsqrtf(dot(v, v));
    return v * invLen;
}

inline __host__ __device__ float4 normalize(float4 v)
{
    float invLen = rsqrtf(dot(v, v));
    return v * invLen;
}
// Component-wise floor of a float vector; each component goes through the
// scalar floorf.

inline __host__ __device__ float2 floorf(float2 v)
{
    return make_float2(floorf(v.x),
                       floorf(v.y));
}

inline __host__ __device__ float3 floorf(float3 v)
{
    return make_float3(floorf(v.x),
                       floorf(v.y),
                       floorf(v.z));
}

inline __host__ __device__ float4 floorf(float4 v)
{
    return make_float4(floorf(v.x),
                       floorf(v.y),
                       floorf(v.z),
                       floorf(v.w));
}
// fracf: the fractional part of a value, computed as v - floorf(v).
// The vector overloads apply the scalar version to every component.

inline __host__ __device__ float fracf(float v)
{
    const float whole = floorf(v);
    return v - whole;
}

inline __host__ __device__ float2 fracf(float2 v)
{
    return make_float2(fracf(v.x),
                       fracf(v.y));
}

inline __host__ __device__ float3 fracf(float3 v)
{
    return make_float3(fracf(v.x),
                       fracf(v.y),
                       fracf(v.z));
}

inline __host__ __device__ float4 fracf(float4 v)
{
    return make_float4(fracf(v.x),
                       fracf(v.y),
                       fracf(v.z),
                       fracf(v.w));
}
// Component-wise floating-point remainder: the scalar fmodf is applied to
// each pair of matching components of a and b.

inline __host__ __device__ float2 fmodf(float2 a, float2 b)
{
    return make_float2(fmodf(a.x, b.x),
                       fmodf(a.y, b.y));
}

inline __host__ __device__ float3 fmodf(float3 a, float3 b)
{
    return make_float3(fmodf(a.x, b.x),
                       fmodf(a.y, b.y),
                       fmodf(a.z, b.z));
}

inline __host__ __device__ float4 fmodf(float4 a, float4 b)
{
    return make_float4(fmodf(a.x, b.x),
                       fmodf(a.y, b.y),
                       fmodf(a.z, b.z),
                       fmodf(a.w, b.w));
}
// Component-wise absolute value of a float vector; each component goes
// through the scalar fabs.

inline __host__ __device__ float2 fabs(float2 v)
{
    return make_float2(fabs(v.x),
                       fabs(v.y));
}

inline __host__ __device__ float3 fabs(float3 v)
{
    return make_float3(fabs(v.x),
                       fabs(v.y),
                       fabs(v.z));
}

inline __host__ __device__ float4 fabs(float4 v)
{
    return make_float4(fabs(v.x),
                       fabs(v.y),
                       fabs(v.z),
                       fabs(v.w));
}
// Component-wise absolute value of an int vector; each component goes
// through the scalar abs.

inline __host__ __device__ int2 abs(int2 v)
{
    return make_int2(abs(v.x),
                     abs(v.y));
}

inline __host__ __device__ int3 abs(int3 v)
{
    return make_int3(abs(v.x),
                     abs(v.y),
                     abs(v.z));
}

inline __host__ __device__ int4 abs(int4 v)
{
    return make_int4(abs(v.x),
                     abs(v.y),
                     abs(v.z),
                     abs(v.w));
}
// reflect: returns i - 2 * n * dot(n, i), the reflection of i about n.
// n is used as given; it is not normalized here.
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
    return i - 2.0f * n * dot(n, i);
}
// cross: the cross product a x b of two float3 vectors.
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
    return make_float3(a.y * b.z - a.z * b.y,
                       a.z * b.x - a.x * b.z,
                       a.x * b.y - a.y * b.x);
}
// smoothstep: maps x to t = clamp((x - a) / (b - a), 0, 1) and returns the
// cubic t * t * (3 - 2 * t). The vector overloads do the same arithmetic
// component-wise. The case a == b is not special-cased.

inline __device__ __host__ float smoothstep(float a, float b, float x)
{
    float t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    return (t * t * (3.0f - (2.0f * t)));
}

inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
{
    float2 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    return (t * t * (make_float2(3.0f) - (make_float2(2.0f) * t)));
}

inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
{
    float3 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    return (t * t * (make_float3(3.0f) - (make_float3(2.0f) * t)));
}

inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
{
    float4 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
    return (t * t * (make_float4(3.0f) - (make_float4(2.0f) * t)));
}