22 #ifndef __ND_CUDA_INTRINSICS_H__
23 #define __ND_CUDA_INTRINSICS_H__
26 #include <cuda_runtime.h>
// Compile-time constant equal to 1024 * 256 = 262144. The surrounding
// parentheses keep the product intact when the macro is expanded inside a
// larger expression.
// NOTE(review): what this granularity counts (bytes, elements, threads, ...)
// is not visible from this header section -- confirm at the use sites.
28 #define D_GRANULARITY (1024 * 256)
// cuAbs: inline helper compiled for both host and device
// (`__device__ __host__`). Takes one value of type T by value and returns a T.
// NOTE(review): T is not declared on this line -- presumably a template
// parameter introduced just above; the body is not visible here, so the
// "absolute value" semantics are inferred from the name only. Confirm.
31 inline T __device__ __host__ cuAbs(T A)
// cuFloor: inline helper compiled for both host and device
// (`__device__ __host__`). Takes one value of type T by value and returns a T.
// NOTE(review): T is presumably a template parameter declared just above; the
// body is not visible here, so rounding behaviour (e.g. for negative inputs
// or integral T) cannot be stated from this line. Confirm.
37 inline T __device__ __host__ cuFloor(T A)
// cuMax: inline helper compiled for both host and device
// (`__device__ __host__`). Takes two values of the same type T by value and
// returns a T by value.
// NOTE(review): T is presumably a template parameter declared just above; the
// body is not visible here, so which argument is returned on ties (and any
// NaN behaviour for floating-point T) must be checked in the definition.
43 inline T __device__ __host__ cuMax(T A, T B)
// cuMin: inline helper compiled for both host and device
// (`__device__ __host__`). Takes two values of the same type T by value and
// returns a T by value.
// NOTE(review): T is presumably a template parameter declared just above; the
// body is not visible here, so which argument is returned on ties (and any
// NaN behaviour for floating-point T) must be checked in the definition.
49 inline T __device__ __host__ cuMin(T A, T B)
// cuSelect: inline helper compiled for both host and device
// (`__device__ __host__`). Takes a bool `test` plus two values A and B of the
// same type T (all by value) and returns a T.
// NOTE(review): presumably returns A or B depending on `test`, but the body is
// not visible here -- confirm which operand corresponds to `test == true`.
// Because A and B are by-value parameters, both argument expressions are
// evaluated by the caller regardless of `test`.
55 inline T __device__ __host__ cuSelect(
bool test, T A, T B)
// cuSwap: inline helper compiled for both host and device
// (`__device__ __host__`). Takes A and B by non-const reference and returns
// nothing, so any effect is on the caller's objects.
// NOTE(review): presumably exchanges the two values; the body is not visible
// here, so the mechanism (copy vs. move, temporaries) is unconfirmed.
61 inline void __device__ __host__ cuSwap(T& A, T& B)