Newton Dynamics  4.00
ndOpenclSystem.h
1 /* Copyright (c) <2003-2021> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #include "ndNewtonStdafx.h"
23 #include <CL/cl.h>
24 
25 #define D_USE_GPU_DEVICE
26 //#define D_DEBUG_GPU_KERNELS
27 
28 #define D_OPENCL_BUFFER_SIZE 1024
29 
30 
31 template<class T>
32 class ndOpenclBuffer: public ndArray<T>
33 {
34  public:
35  ndOpenclBuffer(cl_mem_flags flags);
36  ~ndOpenclBuffer();
37 
38  void Cleanup();
39  void SyncSize(cl_context context, ndInt32 size);
40  void ReadData(cl_command_queue commandQueue);
41  void WriteData(cl_command_queue commandQueue);
42 
43  cl_mem m_gpuBuffer;
44  cl_mem_flags m_flags;
45 };
46 
// NOTE(review): the class declaration line that opens this body is missing
// from this listing; the cross-reference residue at the end of the listing
// names a class ndOpenclBodyBuffer defined at line 48 - confirm against the
// real header. Several other listing lines (58, 60-61, 75-77) are absent too,
// so the union's closing line and some members are not visible here.
48 {
49  public:
// Eight floats viewable either as a single cl_float8 (m_data) or as two
// cl_float4 halves (m_linear, m_angular).
50  typedef union
51  {
52  cl_float8 m_data;
53  struct
54  {
55  cl_float4 m_linear;
56  cl_float4 m_angular;
57  };
59 
62 
// Member functions declared here; their definitions are not part of this listing.
63  void Cleanup();
64  void Resize(cl_context context, const ndArray<ndInt32>& bodyArray);
65  void CopyToGpu(cl_command_queue commandQueue, const ndArray<ndInt32>& bodyArray);
66  void SetKernelParameters(cl_kernel kernel, ndFloat32 timestep, const ndArray<ndBodyKinematic*>& bodyArray);
67 
// Helpers that are only declared when D_DEBUG_GPU_KERNELS is defined
// (the macro is commented out near the top of this header).
68 #ifdef D_DEBUG_GPU_KERNELS
69  dVector MakeQuat(const dVector& axis, float angle);
70  dVector MultiplyQuat(const dVector& r, const dVector& q);
71  dVector NormalizeQuat(const dVector& r);
72  void DebudKernel(ndFloat32 timestepIn, const ndArray<ndBodyKinematic*>& bodyArray);
73 #endif
74 
78 };
79 
81 {
82  public:
83  class ndKernel
84  {
85  public:
86  ndKernel()
87  :m_kernel(nullptr)
88  ,m_workWroupSize(0)
89  {
90  }
91 
92  cl_kernel m_kernel;
93  size_t m_workWroupSize;
94  };
95 
// Constructor / destructor; the second constructor parameter is an unnamed cl_platform_id.
96  ndOpenclSystem(cl_context context, cl_platform_id);
97  ~ndOpenclSystem();
98 
// Member functions declared here; their definitions are not part of this listing.
99  void Finish();
100  cl_program CompileProgram();
101  void Resize(const ndArray<ndInt32>& bodyArray);
102  void CopyToGpu(const ndArray<ndInt32>& bodyArray);
103  void SetKernel(const char* const name, ndKernel& kerner);
104  void ExecuteIntegrateBodyPosition(ndFloat32 timestep, const ndArray<ndBodyKinematic*>& bodyArray);
105 
// Static accessor returning an ndOpenclSystem pointer for the given driver number.
106  static ndOpenclSystem* Singleton(ndInt32 driveNumber);
107 
108  ndOpenclBodyBuffer m_bodyArray;
109  char m_platformName[128];
110 
111  // Regular OpenCL objects:
112  cl_context m_context; // holds the context handle
113  cl_device_id m_device; // holds the selected device handle
114  cl_program m_solverProgram; // holds the program handle
115  cl_command_queue m_commandQueue; // holds the command-queue handle
116 
// One ndKernel (kernel handle + work-group size) per entry point.
117  ndKernel m_integrateBodiesPosition;
118  ndKernel m_integrateBodiesVelocity;
119  ndKernel m_integrateUnconstrainedBodies;
120  static const char* m_kernelSource;
121  ndInt32 m_computeUnits;
122 };
123 
124 template<class T>
125 ndOpenclBuffer<T>::ndOpenclBuffer(cl_mem_flags flags)
126  :ndArray<T>()
127  ,m_gpuBuffer(nullptr)
128  ,m_flags(flags)
129 {
130 }
131 
// NOTE(review): the signature line (listing line 133) is missing here;
// presumably this is ndOpenclBuffer<T>::~ndOpenclBuffer() - confirm against
// the real header.
// The body releases nothing itself: it only asserts that m_gpuBuffer is
// already null when it runs.
132 template<class T>
134 {
135  ndAssert(!m_gpuBuffer);
136 }
137 
// NOTE(review): the signature line (listing line 139) and listing line 146
// are missing here; presumably this is ndOpenclBuffer<T>::Cleanup() - confirm
// against the real header.
// Releases the OpenCL memory object when one is held, asserting that the
// release succeeded, then resets the handle to nullptr in every case.
138 template<class T>
140 {
141  if (m_gpuBuffer)
142  {
143  cl_int err = CL_SUCCESS;
144  err = clReleaseMemObject(m_gpuBuffer);
145  ndAssert(err == CL_SUCCESS);
147  }
148  m_gpuBuffer = nullptr;
149 }
150 
151 template<class T>
152 void ndOpenclBuffer<T>::SyncSize(cl_context context, ndInt32 size)
153 {
154  cl_int err = CL_SUCCESS;
155 
156  if (m_gpuBuffer == nullptr)
157  {
158  if (m_flags & CL_MEM_USE_HOST_PTR)
159  {
160  ndAssert(0);
161  //void* const hostBuffer = &(*this)[0];
162  //m_gpuBuffer = clCreateBuffer(context, m_flags, sizeof(T) * ndArray<T>::GetCapacity(), hostBuffer, &err);
163  }
164  else
165  {
166  m_gpuBuffer = clCreateBuffer(context, m_flags, sizeof(T) * size, nullptr, &err);
167  }
168  ndAssert(err == CL_SUCCESS);
169  ndArray<T>::Resize(size);
170  }
171  else
172  {
173  ndAssert(0);
174  }
175 }
176 
177 template<class T>
178 void ndOpenclBuffer<T>::ReadData(cl_command_queue commandQueue)
179 {
180  cl_int err = CL_SUCCESS;
181  void* const destination = &(*this)[0];
182  err = clEnqueueReadBuffer(
183  commandQueue, m_gpuBuffer,
184  CL_FALSE, 0, sizeof(T) * ndArray<T>::GetCount(), destination,
185  0, nullptr, nullptr);
186  ndAssert(err == CL_SUCCESS);
187 }
188 
189 template<class T>
190 void ndOpenclBuffer<T>::WriteData(cl_command_queue commandQueue)
191 {
192  const void* const source = &(*this)[0];
193 
194  cl_int err = CL_SUCCESS;
195  err = clEnqueueWriteBuffer(
196  commandQueue, m_gpuBuffer,
197  CL_FALSE, 0, sizeof(T) * ndArray<T>::GetCount(), source,
198  0, nullptr, nullptr);
199  ndAssert(err == CL_SUCCESS);
200 }
201 
ndClassAlloc
Base class for providing memory allocation for all other engine classes.
Definition: ndClassAlloc.h:30
ndArray
Generic template vector.
Definition: ndArray.h:42
ndOpenclBodyBuffer
Definition: ndOpenclSystem.h:48
ndOpenclSystem
Definition: ndOpenclSystem.h:81
ndOpenclBuffer
Definition: ndOpenclSystem.h:33
cl_float8
Definition: cl_platform.h:1237
ndOpenclBodyBuffer::ndOpenclJacobian
Definition: ndOpenclSystem.h:51
cl_float4
Definition: cl_platform.h:1218
ndArray::Resize
void Resize(ndInt32 count)
Set a new size.
Definition: ndArray.h:224
ndOpenclSystem::ndKernel
Definition: ndOpenclSystem.h:84