// Newton Dynamics 4.00
// cuDeviceBuffer.h
1 /* Copyright (c) <2003-2021> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #ifndef __ND_DEVICE_BUFFER_H__
23 #define __ND_DEVICE_BUFFER_H__
24 
25 #include <cuda.h>
26 #include <cuda_runtime.h>
27 #include <ndNewtonStdafx.h>
28 #include "cuIntrinsics.h"
29 
30 template<class T>
32 {
33  public:
35  ~cuDeviceBuffer();
36 
37  ndInt32 GetCount() const;
38  void SetCount(ndInt32 count);
39 
40  void Clear();
41  void Resize(ndInt32 count);
42  ndInt32 GetCapacity() const;
43 
44  T& operator[] (ndInt32 i);
45  const T& operator[] (ndInt32 i) const;
46 
47  void ReadData(const T* const src, ndInt32 elements);
48  void WriteData(T* const dst, ndInt32 elements) const;
49 
50  void ReadData(const T* const src, ndInt32 elements, cudaStream_t stream);
51  void WriteData(T* const dst, ndInt32 elements, cudaStream_t stream) const;
52 
53  T* m_array;
54  ndInt32 m_size;
55  ndInt32 m_capacity;
56 };
57 
58 template<class T>
60  :m_array(nullptr)
61  ,m_size(0)
62  ,m_capacity(0)
63 {
64  SetCount(D_GRANULARITY);
65  SetCount(0);
66 }
67 
68 template<class T>
70 {
71  if (m_array)
72  {
73  cudaError_t cudaStatus = cudaSuccess;
74  cudaStatus = cudaFree(m_array);
75  dAssert(cudaStatus == cudaSuccess);
76  if (cudaStatus != cudaSuccess)
77  {
78  dAssert(0);
79  }
80  }
81 }
82 
83 template<class T>
84 const T& cuDeviceBuffer<T>::operator[] (ndInt32 i) const
85 {
86  dAssert(i >= 0);
87  dAssert(i < m_size);
88  return m_array[i];
89 }
90 
91 template<class T>
93 {
94  dAssert(i >= 0);
95  dAssert(i < m_size);
96  return m_array[i];
97 }
98 
99 template<class T>
100 ndInt32 cuDeviceBuffer<T>::GetCount() const
101 {
102  return m_size;
103 }
104 
105 template<class T>
106 void cuDeviceBuffer<T>::SetCount(ndInt32 count)
107 {
108  while (count > m_capacity)
109  {
110  Resize(m_capacity * 2);
111  }
112  m_size = count;
113 }
114 
115 template<class T>
116 ndInt32 cuDeviceBuffer<T>::GetCapacity() const
117 {
118  return m_capacity;
119 }
120 
121 template<class T>
123 {
124  m_size = 0;
125 }
126 
127 template<class T>
128 void cuDeviceBuffer<T>::Resize(ndInt32 newSize)
129 {
130  cudaError_t cudaStatus = cudaSuccess;
131  if (newSize > m_capacity || (m_capacity == 0))
132  {
133  T* newArray;
134  newSize = dMax(newSize, D_GRANULARITY);
135  const ndInt32 itemSizeInBytes = sizeof(T);
136  cudaStatus = cudaMalloc((void**)&newArray, newSize * itemSizeInBytes);
137  dAssert(cudaStatus == cudaSuccess);
138  if (m_array)
139  {
140  cudaStatus = cudaMemcpy(newArray, m_array, m_size * itemSizeInBytes, cudaMemcpyDeviceToDevice);
141  dAssert(cudaStatus == cudaSuccess);
142  cudaStatus = cudaFree(m_array);
143  dAssert(cudaStatus == cudaSuccess);
144  }
145  m_array = newArray;
146  m_capacity = newSize;
147  }
148  else if (newSize < m_capacity)
149  {
150  T* newArray;
151  const ndInt32 itemSizeInBytes = sizeof(T);
152  newSize = dMax(newSize, D_GRANULARITY);
153  cudaStatus = cudaMalloc((void**)&newArray, newSize * itemSizeInBytes);
154  if (m_array)
155  {
156  cudaStatus = cudaMemcpy(newArray, m_array, newSize * itemSizeInBytes, cudaMemcpyDeviceToDevice);
157  cudaStatus = cudaFree(m_array);
158  dAssert(cudaStatus == cudaSuccess);
159  }
160 
161  m_capacity = newSize;
162  m_array = newArray;
163  }
164  if (cudaStatus != cudaSuccess)
165  {
166  dAssert(0);
167  }
168 }
169 
170 template<class T>
171 void cuDeviceBuffer<T>::ReadData(const T* const src, ndInt32 elements)
172 {
173  dAssert(elements <= m_size);
174  cudaMemcpy(m_array, src, sizeof (T) * elements, cudaMemcpyHostToDevice);
175 }
176 
177 template<class T>
178 void cuDeviceBuffer<T>::WriteData(T* const dst, ndInt32 elements) const
179 {
180  dAssert(elements <= m_size);
181  cudaMemcpy(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost);
182 }
183 
184 template<class T>
185 void cuDeviceBuffer<T>::ReadData(const T* const src, ndInt32 elements, cudaStream_t stream)
186 {
187  dAssert(elements <= m_size);
188  cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src, sizeof(T) * elements, cudaMemcpyHostToDevice, stream);
189  dAssert(cudaStatus == cudaSuccess);
190  if (cudaStatus != cudaSuccess)
191  {
192  dAssert(0);
193  }
194 }
195 
196 template<class T>
197 void cuDeviceBuffer<T>::WriteData(T* const dst, ndInt32 elements, cudaStream_t stream) const
198 {
199  dAssert(elements <= m_size);
200  cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost, stream);
201  dAssert(cudaStatus == cudaSuccess);
202  if (cudaStatus != cudaSuccess)
203  {
204  dAssert(0);
205  }
206 }
207 
208 #endif
// cuDeviceBuffer
// Definition: cuDeviceBuffer.h:32