Newton Dynamics  4.00
ndCudaDeviceBuffer.h
1 /* Copyright (c) <2003-2021> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #ifndef __ND_CUDA_DEVICE_BUFFER_H__
23 #define __ND_CUDA_DEVICE_BUFFER_H__
24 
25 #include <cuda.h>
26 #include <cuda_runtime.h>
27 #include "ndCudaIntrinsics.h"
28 
29 template<class T>
31 {
32  public:
35 
36  int GetCount() const;
37  void SetCount(int count);
38 
39  void Clear();
40  void Resize(int count);
41  int GetCapacity() const;
42 
43  T& operator[] (int i);
44  const T& operator[] (int i) const;
45 
46  void ReadData(const T* const src, int elements);
47  void WriteData(T* const dst, int elements) const;
48 
49  void ReadData(const T* const src, int elements, cudaStream_t stream);
50  void WriteData(T* const dst, int elements, cudaStream_t stream) const;
51 
52  T* m_array;
53  int m_size;
54  int m_capacity;
55 };
56 
57 template<class T>
59  :m_array(nullptr)
60  ,m_size(0)
61  ,m_capacity(0)
62 {
63  SetCount(D_GRANULARITY);
64  SetCount(0);
65 }
66 
67 template<class T>
69 {
70  if (m_array)
71  {
72  cudaError_t cudaStatus = cudaSuccess;
73  cudaStatus = cudaFree(m_array);
74  ndAssert(cudaStatus == cudaSuccess);
75  if (cudaStatus != cudaSuccess)
76  {
77  ndAssert(0);
78  }
79  }
80 }
81 
82 template<class T>
83 const T& ndCudaDeviceBuffer<T>::operator[] (int i) const
84 {
85  ndAssert(i >= 0);
86  ndAssert(i < m_size);
87  return m_array[i];
88 }
89 
90 template<class T>
92 {
93  ndAssert(i >= 0);
94  ndAssert(i < m_size);
95  return m_array[i];
96 }
97 
98 template<class T>
100 {
101  return m_size;
102 }
103 
104 template<class T>
105 void ndCudaDeviceBuffer<T>::SetCount(int count)
106 {
107  while (count > m_capacity)
108  {
109  Resize(m_capacity * 2);
110  }
111  m_size = count;
112 }
113 
114 template<class T>
116 {
117  return m_capacity;
118 }
119 
120 template<class T>
122 {
123  m_size = 0;
124 }
125 
126 template<class T>
127 void ndCudaDeviceBuffer<T>::Resize(int newSize)
128 {
129  cudaError_t cudaStatus = cudaSuccess;
130  if (newSize > m_capacity || (m_capacity == 0))
131  {
132  T* newArray;
133  newSize = std::max(newSize, D_GRANULARITY);
134  const int itemSizeInBytes = sizeof(T);
135  cudaStatus = cudaMalloc((void**)&newArray, newSize * itemSizeInBytes);
136  ndAssert(cudaStatus == cudaSuccess);
137  if (m_array)
138  {
139  cudaStatus = cudaMemcpy(newArray, m_array, m_size * itemSizeInBytes, cudaMemcpyDeviceToDevice);
140  ndAssert(cudaStatus == cudaSuccess);
141  cudaStatus = cudaFree(m_array);
142  ndAssert(cudaStatus == cudaSuccess);
143  }
144  m_array = newArray;
145  m_capacity = newSize;
146  }
147  else if (newSize < m_capacity)
148  {
149  T* newArray;
150  const int itemSizeInBytes = sizeof(T);
151  newSize = std::max(newSize, D_GRANULARITY);
152  cudaStatus = cudaMalloc((void**)&newArray, newSize * itemSizeInBytes);
153  if (m_array)
154  {
155  cudaStatus = cudaMemcpy(newArray, m_array, newSize * itemSizeInBytes, cudaMemcpyDeviceToDevice);
156  cudaStatus = cudaFree(m_array);
157  ndAssert(cudaStatus == cudaSuccess);
158  }
159 
160  m_capacity = newSize;
161  m_array = newArray;
162  }
163  if (cudaStatus != cudaSuccess)
164  {
165  ndAssert(0);
166  }
167 }
168 
169 template<class T>
170 void ndCudaDeviceBuffer<T>::ReadData(const T* const src, int elements)
171 {
172  ndAssert(elements <= m_size);
173  cudaMemcpy(m_array, src, sizeof (T) * elements, cudaMemcpyHostToDevice);
174 }
175 
176 template<class T>
177 void ndCudaDeviceBuffer<T>::WriteData(T* const dst, int elements) const
178 {
179  ndAssert(elements <= m_size);
180  cudaMemcpy(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost);
181 }
182 
183 template<class T>
184 void ndCudaDeviceBuffer<T>::ReadData(const T* const src, int elements, cudaStream_t stream)
185 {
186  ndAssert(elements <= m_size);
187  cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src, sizeof(T) * elements, cudaMemcpyHostToDevice, stream);
188  ndAssert(cudaStatus == cudaSuccess);
189  if (cudaStatus != cudaSuccess)
190  {
191  ndAssert(0);
192  }
193 }
194 
195 template<class T>
196 void ndCudaDeviceBuffer<T>::WriteData(T* const dst, int elements, cudaStream_t stream) const
197 {
198  ndAssert(elements <= m_size);
199  cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost, stream);
200  ndAssert(cudaStatus == cudaSuccess);
201  if (cudaStatus != cudaSuccess)
202  {
203  ndAssert(0);
204  }
205 }
206 
207 #endif
ndCudaDeviceBuffer
Definition: ndCudaDeviceBuffer.h:31