Newton Dynamics  4.00
ndCudaHostBuffer.h
1 /* Copyright (c) <2003-2021> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #ifndef __ND_CUDA_HOST_BUFFER_H__
23 #define __ND_CUDA_HOST_BUFFER_H__
24 
25 #include <cuda.h>
26 #include <cuda_runtime.h>
27 #include "ndCudaIntrinsics.h"
28 
29 template<class T>
31 {
32  public:
35 
36  int GetCount() const;
37  void SetCount(int count);
38 
39  void Clear();
40  void Resize(int count);
41  int GetCapacity() const;
42 
43  T& operator[] (int i);
44  const T& operator[] (int i) const;
45 
46  void Swap(ndCudaHostBuffer& buffer);
47 
48  void ReadData(const T* const src, int elements);
49  void WriteData(T* const dst, int elements) const;
50 
51  void ReadData(const T* const src, int elements, cudaStream_t stream);
52  void WriteData(T* const dst, int elements, cudaStream_t stream) const;
53 
54  T* m_array;
55  int m_size;
56  int m_capacity;
57 };
58 
59 template<class T>
61  :m_array(nullptr)
62  ,m_size(0)
63  ,m_capacity(0)
64 {
65  SetCount(D_GRANULARITY);
66  SetCount(0);
67 }
68 
69 template<class T>
71 {
72  if (m_array)
73  {
74  cudaError_t cudaStatus = cudaSuccess;
75  cudaStatus = cudaFreeHost(m_array);
76  ndAssert(cudaStatus == cudaSuccess);
77  if (cudaStatus != cudaSuccess)
78  {
79  ndAssert(0);
80  }
81  }
82 }
83 
84 template<class T>
85 const T& ndCudaHostBuffer<T>::operator[] (int i) const
86 {
87  ndAssert(i >= 0);
88  ndAssert(i < m_size);
89  return m_array[i];
90 }
91 
92 template<class T>
94 {
95  ndAssert(i >= 0);
96  ndAssert(i < m_size);
97  return m_array[i];
98 }
99 
100 template<class T>
102 {
103  return m_size;
104 }
105 
106 template<class T>
107 void ndCudaHostBuffer<T>::SetCount(int count)
108 {
109  while (count > m_capacity)
110  {
111  Resize(m_capacity * 2);
112  }
113  m_size = count;
114 }
115 
116 template<class T>
118 {
119  return m_capacity;
120 }
121 
122 template<class T>
124 {
125  m_size = 0;
126 }
127 
128 template<class T>
130 {
131  cuSwap(m_size, buffer.m_size);
132  cuSwap(m_array, buffer.m_array);
133  cuSwap(m_capacity, buffer.m_capacity);
134 }
135 
136 template<class T>
137 void ndCudaHostBuffer<T>::Resize(int newSize)
138 {
139  cudaError_t cudaStatus = cudaSuccess;
140  if (newSize > m_capacity || (m_capacity == 0))
141  {
142  T* newArray;
143  newSize = std::max(newSize, D_GRANULARITY);
144  cudaStatus = cudaMallocHost((void**)&newArray, newSize * sizeof(T));
145  ndAssert(cudaStatus == cudaSuccess);
146  if (m_array)
147  {
148  cudaStatus = cudaMemcpy(newArray, m_array, m_size * sizeof(T), cudaMemcpyDeviceToDevice);
149  ndAssert(cudaStatus == cudaSuccess);
150  cudaStatus = cudaFreeHost(m_array);
151  ndAssert(cudaStatus == cudaSuccess);
152  }
153  m_array = newArray;
154  m_capacity = newSize;
155  }
156  else if (newSize < m_capacity)
157  {
158  T* newArray;
159  newSize = std::max(newSize, D_GRANULARITY);
160  cudaStatus = cudaMallocHost((void**)&newArray, newSize * sizeof(T));
161  if (m_array)
162  {
163  cudaStatus = cudaMemcpy(newArray, m_array, newSize * sizeof(T), cudaMemcpyDeviceToDevice);
164  cudaStatus = cudaFreeHost(m_array);
165  ndAssert(cudaStatus == cudaSuccess);
166  }
167 
168  m_capacity = newSize;
169  m_array = newArray;
170  }
171  if (cudaStatus != cudaSuccess)
172  {
173  ndAssert(0);
174  }
175 }
176 
177 template<class T>
178 void ndCudaHostBuffer<T>::ReadData(const T* const src, int elements)
179 {
180  ndAssert(elements <= m_size);
181  cudaMemcpy(m_array, src, sizeof (T) * elements, cudaMemcpyHostToDevice);
182 }
183 
184 template<class T>
185 void ndCudaHostBuffer<T>::WriteData(T* const dst, int elements) const
186 {
187  ndAssert(elements <= m_size);
188  cudaMemcpy(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost);
189 }
190 
191 template<class T>
192 void ndCudaHostBuffer<T>::ReadData(const T* const src, int elements, cudaStream_t stream)
193 {
194  ndAssert(elements <= m_size);
195  cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src, sizeof(T) * elements, cudaMemcpyHostToDevice, stream);
196  ndAssert(cudaStatus == cudaSuccess);
197  if (cudaStatus != cudaSuccess)
198  {
199  ndAssert(0);
200  }
201 }
202 
203 template<class T>
204 void ndCudaHostBuffer<T>::WriteData(T* const dst, int elements, cudaStream_t stream) const
205 {
206  ndAssert(elements <= m_size);
207  cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost, stream);
208  ndAssert(cudaStatus == cudaSuccess);
209  if (cudaStatus != cudaSuccess)
210  {
211  ndAssert(0);
212  }
213 }
214 
215 #endif
ndCudaHostBuffer
Definition: ndCudaHostBuffer.h:31