Newton Dynamics  4.00
cuHostBuffer.h
1 /* Copyright (c) <2003-2021> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #ifndef __ND_HOST_BUFFER_H__
23 #define __ND_HOST_BUFFER_H__
24 
25 #include <cuda.h>
26 #include <cuda_runtime.h>
27 #include <ndNewtonStdafx.h>
28 #include "cuIntrinsics.h"
29 
30 template<class T>
32 {
33  public:
34  cuHostBuffer();
35  ~cuHostBuffer();
36 
37  ndInt32 GetCount() const;
38  void SetCount(ndInt32 count);
39 
40  void Clear();
41  void Resize(ndInt32 count);
42  ndInt32 GetCapacity() const;
43 
44  T& operator[] (ndInt32 i);
45  const T& operator[] (ndInt32 i) const;
46 
47  void Swap(cuHostBuffer& buffer);
48 
49  void ReadData(const T* const src, ndInt32 elements);
50  void WriteData(T* const dst, ndInt32 elements) const;
51 
52  void ReadData(const T* const src, ndInt32 elements, cudaStream_t stream);
53  void WriteData(T* const dst, ndInt32 elements, cudaStream_t stream) const;
54 
55  T* m_array;
56  ndInt32 m_size;
57  ndInt32 m_capacity;
58 };
59 
60 template<class T>
62  :m_array(nullptr)
63  ,m_size(0)
64  ,m_capacity(0)
65 {
66  SetCount(D_GRANULARITY);
67  SetCount(0);
68 }
69 
70 template<class T>
72 {
73  if (m_array)
74  {
75  cudaError_t cudaStatus = cudaSuccess;
76  cudaStatus = cudaFreeHost(m_array);
77  dAssert(cudaStatus == cudaSuccess);
78  if (cudaStatus != cudaSuccess)
79  {
80  dAssert(0);
81  }
82  }
83 }
84 
85 template<class T>
86 const T& cuHostBuffer<T>::operator[] (ndInt32 i) const
87 {
88  dAssert(i >= 0);
89  dAssert(i < m_size);
90  return m_array[i];
91 }
92 
93 template<class T>
94 T& cuHostBuffer<T>::operator[] (ndInt32 i)
95 {
96  dAssert(i >= 0);
97  dAssert(i < m_size);
98  return m_array[i];
99 }
100 
101 template<class T>
102 ndInt32 cuHostBuffer<T>::GetCount() const
103 {
104  return m_size;
105 }
106 
107 template<class T>
108 void cuHostBuffer<T>::SetCount(ndInt32 count)
109 {
110  while (count > m_capacity)
111  {
112  Resize(m_capacity * 2);
113  }
114  m_size = count;
115 }
116 
117 template<class T>
118 ndInt32 cuHostBuffer<T>::GetCapacity() const
119 {
120  return m_capacity;
121 }
122 
123 template<class T>
125 {
126  m_size = 0;
127 }
128 
129 template<class T>
131 {
132  dSwap(m_size, buffer.m_size);
133  dSwap(m_array, buffer.m_array);
134  dSwap(m_capacity, buffer.m_capacity);
135 }
136 
137 template<class T>
138 void cuHostBuffer<T>::Resize(ndInt32 newSize)
139 {
140  cudaError_t cudaStatus = cudaSuccess;
141  if (newSize > m_capacity || (m_capacity == 0))
142  {
143  T* newArray;
144  newSize = dMax(newSize, D_GRANULARITY);
145  cudaStatus = cudaMallocHost((void**)&newArray, newSize * sizeof(T));
146  dAssert(cudaStatus == cudaSuccess);
147  if (m_array)
148  {
149  cudaStatus = cudaMemcpy(newArray, m_array, m_size * sizeof(T), cudaMemcpyDeviceToDevice);
150  dAssert(cudaStatus == cudaSuccess);
151  cudaStatus = cudaFreeHost(m_array);
152  dAssert(cudaStatus == cudaSuccess);
153  }
154  m_array = newArray;
155  m_capacity = newSize;
156  }
157  else if (newSize < m_capacity)
158  {
159  T* newArray;
160  newSize = dMax(newSize, D_GRANULARITY);
161  cudaStatus = cudaMallocHost((void**)&newArray, newSize * sizeof(T));
162  if (m_array)
163  {
164  cudaStatus = cudaMemcpy(newArray, m_array, newSize * sizeof(T), cudaMemcpyDeviceToDevice);
165  cudaStatus = cudaFreeHost(m_array);
166  dAssert(cudaStatus == cudaSuccess);
167  }
168 
169  m_capacity = newSize;
170  m_array = newArray;
171  }
172  if (cudaStatus != cudaSuccess)
173  {
174  dAssert(0);
175  }
176 }
177 
178 template<class T>
179 void cuHostBuffer<T>::ReadData(const T* const src, ndInt32 elements)
180 {
181  dAssert(elements <= m_size);
182  cudaMemcpy(m_array, src, sizeof (T) * elements, cudaMemcpyHostToDevice);
183 }
184 
185 template<class T>
186 void cuHostBuffer<T>::WriteData(T* const dst, ndInt32 elements) const
187 {
188  dAssert(elements <= m_size);
189  cudaMemcpy(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost);
190 }
191 
192 template<class T>
193 void cuHostBuffer<T>::ReadData(const T* const src, ndInt32 elements, cudaStream_t stream)
194 {
195  dAssert(elements <= m_size);
196  cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src, sizeof(T) * elements, cudaMemcpyHostToDevice, stream);
197  dAssert(cudaStatus == cudaSuccess);
198  if (cudaStatus != cudaSuccess)
199  {
200  dAssert(0);
201  }
202 }
203 
204 template<class T>
205 void cuHostBuffer<T>::WriteData(T* const dst, ndInt32 elements, cudaStream_t stream) const
206 {
207  dAssert(elements <= m_size);
208  cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array, sizeof(T) * elements, cudaMemcpyDeviceToHost, stream);
209  dAssert(cudaStatus == cudaSuccess);
210  if (cudaStatus != cudaSuccess)
211  {
212  dAssert(0);
213  }
214 }
215 
216 #endif
cuHostBuffer
Definition: cuHostBuffer.h:32