22 #ifndef __ND_CUDA_HOST_BUFFER_H__
23 #define __ND_CUDA_HOST_BUFFER_H__
26 #include <cuda_runtime.h>
27 #include "ndCudaIntrinsics.h"
// Sets the number of elements considered in use.
// NOTE(review): the `while (count > m_capacity) Resize(m_capacity * 2);` fragment
// later in this file looks like this method's body, i.e. capacity is doubled
// until `count` fits - confirm against the full definition.
37 void SetCount(
int count);
// Changes the capacity of the underlying storage to hold `count` elements.
// NOTE(review): the body fragment visible later in this file allocates a new
// block with cudaMallocHost, never going below D_GRANULARITY elements, copies
// the old contents and frees the old block with cudaFreeHost - confirm that
// fragment belongs to this method.
40 void Resize(
int count);
// Capacity query. The definition is not visible in this chunk; presumably it
// returns m_capacity (number of elements allocated) - TODO confirm.
41 int GetCapacity()
const;
// Mutable element access by index `i`.
// The definition is not visible in this chunk, so whether the index is
// range-checked is unknown - TODO confirm.
43 T& operator[] (
int i);
// Read-only element access by index `i` (const overload).
// The definition is not visible in this chunk, so whether the index is
// range-checked is unknown - TODO confirm.
44 const T& operator[] (
int i)
const;
// Blocking copy of `elements` items from `src` into this buffer's storage.
// NOTE(review): the matching fragment later in this file asserts
// elements <= m_size and then calls cudaMemcpy(m_array, src, ...); whether
// `src` is expected to be a host or a device pointer is not visible here -
// confirm against callers.
48 void ReadData(
const T*
const src,
int elements);
// Blocking copy of `elements` items from this buffer's storage out to `dst`.
// NOTE(review): the matching fragment later in this file asserts
// elements <= m_size and then calls cudaMemcpy(dst, m_array, ...); whether
// `dst` is expected to be a host or a device pointer is not visible here -
// confirm against callers.
49 void WriteData(T*
const dst,
int elements)
const;
// Stream overload of ReadData: copies `elements` items from `src` into this
// buffer's storage using cudaMemcpyAsync on `stream` (per the fragment later
// in this file). The call only enqueues the copy; NOTE(review): callers
// presumably must synchronize `stream` before touching the data - confirm.
51 void ReadData(
const T*
const src,
int elements, cudaStream_t stream);
// Stream overload of WriteData: copies `elements` items from this buffer's
// storage to `dst` using cudaMemcpyAsync on `stream` (per the fragment later
// in this file). The call only enqueues the copy; NOTE(review): callers
// presumably must synchronize `stream` before reading `dst` - confirm.
52 void WriteData(T*
const dst,
int elements, cudaStream_t stream)
const;
// Request D_GRANULARITY elements. NOTE(review): the enclosing function is not
// visible in this chunk; presumably this is used to force an initial
// allocation through SetCount's grow loop - confirm.
65 SetCount(D_GRANULARITY);
// Release the storage with cudaFreeHost (the deallocator that pairs with the
// cudaMallocHost calls elsewhere in this file) and check the returned status.
// NOTE(review): the enclosing function and any null-pointer guard around this
// code are not visible in this chunk.
74 cudaError_t cudaStatus = cudaSuccess;
75 cudaStatus = cudaFreeHost(m_array);
76 ndAssert(cudaStatus == cudaSuccess);
// Explicit failure branch; the statement it guards lies outside this chunk.
77 if (cudaStatus != cudaSuccess)
// Grow until `count` fits: every pass asks Resize for twice the current capacity.
// NOTE(review): with m_capacity == 0 this requests Resize(0); the Resize
// fragment in this file raises any request to at least D_GRANULARITY, so the
// loop should still make progress - confirm. `m_capacity * 2` is plain int
// arithmetic and is not guarded against overflow.
109 while (count > m_capacity)
111 Resize(m_capacity * 2);
// Exchange the three members (element count, storage pointer, capacity) with
// `buffer` one by one through cuSwap; these lines copy no element data.
131 cuSwap(m_size, buffer.m_size);
132 cuSwap(m_array, buffer.m_array);
133 cuSwap(m_capacity, buffer.m_capacity);
139 cudaError_t cudaStatus = cudaSuccess;
140 if (newSize > m_capacity || (m_capacity == 0))
143 newSize = std::max(newSize, D_GRANULARITY);
144 cudaStatus = cudaMallocHost((
void**)&newArray, newSize *
sizeof(T));
145 ndAssert(cudaStatus == cudaSuccess);
148 cudaStatus = cudaMemcpy(newArray, m_array, m_size *
sizeof(T), cudaMemcpyDeviceToDevice);
149 ndAssert(cudaStatus == cudaSuccess);
150 cudaStatus = cudaFreeHost(m_array);
151 ndAssert(cudaStatus == cudaSuccess);
154 m_capacity = newSize;
156 else if (newSize < m_capacity)
159 newSize = std::max(newSize, D_GRANULARITY);
160 cudaStatus = cudaMallocHost((
void**)&newArray, newSize *
sizeof(T));
163 cudaStatus = cudaMemcpy(newArray, m_array, newSize *
sizeof(T), cudaMemcpyDeviceToDevice);
164 cudaStatus = cudaFreeHost(m_array);
165 ndAssert(cudaStatus == cudaSuccess);
168 m_capacity = newSize;
171 if (cudaStatus != cudaSuccess)
// Blocking copy of `elements` items from `src` into this buffer's storage.
// The caller must not ask for more items than the buffer currently holds.
180 ndAssert(elements <= m_size);
// m_array is pinned host memory (it is released with cudaFreeHost), so a
// host-to-device kind does not describe this copy, and a mismatched kind is
// undefined behavior per the CUDA runtime docs. cudaMemcpyDefault lets the
// runtime infer the direction from the pointers, whether `src` is host or
// device memory (requires unified virtual addressing).
181 cudaMemcpy(m_array, src,
sizeof (T) * elements, cudaMemcpyDefault);
// Blocking copy of `elements` items from this buffer's storage out to `dst`.
// The caller must not ask for more items than the buffer currently holds.
187 ndAssert(elements <= m_size);
// m_array is pinned host memory (it is released with cudaFreeHost), so a
// device-to-host kind does not describe this copy, and a mismatched kind is
// undefined behavior per the CUDA runtime docs. cudaMemcpyDefault lets the
// runtime infer the direction from the pointers, whether `dst` is host or
// device memory (requires unified virtual addressing).
188 cudaMemcpy(dst, m_array,
sizeof(T) * elements, cudaMemcpyDefault);
// Stream variant: enqueue a copy of `elements` items from `src` into this
// buffer's storage on `stream`. The copy is only queued here.
194 ndAssert(elements <= m_size);
// m_array is pinned host memory (it is released with cudaFreeHost), so a
// host-to-device kind does not describe this copy. cudaMemcpyDefault lets the
// runtime infer the direction from the pointers, whether `src` is host or
// device memory (requires unified virtual addressing).
195 cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src,
sizeof(T) * elements, cudaMemcpyDefault, stream);
196 ndAssert(cudaStatus == cudaSuccess);
// Explicit failure branch; the statement it guards lies outside this chunk.
197 if (cudaStatus != cudaSuccess)
// Stream variant: enqueue a copy of `elements` items from this buffer's
// storage to `dst` on `stream`. The copy is only queued here.
206 ndAssert(elements <= m_size);
// m_array is pinned host memory (it is released with cudaFreeHost), so a
// device-to-host kind does not describe this copy. cudaMemcpyDefault lets the
// runtime infer the direction from the pointers, whether `dst` is host or
// device memory (requires unified virtual addressing).
207 cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array,
sizeof(T) * elements, cudaMemcpyDefault, stream);
208 ndAssert(cudaStatus == cudaSuccess);
// Explicit failure branch; the statement it guards lies outside this chunk.
209 if (cudaStatus != cudaSuccess)