22 #ifndef __ND_CUDA_DEVICE_BUFFER_H__
23 #define __ND_CUDA_DEVICE_BUFFER_H__
26 #include <cuda_runtime.h>
27 #include "ndCudaIntrinsics.h"
// Requests room for `count` elements.
// NOTE(review): a loop later in this file doubles m_capacity through Resize()
// while `count` exceeds it; that is presumably this method's body, but the
// definition header is not visible in this excerpt - confirm.
37 void SetCount(
int count);
// Changes the allocated capacity of the buffer to `count` elements.
// NOTE(review): the reallocation code later in this file works on a variable
// called `newSize` and raises it to at least D_GRANULARITY before calling
// cudaMalloc; presumably that is this method's definition with the parameter
// renamed - confirm.
40 void Resize(
int count);
// Const accessor returning an int.
// NOTE(review): the definition is not visible in this excerpt; presumably it
// returns m_capacity (a count of elements, not bytes) - confirm.
41 int GetCapacity()
const;
// Indexed access returning a mutable reference to element `i`.
// NOTE(review): the definition and any __host__/__device__ qualifiers are not
// visible in this excerpt. m_array is passed to cudaFree and used in
// device-to-device copies elsewhere in this file, so if this indexes m_array
// directly the reference is presumably only usable from device code - confirm.
43 T& operator[] (
int i);
// Read-only counterpart of the indexed accessor: returns a const reference to
// element `i` and may be called on a const buffer.
// NOTE(review): definition not visible in this excerpt; bounds checking and
// the memory space the reference points into are unconfirmed.
44 const T& operator[] (
int i)
const;
// Loads `elements` items from `src` into this buffer (the buffer "reads" from
// the caller's memory).
// NOTE(review): a cudaMemcpy with cudaMemcpyHostToDevice into m_array appears
// later in this file and is presumably this overload's body, which would make
// `src` a host pointer and the call blocking - confirm.
46 void ReadData(
const T*
const src,
int elements);
// Stores `elements` items from this buffer into `dst` (the buffer "writes" to
// the caller's memory). Const: the buffer itself is not modified.
// NOTE(review): a cudaMemcpy with cudaMemcpyDeviceToHost out of m_array
// appears later in this file and is presumably this overload's body, which
// would make `dst` a host pointer and the call blocking - confirm.
47 void WriteData(T*
const dst,
int elements)
const;
// Stream overload: loads `elements` items from `src` into this buffer using
// the given CUDA stream.
// NOTE(review): presumably implemented by the cudaMemcpyAsync
// (cudaMemcpyHostToDevice) call later in this file; if so the call can return
// before the transfer has finished and `src` must stay valid until `stream`
// has been synchronized - confirm.
49 void ReadData(
const T*
const src,
int elements, cudaStream_t stream);
// Stream overload: stores `elements` items from this buffer into `dst` using
// the given CUDA stream. Const: the buffer itself is not modified.
// NOTE(review): presumably implemented by the cudaMemcpyAsync
// (cudaMemcpyDeviceToHost) call later in this file; if so `dst` must not be
// read until `stream` has been synchronized - confirm.
50 void WriteData(T*
const dst,
int elements, cudaStream_t stream)
const;
// Sizes the buffer for D_GRANULARITY elements up front.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the constructor's initial allocation - confirm.
63 SetCount(D_GRANULARITY);
// Releases the device allocation held in m_array and checks the result.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the destructor - confirm.
72 cudaError_t cudaStatus = cudaSuccess;
73 cudaStatus = cudaFree(m_array);
// The status is asserted and then tested again explicitly; the body of the
// explicit test is not visible in this excerpt.
74 ndAssert(cudaStatus == cudaSuccess);
75 if (cudaStatus != cudaSuccess)
// Grows the allocation geometrically: keeps doubling the capacity through
// Resize() until it can hold `count` elements.
// NOTE(review): the loop only terminates if Resize() actually raises
// m_capacity; with m_capacity == 0 the request is Resize(0), which relies on
// the D_GRANULARITY clamp in the reallocation code to make progress - confirm.
// The enclosing function header (presumably SetCount) and any update of
// m_size are not visible in this excerpt.
107 while (count > m_capacity)
109 Resize(m_capacity * 2);
// Reallocation logic: moves the device array into a freshly allocated block
// sized for `newSize` elements, either growing or shrinking it.
// NOTE(review): the function header (presumably Resize), the braces, the
// declaration of newArray and the reassignment of m_array are not visible in
// this excerpt - confirm against the full definition.
129 cudaError_t cudaStatus = cudaSuccess;
// Grow path; also taken whenever m_capacity is zero.
130 if (newSize > m_capacity || (m_capacity == 0))
// Never allocate fewer than D_GRANULARITY elements.
133 newSize = std::max(newSize, D_GRANULARITY);
134 const int itemSizeInBytes =
sizeof(T);
// NOTE(review): the byte count is computed in int arithmetic, so very large
// buffers could overflow before reaching cudaMalloc - confirm expected sizes.
135 cudaStatus = cudaMalloc((
void**)&newArray, newSize * itemSizeInBytes);
136 ndAssert(cudaStatus == cudaSuccess);
// Carry the m_size existing elements over to the new block, then release the
// old one. Each CUDA status is asserted right after its call.
139 cudaStatus = cudaMemcpy(newArray, m_array, m_size * itemSizeInBytes, cudaMemcpyDeviceToDevice);
140 ndAssert(cudaStatus == cudaSuccess);
141 cudaStatus = cudaFree(m_array);
142 ndAssert(cudaStatus == cudaSuccess);
145 m_capacity = newSize;
// Shrink path: same steps, but only the first newSize elements are preserved.
147 else if (newSize < m_capacity)
150 const int itemSizeInBytes =
sizeof(T);
// The clamp can raise newSize back up to D_GRANULARITY, so a request below
// the granularity still allocates D_GRANULARITY elements.
151 newSize = std::max(newSize, D_GRANULARITY);
// NOTE(review): in the visible lines the statuses of this cudaMalloc and of
// the following cudaMemcpy are overwritten before any assert sees them (only
// the cudaFree result is asserted), unlike the grow path - confirm and
// consider asserting each call.
152 cudaStatus = cudaMalloc((
void**)&newArray, newSize * itemSizeInBytes);
// NOTE(review): if the clamp raised newSize above the old m_capacity, this
// copy would read past the end of the old allocation - confirm that cannot
// happen (e.g. capacity is never below D_GRANULARITY).
155 cudaStatus = cudaMemcpy(newArray, m_array, newSize * itemSizeInBytes, cudaMemcpyDeviceToDevice);
156 cudaStatus = cudaFree(m_array);
157 ndAssert(cudaStatus == cudaSuccess);
// NOTE(review): m_size is not visibly reduced here; if it stays above the new
// capacity, the `elements <= m_size` checks in the copy routines would admit
// transfers larger than the allocation - confirm.
160 m_capacity = newSize;
// Final explicit test of the last recorded status; its body is not visible in
// this excerpt.
163 if (cudaStatus != cudaSuccess)
// Blocking upload: copies `elements` items from `src` into the start of
// m_array, after asserting that the request fits within m_size.
// NOTE(review): the cudaError_t returned by cudaMemcpy is discarded here,
// whereas the cudaMemcpyAsync calls in this file capture and assert it -
// consider checking it the same way. A negative `elements` also passes the
// assertion. The enclosing function header (presumably the two-argument
// ReadData) is not visible in this excerpt.
172 ndAssert(elements <= m_size);
173 cudaMemcpy(m_array, src,
sizeof (T) * elements, cudaMemcpyHostToDevice);
// Blocking download: copies the first `elements` items of m_array into `dst`,
// after asserting that the request fits within m_size.
// NOTE(review): the cudaError_t returned by cudaMemcpy is discarded here,
// whereas the cudaMemcpyAsync calls in this file capture and assert it -
// consider checking it the same way. The enclosing function header
// (presumably the two-argument WriteData) is not visible in this excerpt.
179 ndAssert(elements <= m_size);
180 cudaMemcpy(dst, m_array,
sizeof(T) * elements, cudaMemcpyDeviceToHost);
// Stream upload: enqueues a host-to-device copy of `elements` items from
// `src` into the start of m_array on `stream`, after asserting that the
// request fits within m_size.
// cudaMemcpyAsync may return before the transfer has run, so the status
// checked below covers the enqueue, not completion of the copy.
// NOTE(review): the enclosing function header (presumably the stream overload
// of ReadData) and the body of the final `if` are not visible in this excerpt.
186 ndAssert(elements <= m_size);
187 cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src,
sizeof(T) * elements, cudaMemcpyHostToDevice, stream);
188 ndAssert(cudaStatus == cudaSuccess);
189 if (cudaStatus != cudaSuccess)
// Stream download: enqueues a device-to-host copy of the first `elements`
// items of m_array into `dst` on `stream`, after asserting that the request
// fits within m_size.
// cudaMemcpyAsync may return before the transfer has run, so `dst` should not
// be read until `stream` has been synchronized; the status checked below
// covers the enqueue, not completion of the copy.
// NOTE(review): the enclosing function header (presumably the stream overload
// of WriteData) and the body of the final `if` are not visible in this excerpt.
198 ndAssert(elements <= m_size);
199 cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array,
sizeof(T) * elements, cudaMemcpyDeviceToHost, stream);
200 ndAssert(cudaStatus == cudaSuccess);
201 if (cudaStatus != cudaSuccess)