22 #ifndef __ND_DEVICE_BUFFER_H__
23 #define __ND_DEVICE_BUFFER_H__
26 #include <cuda_runtime.h>
27 #include <ndNewtonStdafx.h>
28 #include "cuIntrinsics.h"
// Size, capacity and element-access interface of the buffer.
// NOTE(review): only the declarations are visible in this view; the notes
// below are inferred from fragments elsewhere in this file and should be
// confirmed against the full definitions.

// Element-count accessor (const). Presumably returns m_size, the value the
// transfer functions assert `elements` against -- confirm.
37 ndInt32 GetCount()
const;
// Sets the element count to `count`. A `while (count > m_capacity)` loop that
// calls Resize(m_capacity * 2) appears in this file and presumably belongs to
// this method -- confirm.
38 void SetCount(ndInt32 count);
// Changes the storage capacity. The reallocation code visible in this file
// clamps the requested size to at least D_GRANULARITY and copies the existing
// contents device-to-device; presumably that is this method's body -- confirm.
41 void Resize(ndInt32 count);
// Capacity accessor (const). Presumably returns m_capacity -- confirm.
42 ndInt32 GetCapacity()
const;
// Indexed element access, mutable and const overloads.
// NOTE(review): m_array is used as a device pointer by the cudaMemcpy and
// cudaFree calls in this file, so a reference returned from here presumably
// must only be dereferenced in device code -- confirm.
44 T& operator[] (ndInt32 i);
45 const T& operator[] (ndInt32 i)
const;
// Host <-> device transfer interface.
// NOTE(review): the names read from the buffer's point of view. The copies in
// this file that take `src` use cudaMemcpyHostToDevice with m_array as the
// destination (ReadData loads host data INTO the buffer); the copies that
// take `dst` use cudaMemcpyDeviceToHost with m_array as the source (WriteData
// stores the buffer's contents OUT to host memory). Confirm against the full
// definitions.

// Blocking upload of `elements` items of type T from `src`.
47 void ReadData(
const T*
const src, ndInt32 elements);
// Blocking download of `elements` items of type T into `dst`.
48 void WriteData(T*
const dst, ndInt32 elements)
const;
// Stream variant of the upload: the copy is issued on `stream`.
50 void ReadData(
const T*
const src, ndInt32 elements, cudaStream_t stream);
// Stream variant of the download: the copy is issued on `stream`.
51 void WriteData(T*
const dst, ndInt32 elements, cudaStream_t stream)
const;
// Calls SetCount with D_GRANULARITY as the element count.
// NOTE(review): the enclosing function header is not visible in this view --
// presumably the constructor; confirm.
64 SetCount(D_GRANULARITY);
// Frees m_array with cudaFree and checks the returned status.
// NOTE(review): the enclosing function header is not visible in this view --
// presumably the destructor; confirm.
// The cudaSuccess initializer is overwritten by the very next statement.
73 cudaError_t cudaStatus = cudaSuccess;
74 cudaStatus = cudaFree(m_array);
// The status is asserted and then tested again by an explicit `if`; the body
// of that `if` is not visible here.
75 dAssert(cudaStatus == cudaSuccess);
76 if (cudaStatus != cudaSuccess)
// Grows the storage until `count` fits: while `count` exceeds m_capacity,
// Resize is called with twice the current capacity.
// NOTE(review): the enclosing function header is not visible -- presumably
// SetCount; confirm.
// NOTE(review): doubling a capacity of 0 still requests 0, so termination
// relies on Resize establishing a non-zero capacity. The reallocation code in
// this file does take the `m_capacity == 0` case and clamps to at least
// D_GRANULARITY; confirm D_GRANULARITY > 0.
// NOTE(review): every pass is a separate Resize call; computing the final
// capacity first would need only one call.
108 while (count > m_capacity)
110 Resize(m_capacity * 2);
// Reallocates the device storage to hold `newSize` elements.
// NOTE(review): the function header, the braces and several statements (the
// declaration of newArray, the assignment of the new pointer to m_array) are
// not visible in this view -- presumably this is the body of Resize; confirm.
130 cudaError_t cudaStatus = cudaSuccess;
// Grow path: taken when the request exceeds the current capacity, or when the
// capacity is zero.
131 if (newSize > m_capacity || (m_capacity == 0))
// Never allocate fewer than D_GRANULARITY elements.
134 newSize = dMax(newSize, D_GRANULARITY);
135 const ndInt32 itemSizeInBytes =
sizeof(T);
// NOTE(review): newSize * itemSizeInBytes multiplies two ndInt32 values
// (presumably 32-bit) before the result is widened for cudaMalloc, so a very
// large buffer could overflow the byte count -- confirm the expected sizes.
136 cudaStatus = cudaMalloc((
void**)&newArray, newSize * itemSizeInBytes);
137 dAssert(cudaStatus == cudaSuccess);
// Carry the m_size elements currently in use over to the new allocation, then
// release the old one.
140 cudaStatus = cudaMemcpy(newArray, m_array, m_size * itemSizeInBytes, cudaMemcpyDeviceToDevice);
141 dAssert(cudaStatus == cudaSuccess);
142 cudaStatus = cudaFree(m_array);
143 dAssert(cudaStatus == cudaSuccess);
146 m_capacity = newSize;
// Shrink path: taken when the request is below the current capacity.
148 else if (newSize < m_capacity)
151 const ndInt32 itemSizeInBytes =
sizeof(T);
// NOTE(review): clamping up to D_GRANULARITY happens after the comparison
// above, so newSize may now equal or exceed m_capacity. If m_capacity were
// ever smaller than D_GRANULARITY, the copy below would read past the old
// allocation -- confirm the capacity can never be below D_GRANULARITY.
152 newSize = dMax(newSize, D_GRANULARITY);
// NOTE(review): unlike the grow path, the statuses returned by this cudaMalloc
// and by the cudaMemcpy below are overwritten by the next assignment with no
// visible check in between; only the cudaFree status reaches the assert and
// the final `if`.
153 cudaStatus = cudaMalloc((
void**)&newArray, newSize * itemSizeInBytes);
// Copies newSize elements (not m_size) into the smaller allocation.
// NOTE(review): no adjustment of m_size is visible in this branch; if m_size
// can exceed the new capacity it would be left stale -- confirm.
156 cudaStatus = cudaMemcpy(newArray, m_array, newSize * itemSizeInBytes, cudaMemcpyDeviceToDevice);
157 cudaStatus = cudaFree(m_array);
158 dAssert(cudaStatus == cudaSuccess);
161 m_capacity = newSize;
// Final check on the last status recorded above; the body of this `if` is not
// visible here.
164 if (cudaStatus != cudaSuccess)
// Blocking host-to-device copy of `elements` items of type T from `src` into
// m_array. The assert rejects requests larger than the current m_size.
// NOTE(review): the enclosing function header is not visible -- presumably
// ReadData(src, elements); confirm.
173 dAssert(elements <= m_size);
// NOTE(review): the cudaError_t returned by cudaMemcpy is discarded here,
// whereas the cudaMemcpyAsync calls in this file capture and assert on it.
174 cudaMemcpy(m_array, src,
sizeof (T) * elements, cudaMemcpyHostToDevice);
// Blocking device-to-host copy of `elements` items of type T from m_array
// into `dst`. The assert rejects requests larger than the current m_size.
// NOTE(review): the enclosing function header is not visible -- presumably
// WriteData(dst, elements); confirm.
180 dAssert(elements <= m_size);
// NOTE(review): the cudaError_t returned by cudaMemcpy is discarded here,
// whereas the cudaMemcpyAsync calls in this file capture and assert on it.
181 cudaMemcpy(dst, m_array,
sizeof(T) * elements, cudaMemcpyDeviceToHost);
// Host-to-device copy of `elements` items of type T from `src` into m_array,
// issued on `stream` with cudaMemcpyAsync. The assert rejects requests larger
// than the current m_size.
// NOTE(review): the enclosing function header is not visible -- presumably
// ReadData(src, elements, stream); confirm.
187 dAssert(elements <= m_size);
188 cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src,
sizeof(T) * elements, cudaMemcpyHostToDevice, stream);
// The returned status is asserted and then tested again by an explicit `if`;
// the body of that `if` is not visible here.
189 dAssert(cudaStatus == cudaSuccess);
190 if (cudaStatus != cudaSuccess)
// Device-to-host copy of `elements` items of type T from m_array into `dst`,
// issued on `stream` with cudaMemcpyAsync. The assert rejects requests larger
// than the current m_size.
// NOTE(review): the enclosing function header is not visible -- presumably
// WriteData(dst, elements, stream); confirm.
// NOTE(review): no stream synchronization is visible in this fragment, so
// callers presumably must synchronize `stream` before reading `dst` -- confirm.
199 dAssert(elements <= m_size);
200 cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array,
sizeof(T) * elements, cudaMemcpyDeviceToHost, stream);
// The returned status is asserted and then tested again by an explicit `if`;
// the body of that `if` is not visible here.
201 dAssert(cudaStatus == cudaSuccess);
202 if (cudaStatus != cudaSuccess)