22 #ifndef __ND_HOST_BUFFER_H__
23 #define __ND_HOST_BUFFER_H__
26 #include <cuda_runtime.h>
27 #include <ndNewtonStdafx.h>
28 #include "cuIntrinsics.h"
// Member declarations of a buffer class templated on the element type T.
// The class header and data members are outside this excerpt.

// Const accessor returning an ndInt32 (presumably the number of elements in use).
37 ndInt32 GetCount()
const;
// Sets the element count to `count` (presumably growing the storage when needed — TODO confirm).
38 void SetCount(ndInt32 count);
// Requests a storage size of `count` elements.
41 void Resize(ndInt32 count);
// Const accessor returning an ndInt32 (presumably the number of elements allocated).
42 ndInt32 GetCapacity()
const;
// Mutable element access by index i.
44 T& operator[] (ndInt32 i);
// Read-only element access by index i.
45 const T& operator[] (ndInt32 i)
const;
// Blocking transfer of `elements` items of T from `src` into this buffer.
49 void ReadData(
const T*
const src, ndInt32 elements);
// Blocking transfer of `elements` items of T from this buffer into `dst`.
50 void WriteData(T*
const dst, ndInt32 elements)
const;
// Same as the two-argument ReadData, but the copy is issued on `stream`.
52 void ReadData(
const T*
const src, ndInt32 elements, cudaStream_t stream);
// Same as the two-argument WriteData, but the copy is issued on `stream`.
53 void WriteData(T*
const dst, ndInt32 elements, cudaStream_t stream)
const;
// Fragment: the enclosing function's signature is outside this excerpt
// (presumably the constructor — TODO confirm).
// Calls SetCount with D_GRANULARITY as the requested element count.
66 SetCount(D_GRANULARITY);
// Fragment: the enclosing function's signature is outside this excerpt
// (presumably the destructor — TODO confirm).
// Releases the allocation held in m_array with cudaFreeHost.
75 cudaError_t cudaStatus = cudaSuccess;
76 cudaStatus = cudaFreeHost(m_array);
// Assert that the free succeeded.
77 dAssert(cudaStatus == cudaSuccess);
// Failure branch; its body lies outside this excerpt.
78 if (cudaStatus != cudaSuccess)
// Fragment: the enclosing function's signature is outside this excerpt
// (presumably SetCount — TODO confirm).
// Keeps asking Resize for twice the current capacity until `count` fits.
// NOTE(review): termination from m_capacity == 0 depends on Resize raising the
// capacity to a non-zero value — verify against Resize.
110 while (count > m_capacity)
112 Resize(m_capacity * 2);
// Fragment: the enclosing function's signature is outside this excerpt
// (presumably a swap member taking `buffer` — TODO confirm).
// Exchanges m_size, m_array and m_capacity with the corresponding members of `buffer`.
132 dSwap(m_size, buffer.m_size);
133 dSwap(m_array, buffer.m_array);
134 dSwap(m_capacity, buffer.m_capacity);
// Fragment of a reallocation routine (presumably Resize — its signature, braces
// and the declaration of newArray are outside this excerpt).
// Reallocates the storage with cudaMallocHost, copies the existing elements
// into the new allocation and frees the old one with cudaFreeHost.
140 cudaError_t cudaStatus = cudaSuccess;
// Grow path; also taken while the capacity is still zero.
141 if (newSize > m_capacity || (m_capacity == 0))
// Never allocate fewer than D_GRANULARITY elements.
144 newSize = dMax(newSize, D_GRANULARITY);
145 cudaStatus = cudaMallocHost((
void**)&newArray, newSize *
sizeof(T));
146 dAssert(cudaStatus == cudaSuccess);
// Source and destination are both host allocations (cudaMallocHost / cudaFreeHost),
// so the copy direction must be host-to-host; a direction that does not match
// the pointers is undefined behaviour for cudaMemcpy.
// Copies the m_size elements currently in use.
149 cudaStatus = cudaMemcpy(newArray, m_array, m_size *
sizeof(T), cudaMemcpyHostToHost);
150 dAssert(cudaStatus == cudaSuccess);
151 cudaStatus = cudaFreeHost(m_array);
152 dAssert(cudaStatus == cudaSuccess);
155 m_capacity = newSize;
// Shrink path.
157 else if (newSize < m_capacity)
// Never allocate fewer than D_GRANULARITY elements.
160 newSize = dMax(newSize, D_GRANULARITY);
// NOTE(review): in this excerpt the statuses returned by the allocation and by
// the copy below are overwritten by the next call with no check in between —
// confirm against the full file and assert on them as the grow path does.
161 cudaStatus = cudaMallocHost((
void**)&newArray, newSize *
sizeof(T));
// Host-to-host for the same reason as in the grow path; only the first
// newSize elements are kept.
164 cudaStatus = cudaMemcpy(newArray, m_array, newSize *
sizeof(T), cudaMemcpyHostToHost);
165 cudaStatus = cudaFreeHost(m_array);
166 dAssert(cudaStatus == cudaSuccess);
169 m_capacity = newSize;
// Failure branch on the last recorded status; its body lies outside this excerpt.
172 if (cudaStatus != cudaSuccess)
// Fragment of a blocking copy into the buffer (presumably the two-argument
// ReadData — signature outside this excerpt).
// Copies `elements` items of T from src into m_array.
181 dAssert(elements <= m_size);
// m_array is allocated with cudaMallocHost elsewhere in this file, so it is
// host memory and must not be described as a device destination.
// cudaMemcpyDefault lets the runtime infer the direction from the pointer
// values, which is correct whether src is a host or a device pointer
// (requires unified virtual addressing).
// NOTE(review): the returned cudaError_t is discarded here, unlike in the
// stream-based copies in this file.
182 cudaMemcpy(m_array, src,
sizeof (T) * elements, cudaMemcpyDefault);
// Fragment of a blocking copy out of the buffer (presumably the two-argument
// WriteData — signature outside this excerpt).
// Copies `elements` items of T from m_array into dst.
188 dAssert(elements <= m_size);
// m_array is allocated with cudaMallocHost elsewhere in this file, so it is
// host memory and must not be described as a device source.
// cudaMemcpyDefault lets the runtime infer the direction from the pointer
// values, which is correct whether dst is a host or a device pointer
// (requires unified virtual addressing).
// NOTE(review): the returned cudaError_t is discarded here, unlike in the
// stream-based copies in this file.
189 cudaMemcpy(dst, m_array,
sizeof(T) * elements, cudaMemcpyDefault);
// Fragment of a stream-ordered copy into the buffer (presumably the
// three-argument ReadData — signature outside this excerpt).
// Enqueues a copy of `elements` items of T from src into m_array on `stream`.
195 dAssert(elements <= m_size);
// m_array is allocated with cudaMallocHost elsewhere in this file, so it is
// host memory and must not be described as a device destination.
// cudaMemcpyDefault lets the runtime infer the direction from the pointer
// values (requires unified virtual addressing).
196 cudaError_t cudaStatus = cudaMemcpyAsync(m_array, src,
sizeof(T) * elements, cudaMemcpyDefault, stream);
197 dAssert(cudaStatus == cudaSuccess);
// Failure branch; its body lies outside this excerpt.
198 if (cudaStatus != cudaSuccess)
// Fragment of a stream-ordered copy out of the buffer (presumably the
// three-argument WriteData — signature outside this excerpt).
// Enqueues a copy of `elements` items of T from m_array into dst on `stream`.
207 dAssert(elements <= m_size);
// m_array is allocated with cudaMallocHost elsewhere in this file, so it is
// host memory and must not be described as a device source.
// cudaMemcpyDefault lets the runtime infer the direction from the pointer
// values (requires unified virtual addressing).
208 cudaError_t cudaStatus = cudaMemcpyAsync(dst, m_array,
sizeof(T) * elements, cudaMemcpyDefault, stream);
209 dAssert(cudaStatus == cudaSuccess);
// Failure branch; its body lies outside this excerpt.
210 if (cudaStatus != cudaSuccess)