Newton Dynamics  4.00
ndSort.h
1 /* Copyright (c) <2003-2022> <Julio Jerez, Newton Game Dynamics>
2 *
3 * This software is provided 'as-is', without any express or implied
4 * warranty. In no event will the authors be held liable for any damages
5 * arising from the use of this software.
6 *
7 * Permission is granted to anyone to use this software for any purpose,
8 * including commercial applications, and to alter it and redistribute it
9 * freely, subject to the following restrictions:
10 *
11 * 1. The origin of this software must not be misrepresented; you must not
12 * claim that you wrote the original software. If you use this software
13 * in a product, an acknowledgment in the product documentation would be
14 * appreciated but is not required.
15 *
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 *
19 * 3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #ifndef __ND_SORT_H__
23 #define __ND_SORT_H__
24 
25 #include "ndCoreStdafx.h"
26 #include "ndArray.h"
27 #include "ndProfiler.h"
28 #include "ndThreadPool.h"
29 
30 template <class T, class dCompareKey>
31 void ndSort(T* const array, ndInt32 elements, void* const context)
32 {
33  //D_TRACKTIME();
34  const ndInt32 batchSize = 8;
35  ndInt32 stack[128][2];
36 
37  stack[0][0] = 0;
38  stack[0][1] = elements - 1;
39  ndInt32 stackIndex = 1;
40  const dCompareKey comparator;
41  while (stackIndex)
42  {
43  stackIndex--;
44  ndInt32 lo = stack[stackIndex][0];
45  ndInt32 hi = stack[stackIndex][1];
46  if ((hi - lo) > batchSize)
47  {
48  ndInt32 mid = (lo + hi) >> 1;
49  if (comparator.Compare(array[lo], array[mid], context) > 0)
50  {
51  ndSwap(array[lo], array[mid]);
52  }
53  if (comparator.Compare(array[mid], array[hi], context) > 0)
54  {
55  ndSwap(array[mid], array[hi]);
56  }
57  if (comparator.Compare(array[lo], array[mid], context) > 0)
58  {
59  ndSwap(array[lo], array[mid]);
60  }
61  ndInt32 i = lo + 1;
62  ndInt32 j = hi - 1;
63  const T pivot(array[mid]);
64  do
65  {
66  while (comparator.Compare(array[i], pivot, context) < 0)
67  {
68  i++;
69  }
70  while (comparator.Compare(array[j], pivot, context) > 0)
71  {
72  j--;
73  }
74 
75  if (i <= j)
76  {
77  ndSwap(array[i], array[j]);
78  i++;
79  j--;
80  }
81  } while (i <= j);
82 
83  if (i < hi)
84  {
85  stack[stackIndex][0] = i;
86  stack[stackIndex][1] = hi;
87  stackIndex++;
88  }
89  if (lo < j)
90  {
91  stack[stackIndex][0] = lo;
92  stack[stackIndex][1] = j;
93  stackIndex++;
94  }
95  ndAssert(stackIndex < ndInt32(sizeof(stack) / (2 * sizeof(stack[0][0]))));
96  }
97  }
98 
99  ndInt32 stride = batchSize + 1;
100  if (elements < stride)
101  {
102  stride = elements;
103  }
104  for (ndInt32 i = 1; i < stride; ++i)
105  {
106  if (comparator.Compare(array[0], array[i], context) > 0)
107  {
108  ndSwap(array[0], array[i]);
109  }
110  }
111 
112  for (ndInt32 i = 1; i < elements; ++i)
113  {
114  ndInt32 j = i;
115  const T tmp(array[i]);
116  for (; comparator.Compare(array[j - 1], tmp, context) > 0; --j)
117  {
118  ndAssert(j > 0);
119  array[j] = array[j - 1];
120  }
121  array[j] = tmp;
122  }
123 
124 //#ifdef _DEBUG
125 #if 0
126  for (ndInt32 i = 0; i < (elements - 1); ++i)
127  {
128  ndAssert(comparator.Compare(array[i], array[i + 1], context) <= 0);
129  }
130 #endif
131 }
132 
133 template <class T, class ndEvaluateKey, ndInt32 keyBitSize>
134 void ndCountingSortInPlace(T* const array, T* const scratchBuffer, ndInt32 size, ndUnsigned32* const prefixScanOut, void* const context)
135 {
136  //D_TRACKTIME();
137  ndAssert(keyBitSize > 0);
138  ndUnsigned32 scans[(1 << keyBitSize) + 1];
139  ndEvaluateKey evaluator(context);
140  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
141  {
142  scans[i] = 0;
143  }
144  for (ndInt32 i = 0; i < size; ++i)
145  {
146  const T& entry = array[i];
147  scratchBuffer[i] = entry;
148  const ndInt32 key = evaluator.GetKey(entry);
149  ndAssert(key >= 0);
150  ndAssert(key < (1 << keyBitSize));
151  scans[key] ++;
152  }
153 
154  ndUnsigned32 sum = 0;
155  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
156  {
157  ndUnsigned32 partialSum = scans[i];
158  scans[i] = sum;
159  sum += partialSum;
160  }
161 
162  if (prefixScanOut)
163  {
164  for (ndInt32 i = 0; i < ((1 << keyBitSize) + 1); ++i)
165  {
166  prefixScanOut[i] = scans[i];
167  }
168  }
169 
170  for (ndInt32 i = 0; i < size; ++i)
171  {
172  const T& entry = scratchBuffer[i];
173  const ndInt32 key = evaluator.GetKey(entry);
174  ndAssert(key >= 0);
175  ndAssert(key < (1 << keyBitSize));
176  const ndUnsigned32 index = scans[key];
177  array[index] = entry;
178  scans[key] = index + 1;
179  }
180 
181  //#ifdef _DEBUG
182 #if 0
183  for (ndInt32 i = size - 2; i >= 0; --i)
184  {
185  ndAssert(evaluator.GetKey(scratchBuffer[i]) <= evaluator.GetKey(scratchBuffer[i + 1]));
186  }
187 #endif
188 }
189 
190 template <class T, class ndEvaluateKey, ndInt32 keyBitSize>
191 void ndCountingSortInPlace(ndThreadPool& threadPool, T* const array, T* const scratchBuffer, ndInt32 size, ndUnsigned32* const prefixScanOut, void* const context)
192 {
193  D_TRACKTIME();
194  ndEvaluateKey evaluator(context);
195  const ndInt32 threadCount = threadPool.GetThreadCount();
196 
197  ndUnsigned32* const sum = ndAlloca(ndUnsigned32, 1 << keyBitSize);
198  ndUnsigned32* const scans = ndAlloca(ndUnsigned32, threadCount * (1 << keyBitSize));
199 
200  auto ndBuildHistogram = ndMakeObject::ndFunction([&array, &scratchBuffer, size, &evaluator, &scans](ndInt32 threadIndex, ndInt32 threadCount)
201  {
202  D_TRACKTIME_NAMED(ndBuildHistogram);
203  ndUnsigned32* const scan = &scans[threadIndex * (1 << keyBitSize)];
204 
205  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
206  {
207  scan[i] = 0;
208  }
209 
210  ndStartEnd startEnd(size, threadIndex, threadCount);
211  for (ndInt32 i = startEnd.m_start; i < startEnd.m_end; ++i)
212  {
213  const T& entry = array[i];
214  const ndInt32 key = evaluator.GetKey(entry);
215  ndAssert(key >= 0);
216  ndAssert(key < (1 << keyBitSize));
217  scan[key] ++;
218  scratchBuffer[i] = entry;
219  }
220  });
221 
222  auto ndShuffleArray = ndMakeObject::ndFunction([&array, &scratchBuffer, size, &evaluator, &scans, &sum](ndInt32 threadIndex, ndInt32 threadCount)
223  {
224  D_TRACKTIME_NAMED(ndShuffleArray);
225  ndUnsigned32* const scan = &scans[threadIndex * (1 << keyBitSize)];
226 
227  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
228  {
229  scan[i] += sum[i];
230  }
231  ndStartEnd startEnd(size, threadIndex, threadCount);
232  for (ndInt32 i = startEnd.m_start; i < startEnd.m_end; ++i)
233  {
234  const T& entry = scratchBuffer[i];
235  const ndInt32 key = evaluator.GetKey(entry);
236  ndAssert(key >= 0);
237  ndAssert(key < (1 << keyBitSize));
238  const ndUnsigned32 index = scan[key];
239  array[index] = entry;
240  scan[key] = index + 1;
241  }
242  });
243 
244  threadPool.ParallelExecute(ndBuildHistogram);
245 
246  ndInt32 bits = keyBitSize;
247  if (bits < 11)
248  {
249  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
250  {
251  sum[i] = 0;
252  }
253  for (ndInt32 j = 0; j < threadCount; ++j)
254  {
255  ndUnsigned32* const scan = &scans[j * (1 << keyBitSize)];
256  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
257  {
258  ndUnsigned32 partialSum = scan[i];
259  scan[i] = sum[i];
260  sum[i] += partialSum;
261  }
262  }
263 
264  ndUnsigned32 accSum = 0;
265  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
266  {
267  ndUnsigned32 partialSum = sum[i];
268  sum[i] = accSum;
269  accSum += partialSum;
270  }
271  }
272  else
273  {
274  ndAssert(0);
275  }
276 
277  if (prefixScanOut)
278  {
279  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
280  {
281  prefixScanOut[i] = sum[i];
282  }
283  prefixScanOut[1 << keyBitSize] = ndUnsigned32(size);
284  }
285 
286  threadPool.ParallelExecute(ndShuffleArray);
287 
288 //#ifdef _DEBUG
289 #if 0
290  for (ndInt32 i = 1; i < size; ++i)
291  {
292  ndInt32 key0 = evaluator.GetKey(array[i - 1]);
293  ndInt32 key1 = evaluator.GetKey(array[i + 0]);
294  ndAssert(key0 <= key1);
295  }
296 #endif
297 }
298 
299 template <class T, class ndEvaluateKey, ndInt32 keyBitSize>
300 void ndCountingSort(const T* const srcArray, T* const dstArray, ndInt32 size, ndUnsigned32* const prefixScanOut, void* const context)
301 {
302  //D_TRACKTIME();
303  ndAssert(keyBitSize > 0);
304  ndUnsigned32 scans[(1 << keyBitSize) + 1];
305  ndEvaluateKey evaluator(context);
306  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
307  {
308  scans[i] = 0;
309  }
310  for (ndInt32 i = 0; i < size; ++i)
311  {
312  const T& entry = srcArray[i];
313  const ndInt32 key = evaluator.GetKey(entry);
314  ndAssert(key >= 0);
315  ndAssert(key < (1 << keyBitSize));
316  scans[key] ++;
317  }
318 
319  ndUnsigned32 sum = 0;
320  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
321  {
322  ndUnsigned32 partialSum = scans[i];
323  scans[i] = sum;
324  sum += partialSum;
325  }
326 
327  if (prefixScanOut)
328  {
329  for (ndInt32 i = 0; i < ((1 << keyBitSize) + 1); ++i)
330  {
331  prefixScanOut[i] = scans[i];
332  }
333  }
334 
335  for (ndInt32 i = 0; i < size; ++i)
336  {
337  const T& entry = srcArray[i];
338  const ndInt32 key = evaluator.GetKey(entry);
339  ndAssert(key >= 0);
340  ndAssert(key < (1 << keyBitSize));
341  const ndUnsigned32 index = scans[key];
342  dstArray[index] = entry;
343  scans[key] = index + 1;
344  }
345 
346 //#ifdef _DEBUG
347 #if 0
348  for (ndInt32 i = size - 2; i >= 0; --i)
349  {
350  ndAssert(evaluator.GetKey(scratchBuffer[i]) <= evaluator.GetKey(scratchBuffer[i + 1]));
351  }
352 #endif
353 }
354 
355 template <class T, class ndEvaluateKey, ndInt32 keyBitSize>
356 void ndCountingSort(ndThreadPool& threadPool, const T* const srcArray, T* const dstArray, ndInt32 size, ndUnsigned32* const prefixScanOut, void* const context)
357 {
358  D_TRACKTIME();
359  ndEvaluateKey evaluator(context);
360  const ndInt32 threadCount = threadPool.GetThreadCount();
361 
362  ndUnsigned32* const sum = ndAlloca(ndUnsigned32, 1 << keyBitSize);
363  ndUnsigned32* const scans = ndAlloca(ndUnsigned32, threadCount * (1 << keyBitSize));
364 
365  auto ndBuildHistogram = ndMakeObject::ndFunction([&srcArray, size, &evaluator, &scans](ndInt32 threadIndex, ndInt32 threadCount)
366  {
367  D_TRACKTIME_NAMED(ndBuildHistogram);
368  ndUnsigned32* const scan = &scans[threadIndex * (1 << keyBitSize)];
369 
370  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
371  {
372  scan[i] = 0;
373  }
374 
375  ndStartEnd startEnd(size, threadIndex, threadCount);
376  for (ndInt32 i = startEnd.m_start; i < startEnd.m_end; ++i)
377  {
378  const T& entry = srcArray[i];
379  const ndInt32 key = evaluator.GetKey(entry);
380  ndAssert(key >= 0);
381  ndAssert(key < (1 << keyBitSize));
382  scan[key] ++;
383  }
384  });
385 
386  auto ndShuffleArray = ndMakeObject::ndFunction([&srcArray, &dstArray, size, &evaluator, &scans, &sum](ndInt32 threadIndex, ndInt32 threadCount)
387  {
388  D_TRACKTIME_NAMED(ndShuffleArray);
389  ndUnsigned32* const scan = &scans[threadIndex * (1 << keyBitSize)];
390 
391  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
392  {
393  scan[i] += sum[i];
394  }
395 
396  ndStartEnd startEnd(size, threadIndex, threadCount);
397  for (ndInt32 i = startEnd.m_start; i < startEnd.m_end; ++i)
398  {
399  const T& entry = srcArray[i];
400  const ndInt32 key = evaluator.GetKey(entry);
401  ndAssert(key >= 0);
402  ndAssert(key < (1 << keyBitSize));
403  const ndUnsigned32 index = scan[key];
404  dstArray[index] = entry;
405  scan[key] = index + 1;
406  }
407  });
408 
409  threadPool.ParallelExecute(ndBuildHistogram);
410 
411  ndInt32 bits = keyBitSize;
412  if (bits < 11)
413  {
414  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
415  {
416  sum[i] = 0;
417  }
418  for (ndInt32 j = 0; j < threadCount; ++j)
419  {
420  ndUnsigned32* const scan = &scans[j * (1 << keyBitSize)];
421  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
422  {
423  ndUnsigned32 partialSum = scan[i];
424  scan[i] = sum[i];
425  sum[i] += partialSum;
426  }
427  }
428 
429  ndUnsigned32 accSum = 0;
430  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
431  {
432  ndUnsigned32 partialSum = sum[i];
433  sum[i] = accSum;
434  accSum += partialSum;
435  }
436  }
437  else
438  {
439  ndAssert(0);
440  }
441 
442  if (prefixScanOut)
443  {
444  for (ndInt32 i = 0; i < (1 << keyBitSize); ++i)
445  {
446  prefixScanOut[i] = sum[i];
447  }
448  prefixScanOut[1 << keyBitSize] = ndUnsigned32(size);
449  }
450 
451  threadPool.ParallelExecute(ndShuffleArray);
452 
453 //#ifdef _DEBUG
454 #if 0
455  for (ndInt32 i = 1; i < size; ++i)
456  {
457  ndInt32 key0 = evaluator.GetKey(scratchBuffer[i - 1]);
458  ndInt32 key1 = evaluator.GetKey(scratchBuffer[i + 0]);
459  ndAssert(key0 <= key1);
460  }
461 #endif
462 }
463 
464 template <class T, class ndEvaluateKey, ndInt32 keyBitSize>
465 void ndCountingSort(ndThreadPool& threadPool, ndArray<T>& array, ndArray<T>& scratchBuffer, ndUnsigned32* const prefixScanOut, void* const context)
466 {
467  scratchBuffer.SetCount(array.GetCount());
468  ndCountingSort<T, ndEvaluateKey, keyBitSize>(threadPool, &array[0], &scratchBuffer[0], array.GetCount(), prefixScanOut, context);
469  array.Swap(scratchBuffer);
470 }
471 
472 template <class T, class ndEvaluateKey, ndInt32 keyBitSize>
473 void ndCountingSort(ndArray<T>& array, ndArray<T>& scratchBuffer, ndUnsigned32* const prefixScanOut, void* const context)
474 {
475  scratchBuffer.SetCount(array.GetCount());
476  ndCountingSort<T, ndEvaluateKey, keyBitSize>(&array[0], &scratchBuffer[0], array.GetCount(), prefixScanOut, context);
477  array.Swap(scratchBuffer);
478 }
479 
480 #endif
ndArray
Generic template vector.
Definition: ndArray.h:42
ndArray::Swap
void Swap(ndArray &other)
Interchange all the information with other.
Definition: ndArray.h:262
ndStartEnd
Definition: ndThreadPool.h:39
ndArray::GetCount
ndInt32 GetCount() const
return the size of the array.
Definition: ndArray.h:182
ndThreadPool
Definition: ndThreadPool.h:65
ndArray::SetCount
void SetCount(ndInt32 count)
Set a new size.
Definition: ndArray.h:188