File size: 2,179 Bytes
66c9c8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/** Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto.  Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 */

#include "warp.h"
#include "sort.h"
#include "string.h"

#include <cstdint>

void radix_sort_pairs_host(int* keys, int* values, int n)
{
	static int tables[2][1 << 16];
	memset(tables, 0, sizeof(tables));
		
	int* auxKeys = keys + n;
	int* auxValues = values + n;

	// build histograms
	for (int i=0; i < n; ++i)
	{
		const unsigned short low = keys[i] & 0xffff;
		const unsigned short high = keys[i] >> 16;
		
		++tables[0][low];
		++tables[1][high];
	}
	
	// convert histograms to offset tables in-place
	int offlow = 0;
	int offhigh = 0;
	
	for (int i=0; i < 65536; ++i)
	{
		const int newofflow = offlow + tables[0][i];
		const int newoffhigh = offhigh + tables[1][i];
		
		tables[0][i] = offlow;
		tables[1][i] = offhigh;
		
		offlow = newofflow;
		offhigh = newoffhigh;
	}
		
	// pass 1 - sort by low 16 bits
	for (int i=0; i < n; ++i)
	{
		// lookup offset of input
		const int k = keys[i];
		const int v = values[i];
		const int b = k & 0xffff;
		
		// find offset and increment
		const int offset = tables[0][b]++;
		
		auxKeys[offset] = k;
		auxValues[offset] = v;
	}	
		
	// pass 2 - sort by high 16 bits
	for (int i=0; i < n; ++i)
	{
		// lookup offset of input
		const int k = auxKeys[i];
		const int v = auxValues[i];

		const int b = k >> 16;
		
		const int offset = tables[1][b]++;
		
		keys[offset] = k;
		values[offset] = v;
	}	
}

#if !WP_ENABLE_CUDA

void radix_sort_reserve(void* context, int n, void** mem_out, size_t* size_out) {}

void radix_sort_pairs_int_device(uint64_t keys, uint64_t values, int n) {}

#endif // !WP_ENABLE_CUDA


void radix_sort_pairs_int_host(uint64_t keys, uint64_t values, int n)
{
    radix_sort_pairs_host(
        reinterpret_cast<int *>(keys),
        reinterpret_cast<int *>(values), n);
}