File size: 5,851 Bytes
66c9c8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#pragma once

#include "builtin.h"

namespace wp
{

// Describes one bucket of a fabric array: the range of array indices it covers
// and the storage backing that range.
struct fabricbucket_t
{
    size_t index_start;  // first array index covered by this bucket (inclusive)
    size_t index_end;    // one past the last array index covered by this bucket (exclusive)
    void* ptr;           // element storage; for arrays of arrays this holds one data pointer per element
    size_t* lengths;     // inner array length per element; required only when indexing arrays of arrays
};


// Fabric array whose elements are stored in a list of buckets,
// each bucket covering a contiguous range of array indices.
template <typename T>
struct fabricarray_t
{
    // Zero-initialize all members so that a default-constructed array is
    // well-defined and reports empty() instead of reading an indeterminate size.
    CUDA_CALLABLE inline fabricarray_t()
        : buckets(nullptr),
          nbuckets(0),
          size(0)
    {}

    // for backward a = 0 initialization syntax; produces the same empty array
    CUDA_CALLABLE inline fabricarray_t(int)
        : buckets(nullptr),
          nbuckets(0),
          size(0)
    {}

    // True when the array holds no elements.
    CUDA_CALLABLE inline bool empty() const { return !size; }

    fabricbucket_t* buckets;  // array of fabricbucket_t on the correct device

    size_t nbuckets;  // number of entries in buckets
    size_t size;      // element count; empty() is true when this is zero
};


// Fabric array accessed through an index list: element i of the indexed array
// is element indices[i] of the wrapped fabric array.
template <typename T>
struct indexedfabricarray_t
{
    // Zero-initialize all members so that a default-constructed array is
    // well-defined and reports empty() instead of reading an indeterminate size.
    CUDA_CALLABLE inline indexedfabricarray_t()
        : fa(),
          indices(nullptr),
          size(0)
    {
        // clear the wrapped array explicitly rather than relying on what its constructor initializes
        fa.buckets = nullptr;
        fa.nbuckets = 0;
        fa.size = 0;
    }

    // for backward a = 0 initialization syntax; produces the same empty array
    CUDA_CALLABLE inline indexedfabricarray_t(int)
        : fa(),
          indices(nullptr),
          size(0)
    {
        fa.buckets = nullptr;
        fa.nbuckets = 0;
        fa.size = 0;
    }

    // True when the index list holds no entries.
    CUDA_CALLABLE inline bool empty() const { return !size; }

    fabricarray_t<T> fa;  // the wrapped fabric array

    // TODO: we use 32-bit indices for consistency with other Warp indexed arrays,
    // but Fabric uses 64-bit indexing.
    int* indices;  // positions into fa, one per indexed element
    size_t size;   // number of entries in indices
};


// Selects the lookup strategy used by fabricarray_find_bucket():
// non-zero for binary search, zero for linear search.
#ifndef FABRICARRAY_USE_BINARY_SEARCH
#define FABRICARRAY_USE_BINARY_SEARCH 1
#endif

// Find the bucket whose index range [index_start, index_end) contains i.
// Returns nullptr when no bucket contains i, including when the array has no buckets.
// Both strategies require the buckets to be ordered by index range without overlap.
template <typename T>
CUDA_CALLABLE inline const fabricbucket_t* fabricarray_find_bucket(const fabricarray_t<T>& fa, size_t i)
{
#if FABRICARRAY_USE_BINARY_SEARCH
    // Binary search over the half-open bucket range [lo, hi).
    // The exclusive upper bound keeps the unsigned arithmetic from wrapping around:
    // nbuckets == 0 gives an empty range, and narrowing to the left never computes mid - 1.
    size_t lo = 0;
    size_t hi = fa.nbuckets;
    while (lo < hi)
    {
        const size_t mid = lo + ((hi - lo) >> 1);
        const fabricbucket_t* bucket = fa.buckets + mid;
        if (i >= bucket->index_end)
            lo = mid + 1;  // i lies after this bucket
        else if (i < bucket->index_start)
            hi = mid;  // i lies before this bucket
        else
            return bucket;
    }
    return nullptr;
#else
    // use linear search to find the right bucket
    const fabricbucket_t* bucket = fa.buckets;
    const fabricbucket_t* bucket_end = bucket + fa.nbuckets;
    for (; bucket < bucket_end; ++bucket)
    {
        if (i < bucket->index_end)
        {
            // This is the first bucket that ends after i. It only matches if it also
            // starts at or before i; otherwise i falls in a gap and no bucket contains it.
            return (i >= bucket->index_start) ? bucket : nullptr;
        }
    }
    return nullptr;
#endif
}


// Compute the pointer to a fabricarray element at index i.
// This function is similar to wp::index(), but the array data type doesn't need to be known at compile time.
// elem_size is the size of one element in bytes.
// Returns nullptr if no bucket contains index i (after asserting in builds where assert is enabled).
CUDA_CALLABLE inline void* fabricarray_element_ptr(const fabricarray_t<void>& fa, size_t i, size_t elem_size)
{
    const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
    assert(bucket && "Fabric array index out of range");

    // With assertions compiled out, fail softly instead of dereferencing a null bucket.
    if (!bucket)
        return nullptr;

    size_t index_in_bucket = i - bucket->index_start;

    // byte-wise addressing, since the element type is unknown here
    return (char*)bucket->ptr + index_in_bucket * elem_size;
}


// Return a reference to element i of a fabric array.
// The bucket containing i is located first, then the element is addressed
// relative to the first index covered by that bucket.
template <typename T>
CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i)
{
    const fabricbucket_t* const bucket = fabricarray_find_bucket(fa, i);
    assert(bucket && "Fabric array index out of range");

    // position of the element within the bucket's storage
    const size_t offset = i - bucket->index_start;
    T* const elements = static_cast<T*>(bucket->ptr);

    T& result = elements[offset];

    FP_VERIFY_FWD_1(result)

    return result;
}


// Indexing for fabric array of arrays:
// return a reference to item j of the inner array stored at element i.
template <typename T>
CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i, size_t j)
{
    const fabricbucket_t* const bucket = fabricarray_find_bucket(fa, i);
    assert(bucket && "Fabric array index out of range");
    assert(bucket->lengths && "Missing inner array lengths");

    // position of the inner array within the bucket
    const size_t offset = i - bucket->index_start;

    // the bucket stores one data pointer and one length per inner array
    void** const inner_ptrs = static_cast<void**>(bucket->ptr);
    void* const inner_data = inner_ptrs[offset];
    const size_t length = bucket->lengths[offset];

    assert(j < length && "Fabric array inner index out of range");

    T& result = static_cast<T*>(inner_data)[j];

    FP_VERIFY_FWD_1(result)

    return result;
}


// Return an array_t view over the inner array stored at element i
// of a fabric array of arrays.
template <typename T>
CUDA_CALLABLE inline array_t<T> view(fabricarray_t<T>& fa, size_t i)
{
    const fabricbucket_t* const bucket = fabricarray_find_bucket(fa, i);
    assert(bucket && "Fabric array index out of range");
    assert(bucket->lengths && "Missing inner array lengths");

    // position of the inner array within the bucket
    const size_t offset = i - bucket->index_start;

    // look up the inner array's data pointer and its length
    void** const inner_ptrs = static_cast<void**>(bucket->ptr);
    T* const inner_data = static_cast<T*>(inner_ptrs[offset]);
    const size_t length = bucket->lengths[offset];

    return array_t<T>(inner_data, int(length));
}


// Return a reference to element i of an indexed fabric array.
// i is first translated through the index list, and the resulting position
// is then looked up in the wrapped fabric array.
template <typename T>
CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i)
{
    // index lookup
    assert(i < ifa.size);
    const size_t fabric_index = size_t(ifa.indices[i]);

    const fabricbucket_t* const bucket = fabricarray_find_bucket(ifa.fa, fabric_index);
    assert(bucket && "Fabric array index out of range");

    // position of the element within the bucket's storage
    const size_t offset = fabric_index - bucket->index_start;
    T* const elements = static_cast<T*>(bucket->ptr);

    T& result = elements[offset];

    FP_VERIFY_FWD_1(result)

    return result;
}


// Indexing for indexed fabric array of arrays:
// translate i through the index list, then return a reference to item j
// of the inner array stored at the resulting position.
template <typename T>
CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i, size_t j)
{
    // index lookup
    assert(i < ifa.size);
    const size_t fabric_index = size_t(ifa.indices[i]);

    const fabricbucket_t* const bucket = fabricarray_find_bucket(ifa.fa, fabric_index);
    assert(bucket && "Fabric array index out of range");
    assert(bucket->lengths && "Missing inner array lengths");

    // position of the inner array within the bucket
    const size_t offset = fabric_index - bucket->index_start;

    // the bucket stores one data pointer and one length per inner array
    void** const inner_ptrs = static_cast<void**>(bucket->ptr);
    void* const inner_data = inner_ptrs[offset];
    const size_t length = bucket->lengths[offset];

    assert(j < length && "Fabric array inner index out of range");

    T& result = static_cast<T*>(inner_data)[j];

    FP_VERIFY_FWD_1(result)

    return result;
}


// Return an array_t view over the inner array selected by element i
// of an indexed fabric array of arrays.
template <typename T>
CUDA_CALLABLE inline array_t<T> view(indexedfabricarray_t<T>& ifa, size_t i)
{
    // index lookup
    assert(i < ifa.size);
    const size_t fabric_index = size_t(ifa.indices[i]);

    const fabricbucket_t* const bucket = fabricarray_find_bucket(ifa.fa, fabric_index);
    assert(bucket && "Fabric array index out of range");
    assert(bucket->lengths && "Missing inner array lengths");

    // position of the inner array within the bucket
    const size_t offset = fabric_index - bucket->index_start;

    // look up the inner array's data pointer and its length
    void** const inner_ptrs = static_cast<void**>(bucket->ptr);
    T* const inner_data = static_cast<T*>(inner_ptrs[offset]);
    const size_t length = bucket->lengths[offset];

    return array_t<T>(inner_data, int(length));
}

} // namespace wp