#include "virtgpu-utils.h"
#include <malloc.h>
#include <stdlib.h>
#include <string.h>

/* Nodes are allocated 64-byte aligned, so the low 6 bits of a node pointer
 * are always zero and can carry the node's level (see the handle helpers
 * below). */
#define NODE_ALLOC_ALIGN 64
#define NODE_PTR_MASK (~((uintptr_t) NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t) NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0

/* Aligned-allocation shims.  Memory from _aligned_malloc() may only be
 * released with _aligned_free(), never plain free(), so the Windows pair is
 * kept together and guarded.  Defining the os_malloc_aligned() macro
 * unconditionally would also macro-rewrite the posix_memalign-based
 * os_malloc_aligned() function defined later in this file. */
#ifdef _WIN32
#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align)
#define os_free_aligned(_ptr) _aligned_free(_ptr)
#else
#define os_free_aligned(_ptr) free(_ptr)
#endif

/* Full-barrier compare-and-swap; evaluates to the value previously stored
 * at *(v). */
#define p_atomic_cmpxchg(v, old, _new) __sync_val_compare_and_swap((v), (old), (_new))
/* Returns floor(log2(n)) for n >= 1, and 0 for n == 0. */
static inline uint64_t util_logbase2_64(uint64_t n) {
#if defined(HAVE___BUILTIN_CLZLL)
    /* OR with 1 so n == 0 yields clz(1) == 63 and the result 0, instead of
     * the undefined __builtin_clzll(0). */
    return (uint64_t)((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
    /* Portable binary search: successively test and strip the top half of
     * the remaining bit range (32, 16, 8, 4, 2, 1). */
    uint64_t result = 0;
    for (unsigned shift = 32; shift != 0; shift >>= 1) {
        if (n >= ((uint64_t)1 << shift)) {
            n >>= shift;
            result += shift;
        }
    }
    return result;
#endif
}
/* Initializes an empty sparse array of elem_size-byte elements whose tree
 * nodes each hold node_size entries.  node_size must be a power of two >= 2
 * (checked in debug builds). */
void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) {
    const uint64_t size_log2 = util_logbase2_64(node_size);
    assert(node_size >= 2 && node_size == (1ull << size_log2));

    memset(arr, 0, sizeof(*arr));
    arr->elem_size = elem_size;
    arr->node_size_log2 = size_log2;
}
#ifndef _WIN32
/* POSIX aligned allocator (the Windows build uses the _aligned_malloc()
 * macro instead, so this definition is compiled out there to avoid the
 * function-like macro rewriting it).  Returns NULL on failure.
 *
 * posix_memalign() requires the alignment to be a power-of-two multiple of
 * sizeof(void *); the power-of-two property is assumed of the caller's
 * request, and the multiple-of-pointer-size property is enforced by
 * rounding up here. */
static inline void * os_malloc_aligned(size_t size, size_t alignment) {
    void * ptr = NULL;
    alignment = (alignment + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
}
#endif
/* Extracts the node's base pointer from a handle by stripping the level
 * bits packed into the low alignment bits. */
static inline void * _util_sparse_array_node_data(uintptr_t handle) {
    const uintptr_t data_bits = handle & NODE_PTR_MASK;
    return (void *) data_bits;
}
/* Extracts the node's tree level, stored in the low alignment bits of the
 * handle (0 == leaf). */
static inline unsigned _util_sparse_array_node_level(uintptr_t handle) {
    const uintptr_t level_bits = handle & NODE_LEVEL_MASK;
    return (unsigned) level_bits;
}
/* Recursively frees a node handle and everything beneath it.  Interior
 * nodes (level > 0) hold an array of child handles that must be torn down
 * first; leaf nodes only own their data buffer. */
static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) {
    const unsigned level = _util_sparse_array_node_level(node);

    if (level > 0) {
        uintptr_t * children = (uintptr_t *) _util_sparse_array_node_data(node);
        const size_t child_count = (size_t)1 << arr->node_size_log2;

        for (size_t child = 0; child < child_count; child++) {
            if (children[child] != NULL_NODE) {
                _util_sparse_array_node_finish(arr, children[child]);
            }
        }
    }

    os_free_aligned(_util_sparse_array_node_data(node));
}
/* Packs a node's data pointer and its level into a single handle.  The
 * data pointer must be NODE_ALLOC_ALIGN-aligned so its low bits are free
 * to carry the level. */
static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) {
    const uintptr_t handle = (uintptr_t) data;

    assert(data != NULL);
    assert((handle & NODE_LEVEL_MASK) == 0);
    assert((level & NODE_PTR_MASK) == 0);

    return handle | level;
}
/* Allocates one zero-filled tree node at the given level and returns its
 * packed handle.  Level-0 (leaf) nodes hold a packed array of elements;
 * higher levels hold child handles.  The NODE_ALLOC_ALIGN alignment keeps
 * the low pointer bits free for the level (see _util_sparse_array_node()).
 *
 * Made `static inline` for consistency with every other helper here: a
 * plain `inline` definition with no extern declaration provides no
 * external definition in C99 and can fail to link when not inlined. */
static inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) {
    size_t size;
    if (level == 0) {
        size = arr->elem_size << arr->node_size_log2;
    } else {
        size = sizeof(uintptr_t) << arr->node_size_log2;
    }

    void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
    /* Callers have no failure path for a null handle, so treat allocation
     * failure as fatal rather than passing NULL to memset (undefined
     * behavior) and crashing somewhere less obvious. */
    if (!data) {
        abort();
    }
    memset(data, 0, size);

    return _util_sparse_array_node(data, level);
}
/* Atomically installs `node` into *node_ptr if it still holds cmp_node.
 * Returns the node that ends up installed: ours on success, or the node
 * another thread raced in first (in which case ours is freed). */
static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) {
    const uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

    if (prev_node == cmp_node) {
        /* We won the race: our node is now published. */
        return node;
    }

    /* We lost the race. Free this one and return the one that was already
     * allocated.
     */
    os_free_aligned(_util_sparse_array_node_data(node));
    return prev_node;
}
/* Returns a pointer to the element at index idx, growing the radix tree on
 * demand.  Designed for concurrent callers: the root and child slots are
 * read with p_atomic_read and installed via compare-and-swap in
 * _util_sparse_array_set_or_free_node(), so a thread that loses an install
 * race frees its freshly built node and adopts the winner's.  Newly
 * allocated element storage is zero-filled by the node allocator. */
void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) {
    const unsigned node_size_log2 = arr->node_size_log2;
    uintptr_t root = p_atomic_read(&arr->root);
    if (unlikely(!root)) {
        /* First access: build a root just deep enough to cover idx.  Each
         * tree level consumes node_size_log2 bits of the index. */
        unsigned root_level = 0;
        uint64_t idx_iter = idx >> node_size_log2;
        while (idx_iter) {
            idx_iter >>= node_size_log2;
            root_level++;
        }
        uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
        root = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root);
    }
    /* Grow the tree upward (one level per iteration) until the root's level
     * is high enough that idx falls within its span. */
    while (1) {
        unsigned root_level = _util_sparse_array_node_level(root);
        uint64_t root_idx = idx >> (root_level * node_size_log2);
        if (likely(root_idx < (1ull << node_size_log2))) {
            break;
        }
        /* In this case, we have a root but its level is low enough that the
         * requested index is out-of-bounds.
         */
        uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);
        uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root);
        new_root_children[0] = root;
        /* We only add one at a time instead of the whole tree because it's
         * easier to ensure correctness of both the tree building and the
         * clean-up path. Because we're only adding one node we never have to
         * worry about trying to free multiple things without freeing the old
         * things.
         */
        root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
    }
    /* Walk down from the root; each level peels off node_size_log2 index
     * bits and allocates missing children along the way. */
    void * node_data = _util_sparse_array_node_data(root);
    unsigned node_level = _util_sparse_array_node_level(root);
    while (node_level > 0) {
        uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1);
        uintptr_t * children = (uintptr_t *) node_data;
        uintptr_t child = p_atomic_read(&children[child_idx]);
        if (unlikely(!child)) {
            /* Missing child: allocate one and try to install it; on a race
             * we adopt whichever node actually got published. */
            child = _util_sparse_array_node_alloc(arr, node_level - 1);
            child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child);
        }
        node_data = _util_sparse_array_node_data(child);
        node_level = _util_sparse_array_node_level(child);
    }
    /* node_data now points at a leaf: a packed array of elem_size-byte
     * elements. */
    uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
    return (void *) ((char *) node_data + (elem_idx * arr->elem_size));
}