arudradey's picture
download
raw
43.5 kB
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h"
#include <string.h> // memcpy, memset
#include <stdlib.h> // atexit
#define MI_MEMID_INIT(kind) {{{NULL,0}}, kind, true /* pinned */, true /* committed */, false /* zero */ }
#define MI_MEMID_STATIC MI_MEMID_INIT(MI_MEM_STATIC)
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
MI_ATOMIC_VAR_INIT(0), // xthread_id
NULL, // free
0, // used
0, // capacity
0, // reserved capacity
0, // retire_expire
false, // is_zero
NULL, // local_free
MI_ATOMIC_VAR_INIT(0), // xthread_free
0, // block_size
NULL, // page_start
#if (MI_PADDING || MI_ENCODE_FREELIST)
{ 0, 0 }, // keys
#endif
NULL, // theap
NULL, // heap
NULL, NULL, // next, prev
MI_ARENA_SLICE_SIZE, // page_committed
MI_MEMID_STATIC // memid
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8)
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#elif (MI_PADDING>0)
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#else
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
#endif
// Empty page queues for every bin
#define QNULL(sz) { NULL, NULL, 0, (sz)*sizeof(uintptr_t) }
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
QNULL(MI_LARGE_MAX_OBJ_WSIZE + 1 /* 655360, Huge queue */), \
QNULL(MI_LARGE_MAX_OBJ_WSIZE + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0}
// Empty statistics
#define MI_STATS_NULL \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
{ 0 }, { 0 }, \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
{ 0 }, { 0 }, { 0 }, { 0 }, \
{ 0 }, { 0 }, { 0 }, { 0 }, \
\
{ 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, \
MI_INIT6(MI_STAT_COUNT_NULL), \
{ 0 }, { 0 }, { 0 }, { 0 }, { 0 }, \
\
{ MI_INIT4(MI_STAT_COUNT_NULL) }, \
{ { 0 }, { 0 }, { 0 }, { 0 } }, \
\
{ MI_INIT74(MI_STAT_COUNT_NULL) }, \
{ MI_INIT74(MI_STAT_COUNT_NULL) }, \
{ MI_INIT5(MI_STAT_COUNT_NULL) }
// --------------------------------------------------------
// Statically allocate an empty theap as the initial
// thread local value for the default theap,
// and statically allocate the backing theap for the main
// thread so it can function without doing any allocation
// itself (as accessing a thread local for the first time
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
static mi_decl_cache_align mi_subproc_t subproc_main
#if __cplusplus
= { }; // empty initializer to prevent running the constructor (with msvc)
#else
= { 0 }; // C zero initialize
#endif
static mi_subproc_t* subprocs = &subproc_main;
static mi_lock_t subprocs_lock;
static mi_decl_cache_align mi_tld_t tld_empty = {
0, // thread_id
0, // thread_seq
0, // default numa node
&subproc_main, // subproc
NULL, // theaps list
MI_LOCK_INITIALIZER, // theaps lock
false, // recurse
false, // is_in_threadpool
MI_MEMID_STATIC // memid
};
mi_decl_cache_align const mi_theap_t _mi_theap_empty = {
&tld_empty, // tld
MI_ATOMIC_VAR_INIT(NULL), // heap
MI_ATOMIC_VAR_INIT(1), // refcount
0, // heartbeat
0, // cookie
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, // pages_full_size
0, 0, // generic count
NULL, NULL, // tnext, tprev
NULL, NULL, // hnext, hprev
0, // full page retain
false, // allow reclaim
true, // allow abandon
#if MI_GUARDED
0, 0, 0, 1, // sample count is 1 so we never write to it (see `internal.h:mi_theap_malloc_use_guarded`)
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC,
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
};
mi_decl_cache_align const mi_theap_t _mi_theap_empty_wrong = {
&tld_empty, // tld
MI_ATOMIC_VAR_INIT(NULL), // heap
MI_ATOMIC_VAR_INIT(1), // refcount
0, // heartbeat
0, // cookie
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, // pages_full_size
0, 0, // generic count
NULL, NULL, // tnext, tprev
NULL, NULL, // hnext, hprev
0, // full page retain
false, // allow reclaim
true, // allow abandon
#if MI_GUARDED
0, 0, 0, 1, // sample count is 1 so we never write to it (see `internal.h:mi_theap_malloc_use_guarded`)
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC,
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
};
// Heap for the main thread
extern mi_decl_hidden mi_decl_cache_align mi_theap_t theap_main;
extern mi_decl_hidden mi_decl_cache_align mi_heap_t heap_main;
static mi_decl_cache_align mi_tld_t tld_main = {
0, // thread_id
0, // thread_seq
0, // numa node
&subproc_main, // subproc
&theap_main, // theaps list
MI_LOCK_INITIALIZER, // theaps lock
false, // recurse
false, // is_in_threadpool
MI_MEMID_STATIC // memid
};
mi_decl_cache_align mi_theap_t theap_main = {
&tld_main, // thread local data
MI_ATOMIC_VAR_INIT(&heap_main), // main heap
MI_ATOMIC_VAR_INIT(1), // refcount
0, // heartbeat
0, // initial cookie
{ {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, // pages_full_size
0, 0, // generic count
NULL, NULL, // tnext, tprev
NULL, NULL, // hnext, hprev
2, // full page retain
true, // allow page reclaim
true, // allow page abandon
#if MI_GUARDED
0, 0, 0, 0,
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC,
{ sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL }, // stats
};
mi_decl_cache_align mi_heap_t heap_main
#if __cplusplus
= { }; // empty initializer to prevent running the constructor (with msvc)
#else
= { 0 }; // C zero initialize
#endif
// the theap belonging to the main heap
mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_main = NULL;
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return _mi_prim_thread_id();
}
#if MI_TLS_MODEL_THREAD_LOCAL
// the thread-local main theap for allocation
mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_default = (mi_theap_t*)&_mi_theap_empty;
// the last used non-main theap
mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_cached = (mi_theap_t*)&_mi_theap_empty;
#endif
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { sizeof(mi_stats_t), MI_STAT_VERSION, MI_STATS_NULL };
#if MI_GUARDED
mi_decl_export void mi_theap_guarded_set_sample_rate(mi_theap_t* theap, size_t sample_rate, size_t seed) {
theap->guarded_sample_rate = sample_rate;
theap->guarded_sample_count = sample_rate; // count down samples
if (theap->guarded_sample_rate > 1) {
if (seed == 0) {
seed = _mi_theap_random_next(theap);
}
theap->guarded_sample_count = (seed % theap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate`
}
}
mi_decl_export void mi_theap_guarded_set_size_bound(mi_theap_t* theap, size_t min, size_t max) {
theap->guarded_size_min = min;
theap->guarded_size_max = (min > max ? min : max);
}
void _mi_theap_guarded_init(mi_theap_t* theap) {
mi_theap_guarded_set_sample_rate(theap,
(size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX),
(size_t)mi_option_get(mi_option_guarded_sample_seed));
mi_theap_guarded_set_size_bound(theap,
(size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX),
(size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
}
#else
mi_decl_export void mi_theap_guarded_set_sample_rate(mi_theap_t* theap, size_t sample_rate, size_t seed) {
MI_UNUSED(theap); MI_UNUSED(sample_rate); MI_UNUSED(seed);
}
mi_decl_export void mi_theap_guarded_set_size_bound(mi_theap_t* theap, size_t min, size_t max) {
MI_UNUSED(theap); MI_UNUSED(min); MI_UNUSED(max);
}
void _mi_theap_guarded_init(mi_theap_t* theap) {
MI_UNUSED(theap);
}
#endif
/* -----------------------------------------------------------
Initialization
Note: on some platforms lock_init or just a thread local access
can cause allocation and induce recursion during initialization.
----------------------------------------------------------- */
// Initialize main subproc
static void mi_subproc_main_init(void) {
if (subproc_main.memid.memkind != MI_MEM_STATIC) {
subproc_main.memid = _mi_memid_create(MI_MEM_STATIC);
subproc_main.heaps = &heap_main;
subproc_main.heap_total_count = 1;
subproc_main.heap_count = 1;
mi_atomic_store_ptr_release(mi_heap_t, &subproc_main.heap_main, &heap_main);
__mi_stat_increase_mt(&subproc_main.stats.heaps, 1);
mi_lock_init(&subproc_main.arena_reserve_lock);
mi_lock_init(&subproc_main.heaps_lock);
mi_lock_init(&subprocs_lock);
mi_lock_init(&tld_empty.theaps_lock);
}
}
// Initialize main tld
static void mi_tld_main_init(void) {
if (tld_main.thread_id == 0) {
tld_main.thread_id = _mi_prim_thread_id();
mi_lock_init(&tld_main.theaps_lock);
}
}
void _mi_theap_options_init(mi_theap_t* theap) {
theap->allow_page_reclaim = (mi_option_get(mi_option_page_reclaim_on_free) >= 0);
theap->allow_page_abandon = (mi_option_get(mi_option_page_full_retain) >= 0);
theap->page_full_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
}
// Initialization of the (statically allocated) main theap, and the main tld and subproc.
static void mi_theap_main_init(void) {
if mi_unlikely(theap_main.memid.memkind != MI_MEM_STATIC) {
// theap
theap_main.memid = _mi_memid_create(MI_MEM_STATIC);
#if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&theap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking (issue #1185)
#else
_mi_random_init(&theap_main.random);
#endif
theap_main.cookie = _mi_theap_random_next(&theap_main);
_mi_theap_options_init(&theap_main);
_mi_theap_guarded_init(&theap_main);
}
}
// Initialize main heap
static void mi_heap_main_init(void) {
if mi_unlikely(heap_main.subproc == NULL) {
heap_main.subproc = &subproc_main;
heap_main.theaps = &theap_main;
mi_theap_main_init();
mi_subproc_main_init();
mi_tld_main_init();
mi_lock_init(&heap_main.theaps_lock);
mi_lock_init(&heap_main.os_abandoned_pages_lock);
mi_lock_init(&heap_main.arena_pages_lock);
}
}
/* -----------------------------------------------------------
Thread local data
----------------------------------------------------------- */
// Allocate fresh tld
static mi_tld_t* mi_tld_alloc(void) {
if (_mi_is_main_thread()) {
mi_atomic_increment_relaxed(&tld_main.subproc->thread_count);
return &tld_main;
}
else {
// allocate tld meta-data
// note: we need to be careful to not access the tld from `_mi_meta_zalloc`
// (and in turn from `_mi_arena_alloc_aligned` and `_mi_os_alloc_aligned`).
mi_memid_t memid;
mi_tld_t* tld = (mi_tld_t*)_mi_meta_zalloc(sizeof(mi_tld_t), &memid);
if (tld==NULL) {
_mi_error_message(ENOMEM, "unable to allocate memory for thread local data\n");
return NULL;
}
tld->memid = memid;
tld->theaps = NULL;
mi_lock_init(&tld->theaps_lock);
tld->subproc = &subproc_main;
tld->numa_node = _mi_os_numa_node();
tld->thread_id = _mi_prim_thread_id();
tld->thread_seq = mi_atomic_increment_relaxed(&tld->subproc->thread_total_count);
tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();
mi_atomic_increment_relaxed(&tld->subproc->thread_count);
return tld;
}
}
#define MI_TLD_INVALID ((mi_tld_t*)1)
mi_decl_noinline static void mi_tld_free(mi_tld_t* tld) {
mi_lock_done(&tld->theaps_lock);
if (tld != NULL && tld != MI_TLD_INVALID) {
mi_atomic_decrement_relaxed(&tld->subproc->thread_count);
_mi_meta_free(tld, sizeof(mi_tld_t), tld->memid);
}
#if 0
// do not read/write to `thread_tld` on older macOS <= 14 as that will re-initialize the thread local storage
// (since we are calling this during pthread shutdown)
// (and this could happen on other systems as well, so let's never do it)
thread_tld = MI_TLD_INVALID;
#endif
}
// return the thread local heap ensuring it is initialized (and not `NULL` or `&_mi_theap_empty`);
mi_theap_t* _mi_theap_default_safe(void) {
mi_theap_t* theap = _mi_theap_default();
if mi_likely(mi_theap_is_initialized(theap)) return theap;
mi_thread_init();
mi_assert_internal(mi_theap_is_initialized(_mi_theap_default()));
return _mi_theap_default();
}
// return the main theap ensuring it is initialized.
mi_theap_t* _mi_theap_main_safe(void) {
mi_theap_t* theap = __mi_theap_main;
if mi_unlikely(theap==NULL) { // if thread_init or default_set was never called
mi_thread_init(); // sets the default slot to the main theap
theap = _mi_theap_default();
mi_assert_internal(theap!=NULL);
mi_assert_internal(_mi_is_theap_main(theap));
if (_mi_is_theap_main(theap)) {
__mi_theap_main = theap;
}
}
mi_assert_internal(theap!=NULL && _mi_is_theap_main(theap));
return theap;
}
mi_subproc_t* _mi_subproc_main(void) {
return &subproc_main;
}
mi_subproc_t* _mi_subproc(void) {
// should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()`
// todo: this will still fail on OS systems where the first access to a thread-local causes allocation.
// on such systems we can check for this with the _mi_prim_get_default_theap as those are protected (by being
// stored in a TLS slot for example)
mi_theap_t* theap = _mi_theap_default();
if (theap == NULL) {
return _mi_subproc_main();
}
else {
return theap->tld->subproc; // avoid using thread local storage (`thread_tld`)
}
}
mi_heap_t* _mi_subproc_heap_main(mi_subproc_t* subproc) {
mi_heap_t* heap = mi_atomic_load_ptr_relaxed(mi_heap_t,&subproc->heap_main);
if mi_likely(heap!=NULL) {
return heap;
}
else {
mi_heap_main_init();
mi_assert_internal(mi_atomic_load_relaxed(&subproc->heap_main) != NULL);
return mi_atomic_load_ptr_relaxed(mi_heap_t,&subproc->heap_main);
}
}
mi_heap_t* mi_heap_main(void) {
return _mi_subproc_heap_main(_mi_subproc()); // don't use mi_theap_main_init_get() so this call works during process_init
}
bool _mi_is_heap_main(const mi_heap_t* heap) {
mi_assert_internal(heap!=NULL);
return (_mi_subproc_heap_main(heap->subproc) == heap);
}
bool _mi_is_theap_main(const mi_theap_t* theap) {
return (mi_theap_is_initialized(theap) && _mi_is_heap_main(_mi_theap_heap(theap)));
}
/* -----------------------------------------------------------
Sub process
----------------------------------------------------------- */
mi_subproc_id_t mi_subproc_main(void) {
return _mi_subproc_main();
}
mi_subproc_id_t mi_subproc_current(void) {
return _mi_subproc();
}
mi_subproc_id_t mi_subproc_new(void) {
static _Atomic(size_t) subproc_total_count;
mi_memid_t memid;
mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid);
if (subproc == NULL) return NULL;
subproc->memid = memid;
subproc->subproc_seq = mi_atomic_increment_relaxed(&subproc_total_count) + 1;
mi_lock_init(&subproc->arena_reserve_lock);
mi_lock_init(&subproc->heaps_lock);
mi_lock(&subprocs_lock) {
// push on subproc list
subproc->next = subprocs;
if (subprocs!=NULL) { subprocs->prev = subproc; }
subprocs = subproc;
}
return subproc;
}
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id);
}
// destroy all subproc resources including arena's, heap's etc.
static void mi_subproc_unsafe_destroy(mi_subproc_t* subproc, bool acquire_subprocs_lock)
{
// remove from the subproc list
mi_lock_maybe(&subprocs_lock, acquire_subprocs_lock) {
if (subproc->next!=NULL) { subproc->next->prev = subproc->prev; }
if (subproc->prev!=NULL) { subproc->prev->next = subproc->next; }
else { mi_assert_internal(subprocs==subproc); subprocs = subproc->next; }
}
// destroy all subproc heaps
mi_lock(&subproc->heaps_lock) {
mi_heap_t* heap = subproc->heaps;
while (heap != NULL) {
mi_heap_t* next = heap->next;
if (heap!=subproc->heap_main) { mi_heap_destroy(heap); }
heap = next;
}
mi_assert_internal(subproc->heaps == subproc->heap_main);
_mi_heap_force_destroy(subproc->heap_main); // no warning if destroying the main heap
}
// remove associated arenas
_mi_arenas_unsafe_destroy_all(subproc);
// merge stats back into the main subproc?
if (subproc!=&subproc_main) {
_mi_stats_merge_into(&subproc_main.stats, &subproc->stats);
}
// safe to release
// todo: should we refcount subprocesses?
mi_lock_done(&subproc->arena_reserve_lock);
mi_lock_done(&subproc->heaps_lock);
if (subproc!=&subproc_main) {
_mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid);
}
else {
// for the main subproc, also release the global page map
_mi_page_map_unsafe_destroy(&subproc_main);
}
}
void mi_subproc_destroy(mi_subproc_id_t subproc_id) {
if (subproc_id == NULL) return;
mi_subproc_unsafe_destroy(_mi_subproc_from_id(subproc_id), true /* take lock */);
}
static void mi_subprocs_unsafe_destroy_all(void) {
mi_lock(&subprocs_lock) {
mi_subproc_t* subproc = subprocs;
while (subproc!=NULL) {
mi_subproc_t* next = subproc->next;
if (subproc!=&subproc_main) {
mi_subproc_unsafe_destroy(subproc, false /* take subprocs lock */);
}
subproc = next;
}
}
mi_subproc_unsafe_destroy(&subproc_main, true /* take subprocs lock */);
}
void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
mi_tld_t* const tld = _mi_theap_default_safe()->tld;
mi_assert(tld->subproc== &subproc_main);
if (tld->subproc != &subproc_main) {
_mi_warning_message("unable to add thread to the subprocess as it was already in another subprocess (id: %p)\n", subproc);
return;
}
tld->subproc = subproc;
tld->thread_seq = mi_atomic_increment_relaxed(&subproc->thread_total_count);
mi_atomic_decrement_relaxed(&subproc_main.thread_count);
mi_atomic_increment_relaxed(&subproc->thread_count);
}
bool mi_subproc_visit_heaps(mi_subproc_id_t subproc_id, mi_heap_visit_fun* visitor, void* arg) {
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
if (subproc==NULL) return false;
bool ok = true;
mi_lock(&subproc->heaps_lock) {
for (mi_heap_t* heap = subproc->heaps; heap!=NULL && ok; heap = heap->next) {
ok = (*visitor)(heap, arg);
}
}
return ok;
}
/* -----------------------------------------------------------
Allocate theap data
----------------------------------------------------------- */
// Initialize the thread local default theap, called from `mi_thread_init`
static mi_theap_t* _mi_thread_init_theap_default(void) {
mi_theap_t* theap = _mi_theap_default();
if (mi_theap_is_initialized(theap)) return theap;
if (_mi_is_main_thread()) {
mi_heap_main_init();
theap = &theap_main;
}
else {
// allocates tld data
// note: we cannot access thread-locals yet as that can cause (recursive) allocation
// (on macOS <= 14 for example where the loader allocates thread-local data on demand).
mi_tld_t* tld = mi_tld_alloc();
// allocate and initialize the theap for the main heap
theap = _mi_theap_create(mi_heap_main(), tld);
}
// associate the theap with this thread
// (this is safe, on macOS for example, the theap is set in a dedicated TLS slot and thus does not cause recursive allocation)
_mi_theap_default_set(theap);
return theap;
}
// Free the thread local theaps
static void mi_thread_theaps_done(mi_tld_t* tld)
{
// reset the thread local theaps
_mi_theap_default_set((mi_theap_t*)&_mi_theap_empty);
_mi_theap_cached_set((mi_theap_t*)&_mi_theap_empty);
__mi_theap_main = NULL;
// abandon the pages of all theaps in this thread
mi_lock(&tld->theaps_lock) {
mi_theap_t* theap = tld->theaps;
while (theap != NULL) {
mi_theap_t* next = theap->tnext;
// never destroy theaps; if a dll is linked statically with mimalloc,
// there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_theap_collect_abandon(theap);
mi_assert_internal(theap->page_count==0);
theap = next;
}
}
// free the theaps of this thread.
// This can run concurrently with a `mi_heap_free_theaps` and we need to ensure we free theaps atomically.
// We do this in a loop where we release the theaps_lock at every potential re-iteration to unblock
// potential concurrent `mi_heap_free_theaps` which tries to remove the theap from our theaps list.
bool all_freed;
do {
all_freed = true;
mi_lock(&tld->theaps_lock) {
mi_theap_t* theap = tld->theaps;
while (theap != NULL) {
mi_theap_t* next = theap->tnext;
mi_assert_internal(theap->page_count==0);
if (!_mi_theap_free(theap, true /* acquire heap->theaps_lock */, false /* dont re-acquire the tld->theaps_lock*/ )) {
all_freed = false;
}
theap = next;
}
}
if (!all_freed) {
mi_subproc_stat_counter_increase(tld->subproc,heaps_delete_wait,1);
_mi_prim_thread_yield();
}
else {
mi_assert_internal(tld->theaps==NULL);
}
} while (!all_freed);
mi_assert(_mi_theap_default()==(mi_theap_t*)&_mi_theap_empty); // careful to not re-initialize the default theap during theap_delete
mi_assert(!mi_theap_is_initialized(_mi_theap_default()));
}
// --------------------------------------------------------
// Try to run `mi_thread_done()` automatically so any memory
// owned by the thread but not yet released can be abandoned
// and re-owned by another thread.
//
// 1. windows dynamic library:
// call from DllMain on DLL_THREAD_DETACH
// 2. windows static library:
// use special linker section to call a destructor when the thread is done
// 3. unix, pthreads:
// use a pthread key to call a destructor when a pthread is done
//
// In the last two cases we also need to call `mi_process_init`
// to set up the thread local keys.
// --------------------------------------------------------
// Set up handlers so `mi_thread_done` is called automatically
static void mi_process_setup_auto_thread_done(void) {
static bool tls_initialized = false; // fine if it races
if (tls_initialized) return;
tls_initialized = true;
_mi_prim_thread_init_auto_done();
_mi_theap_default_set(&theap_main);
}
bool _mi_is_main_thread(void) {
return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id());
}
// Initialize thread
void mi_thread_init(void) mi_attr_noexcept
{
// ensure our process has started already
mi_process_init();
// if the theap_default is already set we have already initialized
if (_mi_thread_is_initialized()) return;
// initialize the default theap
_mi_thread_init_theap_default();
mi_heap_stat_increase(mi_heap_main(), threads, 1);
// _mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
void mi_thread_done(void) mi_attr_noexcept {
_mi_thread_done(NULL);
}
void _mi_thread_done(mi_theap_t* _theap_main)
{
// NULL can be passed on some platforms
if (_theap_main==NULL) {
_theap_main = __mi_theap_main; // don't call `mi_theap_main_safe` as that re-initializes the thread
if (_theap_main==NULL) { // can happen if `mi_theap_main_safe` is never called; but then the default is main
_theap_main = _mi_theap_default();
mi_assert_internal(_theap_main==NULL || _mi_is_theap_main(_theap_main));
}
}
// prevent re-entrancy through theap_done/theap_set_default_direct (issue #699)
if (!mi_theap_is_initialized(_theap_main)) {
return;
}
// release dynamic thread_local's
_mi_thread_locals_thread_done();
// note: we store the tld as we should avoid reading `thread_tld` at this point (to avoid reinitializing the thread local storage)
mi_tld_t* const tld = _theap_main->tld;
// adjust stats
mi_heap_stat_decrease(_mi_subproc_heap_main(tld->subproc), threads, 1); // todo: or `_theap_main->heap`?
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local theaps...
if (tld->thread_id != _mi_prim_thread_id()) return;
// delete the thread local theaps
mi_thread_theaps_done(tld);
// free thread local data
mi_tld_free(tld);
}
mi_decl_cold mi_decl_noinline mi_theap_t* _mi_theap_empty_get(void) {
return (mi_theap_t*)&_mi_theap_empty;
}
#if MI_TLS_MODEL_DYNAMIC_WIN32
// If we can, we use one of the 64 direct TLS slots (but fall back to expansion slots if needed)
// See <https://en.wikipedia.org/wiki/Win32_Thread_Information_Block> for the offsets.
#if MI_SIZE_SIZE==4
#define MI_TLS_DIRECT_FIRST (0x0E10 / MI_SIZE_SIZE)
#else
#define MI_TLS_DIRECT_FIRST (0x1480 / MI_SIZE_SIZE)
#endif
#define MI_TLS_DIRECT_SLOTS (64)
#define MI_TLS_EXPANSION_SLOTS (1024)
#if !MI_WIN_DIRECT_TLS
#define MI_TLS_INITIAL_SLOT MI_TLS_EXPANSION_SLOT
#define MI_TLS_INITIAL_EXPANSION_SLOT (MI_TLS_EXPANSION_SLOTS-1)
#else
// with only direct entries, use the "arbitrary user data" field
// and assume it is NULL (see also <http://www.nynaeve.net/?p=98>)
#define MI_TLS_INITIAL_SLOT (5)
#define MI_TLS_INITIAL_EXPANSION_SLOT (0)
#endif
// we initially use the last of the expansion slots as the default NULL.
// note: this will fail if the program allocates exactly 1024+64 slots with TlsAlloc (which is quite unlikely)
mi_decl_hidden mi_decl_cache_align size_t _mi_theap_default_slot = MI_TLS_INITIAL_SLOT;
mi_decl_hidden size_t _mi_theap_default_expansion_slot = MI_TLS_INITIAL_EXPANSION_SLOT;
mi_decl_hidden size_t _mi_theap_cached_slot = MI_TLS_INITIAL_SLOT;
mi_decl_hidden size_t _mi_theap_cached_expansion_slot = MI_TLS_INITIAL_EXPANSION_SLOT;
static DWORD mi_tls_raw_index_default = TLS_OUT_OF_INDEXES;
static DWORD mi_tls_raw_index_cached = TLS_OUT_OF_INDEXES;
static bool mi_win_tls_slot_alloc(size_t* slot, size_t* extended, DWORD* raw_index) {
const DWORD index = TlsAlloc();
*raw_index = index;
if (index==TLS_OUT_OF_INDEXES) {
*extended = 0;
*slot = 0;
return false;
}
else if (index<MI_TLS_DIRECT_SLOTS) {
*extended = 0;
*slot = index + MI_TLS_DIRECT_FIRST;
return true;
}
#if !MI_WIN_DIRECT_TLS
else if (index < MI_TLS_DIRECT_SLOTS + MI_TLS_EXPANSION_SLOTS - 1) { // check maximum number of expansion slots - 1 (as we use the last one as the default)
*extended = index - MI_TLS_DIRECT_SLOTS;
*slot = MI_TLS_EXPANSION_SLOT;
return true;
}
#endif
else {
// to high an index for us
_mi_error_message(EFAULT, "returned tls index was too high (%u)\n", index);
TlsFree(index);
*raw_index = TLS_OUT_OF_INDEXES;
*extended = 0;
*slot = 0;
return false;
}
}
static void mi_win_tls_slot_free(DWORD* raw_index) {
if (*raw_index != TLS_OUT_OF_INDEXES) {
TlsFree(*raw_index);
*raw_index = TLS_OUT_OF_INDEXES;
}
}
static void mi_tls_slots_init(void) {
mi_atomic_do_once {
bool ok = mi_win_tls_slot_alloc(&_mi_theap_default_slot, &_mi_theap_default_expansion_slot, &mi_tls_raw_index_default);
if (ok) {
ok = mi_win_tls_slot_alloc(&_mi_theap_cached_slot, &_mi_theap_cached_expansion_slot, &mi_tls_raw_index_cached);
}
if (!ok) {
_mi_error_message(EFAULT, "unable to allocate fast TLS user slot (0x%zx)\n", _mi_theap_cached_slot);
}
}
}
static void mi_tls_slots_done(void) {
mi_win_tls_slot_free(&mi_tls_raw_index_default);
mi_win_tls_slot_free(&mi_tls_raw_index_cached );
}
static void mi_win_tls_slot_set(size_t slot, size_t extended_slot, void* value) {
mi_assert_internal((slot >= MI_TLS_DIRECT_FIRST && slot < MI_TLS_DIRECT_FIRST + MI_TLS_DIRECT_SLOTS) || slot == MI_TLS_EXPANSION_SLOT);
if (slot < MI_TLS_DIRECT_FIRST + MI_TLS_DIRECT_SLOTS) {
mi_prim_tls_slot_set(slot, value);
}
else {
mi_assert_internal(extended_slot < MI_TLS_EXPANSION_SLOTS);
TlsSetValue((DWORD)(extended_slot + MI_TLS_DIRECT_SLOTS), value); // use TlsSetValue to initialize the TlsExpansion array if needed
}
}
#elif MI_TLS_MODEL_DYNAMIC_PTHREADS
// only for pthreads for now
mi_decl_hidden pthread_key_t _mi_theap_default_key = 0;
mi_decl_hidden pthread_key_t _mi_theap_cached_key = 0;
// create a non-zero pthread key
static int mi_pthread_key_create( pthread_key_t* pkey ) {
pthread_key_t key;
int err = pthread_key_create(&key, NULL);
if (err!=0) return err;
if (key==0) {
// if we get a zero key, create another one as we use 0 for an invalid key
pthread_key_t key2;
err = pthread_key_create(&key2, NULL);
pthread_key_delete(key); // delete the old key
if (err!=0) return err;
key = key2;
}
mi_assert_internal(key!=0);
*pkey = key;
return 0;
}
static void mi_tls_slots_init(void) {
mi_atomic_do_once {
int err = mi_pthread_key_create(&_mi_theap_default_key);
if (err==0) {
err = mi_pthread_key_create(&_mi_theap_cached_key);
}
if (err!=0) {
_mi_error_message(EFAULT, "unable to allocate pthread keys (error %d)\n", err);
}
}
}
static void mi_tls_slots_done(void) {
if (_mi_theap_default_key != 0) {
pthread_key_delete(_mi_theap_default_key);
_mi_theap_default_key = 0;
}
if (_mi_theap_cached_key != 0) {
pthread_key_delete(_mi_theap_cached_key);
_mi_theap_cached_key = 0;
}
}
#else
static void mi_tls_slots_init(void) {
// nothing
}
static void mi_tls_slots_done(void) {
// nothing
}
#endif
void _mi_theap_cached_set(mi_theap_t* theap) {
mi_theap_t* prev = _mi_theap_cached();
if (prev==theap) return;
// set
mi_tls_slots_init();
#if MI_TLS_MODEL_THREAD_LOCAL
__mi_theap_cached = theap;
#elif MI_TLS_MODEL_FIXED_SLOT
mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_CACHED, theap);
#elif MI_TLS_MODEL_DYNAMIC_WIN32
mi_win_tls_slot_set(_mi_theap_cached_slot, _mi_theap_cached_expansion_slot, theap);
#elif MI_TLS_MODEL_DYNAMIC_PTHREADS
if (_mi_theap_cached_key!=0) pthread_setspecific(_mi_theap_cached_key, theap);
#endif
// update refcounts (so cached theap memory keeps available until no longer cached)
_mi_theap_incref(theap);
_mi_theap_decref(prev);
}
void _mi_theap_default_set(mi_theap_t* theap) {
mi_theap_t* const theap_old = _mi_theap_default();
mi_assert_internal(theap != NULL);
mi_assert_internal(theap->tld->thread_id==0 || theap->tld->thread_id==_mi_thread_id());
mi_tls_slots_init();
#if MI_TLS_MODEL_THREAD_LOCAL
__mi_theap_default = theap;
#elif MI_TLS_MODEL_FIXED_SLOT
mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_DEFAULT, theap);
#elif MI_TLS_MODEL_DYNAMIC_WIN32
mi_win_tls_slot_set(_mi_theap_default_slot, _mi_theap_default_expansion_slot, theap);
#elif MI_TLS_MODEL_DYNAMIC_PTHREADS
if (_mi_theap_default_key!=0) pthread_setspecific(_mi_theap_default_key, theap);
#endif
// set theap main if needed
if (mi_theap_is_initialized(theap)) {
// ensure the default theap is passed to `_mi_thread_done` as on some platforms we cannot access TLS at thread termination (as it would allocate again)
_mi_prim_thread_associate_default_theap(theap);
if (_mi_is_heap_main(_mi_theap_heap(theap))) {
__mi_theap_main = theap;
}
}
// ensure either the default slot contains the main theap, or __mi_theap_main is initialized
if (mi_theap_is_initialized(theap_old) && _mi_is_heap_main(_mi_theap_heap(theap_old))) {
__mi_theap_main = theap_old;
}
}
void mi_thread_set_in_threadpool(void) mi_attr_noexcept {
mi_theap_t* theap = _mi_theap_default_safe();
theap->tld->is_in_threadpool = true;
}
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
// --------------------------------------------------------
static bool os_preloading = true; // true until this module is initialized
// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
bool mi_decl_noinline _mi_preloading(void) {
return os_preloading;
}
// Returns true if mimalloc was redirected
mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept {
return _mi_is_redirected();
}
// Called once by the process loader from `src/prim/prim.c`
void _mi_auto_process_init(void) {
// mi_heap_main_init();
// #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD)
// volatile mi_theap_t* dummy = __mi_theap_default; // access TLS to allocate it before setting tls_initialized to true;
// if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697)
// #endif
os_preloading = false;
mi_assert_internal(_mi_is_main_thread());
mi_process_init();
mi_process_setup_auto_thread_done();
_mi_thread_locals_init();
_mi_options_post_init(); // now we can print to stderr
if (_mi_is_redirected()) _mi_verbose_message("malloc is redirected.\n");
// show message from the redirector (if present)
const char* msg = NULL;
_mi_allocator_init(&msg);
if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) {
_mi_fputs(NULL,NULL,NULL,msg);
}
// reseed random
_mi_random_reinit_if_weak(&theap_main.random);
}
// CPU features
mi_decl_cache_align size_t _mi_cpu_movsb_max = 0; // for size <= max, rep movsb is fast
mi_decl_cache_align size_t _mi_cpu_stosb_max = 0; // for size <= max, rep stosb is fast
mi_decl_cache_align bool _mi_cpu_has_popcnt = false;
#if (MI_ARCH_X64 || MI_ARCH_X86)
#if defined(__GNUC__)
// #include <cpuid.h>
static bool mi_cpuid(uint32_t* regs4, uint32_t level, uint32_t sublevel) {
// note: use explicit assembly instead of __get_cpuid as we need the sublevel (in ecx)
// (on Ubuntu 22 with WSL the __get_cpuid does not clear ecx for level 7 which is incorrect).
uint32_t eax, ebx, ecx, edx;
__asm __volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(level), "c"(sublevel) : );
regs4[0] = eax;
regs4[1] = ebx;
regs4[2] = ecx;
regs4[3] = edx;
return true;
}
#elif defined(_MSC_VER)
static bool mi_cpuid(uint32_t* regs4, uint32_t level, uint32_t sublevel) {
__cpuidex((int32_t*)regs4, (int32_t)level, (int32_t)sublevel);
return true;
}
#else
static bool mi_cpuid(uint32_t* regs4, uint32_t level, uint32_t sublevel) {
MI_UNUSED(regs4); MI_UNUSED(level); MI_UNUSED(sublevel);
return false;
}
#endif
static void mi_detect_cpu_features(void) {
// FSRM for fast short rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
// EMRS for fast enhanced rep movsb/stosb support (not used at the moment, memcpy always seems faster?)
// FSRS for fast short rep stosb
bool amd = false;
bool fsrm = false;
// bool erms = false;
bool fsrs = false;
uint32_t cpu_info[4];
if (mi_cpuid(cpu_info, 0, 0)) {
amd = (cpu_info[2]==0x444d4163); // (Auth enti cAMD)
}
if (mi_cpuid(cpu_info, 7, 0)) {
fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
// erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
}
if (mi_cpuid(cpu_info, 7, 1)) {
fsrs = ((cpu_info[1] & (1 << 11)) != 0); // bit 11 of EBX: see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=1:_Extended_Features>
}
if (mi_cpuid(cpu_info, 1, 0)) {
_mi_cpu_has_popcnt = ((cpu_info[2] & (1 << 23)) != 0); // bit 23 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits>
}
if (fsrm) {
_mi_cpu_movsb_max = 127;
}
if (fsrs || (amd && fsrm)) { // fsrm on amd implies fsrs, see: https://marc.info/?l=git-commits-head&m=168186277717803
_mi_cpu_stosb_max = 127;
}
}
#else
static void mi_detect_cpu_features(void) {
#if MI_ARCH_ARM64
_mi_cpu_has_popcnt = true;
#endif
}
#endif
// Initialize the process; called by thread_init or the process loader
static void mi_process_init_once(void) mi_attr_noexcept {
_mi_process_is_initialized = true;
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
mi_detect_cpu_features();
_mi_options_init();
_mi_stats_init();
_mi_os_init();
// the following can potentially allocate (on freeBSD for pthread keys)
// todo: do 2-phase so we can use stats at first, then later init the keys?
mi_heap_main_init(); // before page_map_init so stats are working
_mi_page_map_init(); // todo: this could fail.. should we abort in that case?
mi_thread_init();
_mi_process_is_initialized = true;
#if defined(_WIN32) && defined(MI_WIN_USE_FLS)
// On windows, when building as a static lib the FLS cleanup happens to early for the main thread.
// To avoid this, set the FLS value for the main thread to NULL so the fls cleanup
// will not call _mi_thread_done on the (still executing) main thread. See issue #508.
_mi_prim_thread_associate_default_theap(NULL);
#endif
// mi_stats_reset(); // only call stat reset *after* thread init (or the theap tld == NULL)
mi_track_init();
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024);
int reserve_at = (int)mi_option_get_clamp(mi_option_reserve_huge_os_pages_at, -1, INT_MAX);
if (reserve_at != -1) {
mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500);
} else {
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
}
if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
long ksize = mi_option_get(mi_option_reserve_os_memory);
if (ksize > 0) {
mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true);
}
}
}
// Initialize the process; called by thread_init or the process loader
void mi_process_init(void) mi_attr_noexcept {
// #if _MSC_VER < 1920
// mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main
// #endif
mi_atomic_do_once {
mi_process_init_once();
}
}
// Called when the process is done (cdecl as it is used with `at_exit` on some platforms)
void mi_cdecl mi_process_done(void) mi_attr_noexcept {
// only shutdown if we were initialized
if (!_mi_process_is_initialized) return;
// ensure we are called once
static bool process_done = false;
if (process_done) return;
process_done = true;
// free dynamic thread locals (if used at all)
_mi_thread_locals_done();
// release any thread specific resources and ensure _mi_thread_done is called on all but the main thread
_mi_prim_thread_done_auto_done();
#ifndef MI_SKIP_COLLECT_ON_EXIT
#if (MI_DEBUG || !defined(MI_SHARED_LIB))
// free all memory if possible on process exit. This is not needed for a stand-alone process
// but should be done if mimalloc is statically linked into another shared library which
// is repeatedly loaded/unloaded, see issue #281.
mi_theap_collect(_mi_theap_default(), true /* force */);
#endif
#endif
// done with tracking tools
mi_track_done()
// Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free
// since after process_done there might still be other code running that calls `free` (like at_exit routines,
// or C-runtime termination code.
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
mi_subprocs_unsafe_destroy_all(); // destroys all subprocs, arenas, and the page_map!
}
else {
mi_heap_stats_merge_to_subproc(mi_heap_main());
}
// careful now to no longer access any allocator functionality
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
mi_subproc_stats_print_out(NULL, NULL, NULL);
}
mi_lock_done(&subprocs_lock);
mi_tls_slots_done();
_mi_allocator_done();
_mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id);
os_preloading = true; // don't call the C runtime anymore
}
void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept {
if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return;
mi_process_done();
}

Xet Storage Details

Size:
43.5 kB
·
Xet hash:
c8e22cdbef5148b4ef9345b5a47217200c50af5a2cdbc6137bd278e7556771ed

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.