/* * safetensors_reader.h — SafeTensors Binary Format Reader * * ╔═══════════════════════════════════════════════════════════════╗ * ║ HExState SafeTensors Input Module ║ * ║ Parses HuggingFace SafeTensors files in pure C ║ * ║ Supports mmap for zero-copy tensor access ║ * ╚═══════════════════════════════════════════════════════════════╝ * * SafeTensors file layout: * [8 bytes: header_size (uint64_t LE)] * [header_size bytes: JSON metadata] * [rest of file: raw tensor data] * * JSON header maps tensor names → {dtype, shape, data_offsets} * Offsets are relative to the start of the data section. */ #ifndef SAFETENSORS_READER_H #define SAFETENSORS_READER_H #include #include #include #include #include #include #include #include /* ═══════════════════════════════════════════════════════════════════════ * CONSTANTS * ═══════════════════════════════════════════════════════════════════════ */ #define ST_MAX_TENSORS 4096 #define ST_MAX_NAME_LEN 256 #define ST_MAX_DIMS 8 #define ST_MAX_HEADER_SIZE (100 * 1024 * 1024) /* 100 MB safety limit */ /* ═══════════════════════════════════════════════════════════════════════ * TENSOR DTYPE * ═══════════════════════════════════════════════════════════════════════ */ typedef enum { ST_DTYPE_F32, ST_DTYPE_F16, ST_DTYPE_BF16, ST_DTYPE_F64, ST_DTYPE_I8, ST_DTYPE_I16, ST_DTYPE_I32, ST_DTYPE_I64, ST_DTYPE_U8, ST_DTYPE_BOOL, ST_DTYPE_UNKNOWN } STDtype; static inline int st_dtype_size(STDtype dtype) { switch (dtype) { case ST_DTYPE_F32: return 4; case ST_DTYPE_F16: return 2; case ST_DTYPE_BF16: return 2; case ST_DTYPE_F64: return 8; case ST_DTYPE_I8: return 1; case ST_DTYPE_I16: return 2; case ST_DTYPE_I32: return 4; case ST_DTYPE_I64: return 8; case ST_DTYPE_U8: return 1; case ST_DTYPE_BOOL: return 1; default: return 0; } } static inline STDtype st_parse_dtype(const char *s, int len) { if (len == 3 && strncmp(s, "F32", 3) == 0) return ST_DTYPE_F32; if (len == 3 && strncmp(s, "F16", 3) == 0) return ST_DTYPE_F16; if (len == 4 && strncmp(s, "BF16", 4) == 0) return ST_DTYPE_BF16; if (len == 3 && strncmp(s, "F64", 3) == 0) return ST_DTYPE_F64; if (len == 2 && strncmp(s, "I8", 2) == 0) return ST_DTYPE_I8; if (len == 3 && strncmp(s, "I16", 3) == 0) return ST_DTYPE_I16; if (len == 3 && strncmp(s, "I32", 3) == 0) return ST_DTYPE_I32; if (len == 3 && strncmp(s, "I64", 3) == 0) return ST_DTYPE_I64; if (len == 2 && strncmp(s, "U8", 2) == 0) return ST_DTYPE_U8; if (len == 4 && strncmp(s, "BOOL", 4) == 0) return ST_DTYPE_BOOL; return ST_DTYPE_UNKNOWN; } /* ═══════════════════════════════════════════════════════════════════════ * TENSOR DESCRIPTOR * ═══════════════════════════════════════════════════════════════════════ */ typedef struct { char name[ST_MAX_NAME_LEN]; STDtype dtype; int n_dims; int64_t shape[ST_MAX_DIMS]; int64_t n_elements; /* Product of shape dims */ uint64_t data_offset_begin; /* Offset from data section start */ uint64_t data_offset_end; uint64_t data_size; /* end - begin */ } STTensorInfo; /* ═══════════════════════════════════════════════════════════════════════ * SAFETENSORS FILE HANDLE * ═══════════════════════════════════════════════════════════════════════ */ typedef struct { /* File mapping */ int fd; uint8_t *mmap_base; size_t file_size; /* Header */ uint64_t header_size; char *header_json; /* Not null-terminated in file, we add a null for parsing */ /* Data section */ uint8_t *data_base; /* Points into mmap at header+8 */ /* Tensor catalog */ STTensorInfo tensors[ST_MAX_TENSORS]; int n_tensors; } STFile; /* ═══════════════════════════════════════════════════════════════════════ * MINIMAL JSON PARSER * * This is a hand-rolled, zero-allocation JSON parser designed * specifically for the SafeTensors header format. It does NOT handle * arbitrary JSON — only the specific structure used by SafeTensors. * * Expected format: * { * "__metadata__": { ... }, * "tensor_name": { * "dtype": "F16", * "shape": [1024, 4096], * "data_offsets": [0, 8388608] * }, * ... * } * ═══════════════════════════════════════════════════════════════════════ */ /* Skip whitespace */ static inline const char *st_skip_ws(const char *p) { while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++; return p; } /* Parse a JSON string (returns pointer after closing quote). * Copies string content to buf (up to buflen-1 chars). */ static inline const char *st_parse_json_string(const char *p, char *buf, int buflen) { if (*p != '"') return NULL; p++; int i = 0; while (*p && *p != '"') { if (*p == '\\') { p++; /* skip escape */ if (!*p) return NULL; } if (i < buflen - 1) buf[i++] = *p; p++; } buf[i] = '\0'; if (*p == '"') p++; return p; } /* Parse a JSON integer */ static inline const char *st_parse_json_int(const char *p, int64_t *out) { char numbuf[32]; int i = 0; if (*p == '-') { numbuf[i++] = *p; p++; } while (*p >= '0' && *p <= '9' && i < 30) { numbuf[i++] = *p; p++; } numbuf[i] = '\0'; *out = strtoll(numbuf, NULL, 10); return p; } /* Skip a JSON value (string, number, object, array, bool, null) */ static inline const char *st_skip_json_value(const char *p) { p = st_skip_ws(p); if (*p == '"') { /* String */ p++; while (*p && *p != '"') { if (*p == '\\') p++; if (*p) p++; } if (*p == '"') p++; return p; } if (*p == '{') { /* Object */ int depth = 1; p++; while (*p && depth > 0) { if (*p == '{') depth++; else if (*p == '}') depth--; else if (*p == '"') { p++; while (*p && *p != '"') { if (*p == '\\') p++; if (*p) p++; } } if (*p) p++; } return p; } if (*p == '[') { /* Array */ int depth = 1; p++; while (*p && depth > 0) { if (*p == '[') depth++; else if (*p == ']') depth--; else if (*p == '"') { p++; while (*p && *p != '"') { if (*p == '\\') p++; if (*p) p++; } } if (*p) p++; } return p; } /* Number, bool, null — skip until delimiter */ while (*p && *p != ',' && *p != '}' && *p != ']' && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') { p++; } return p; } /* Parse the SafeTensors JSON header and populate the tensor catalog */ static inline int st_parse_header(STFile *st) { const char *p = st->header_json; p = st_skip_ws(p); if (*p != '{') return -1; p++; st->n_tensors = 0; while (*p) { p = st_skip_ws(p); if (*p == '}') break; if (*p == ',') { p++; continue; } /* Parse key */ char key[ST_MAX_NAME_LEN]; p = st_parse_json_string(p, key, sizeof(key)); if (!p) return -1; p = st_skip_ws(p); if (*p != ':') return -1; p++; p = st_skip_ws(p); /* Skip __metadata__ */ if (strcmp(key, "__metadata__") == 0) { p = st_skip_json_value(p); continue; } /* Parse tensor object */ if (*p != '{') { p = st_skip_json_value(p); continue; } p++; STTensorInfo *ti = &st->tensors[st->n_tensors]; memset(ti, 0, sizeof(*ti)); strncpy(ti->name, key, ST_MAX_NAME_LEN - 1); while (*p) { p = st_skip_ws(p); if (*p == '}') { p++; break; } if (*p == ',') { p++; continue; } char field[64]; p = st_parse_json_string(p, field, sizeof(field)); if (!p) return -1; p = st_skip_ws(p); if (*p != ':') return -1; p++; p = st_skip_ws(p); if (strcmp(field, "dtype") == 0) { char dtype_str[16]; p = st_parse_json_string(p, dtype_str, sizeof(dtype_str)); if (!p) return -1; ti->dtype = st_parse_dtype(dtype_str, strlen(dtype_str)); } else if (strcmp(field, "shape") == 0) { /* Parse array of ints */ if (*p != '[') return -1; p++; ti->n_dims = 0; ti->n_elements = 1; while (*p) { p = st_skip_ws(p); if (*p == ']') { p++; break; } if (*p == ',') { p++; continue; } int64_t dim_val; p = st_parse_json_int(p, &dim_val); if (ti->n_dims < ST_MAX_DIMS) { ti->shape[ti->n_dims++] = dim_val; ti->n_elements *= dim_val; } } } else if (strcmp(field, "data_offsets") == 0) { /* Parse [begin, end] */ if (*p != '[') return -1; p++; p = st_skip_ws(p); int64_t begin_val, end_val; p = st_parse_json_int(p, &begin_val); p = st_skip_ws(p); if (*p == ',') p++; p = st_skip_ws(p); p = st_parse_json_int(p, &end_val); p = st_skip_ws(p); if (*p == ']') p++; ti->data_offset_begin = (uint64_t)begin_val; ti->data_offset_end = (uint64_t)end_val; ti->data_size = ti->data_offset_end - ti->data_offset_begin; } else { p = st_skip_json_value(p); } } if (st->n_tensors < ST_MAX_TENSORS) st->n_tensors++; } return 0; } /* ═══════════════════════════════════════════════════════════════════════ * OPEN / CLOSE A SAFETENSORS FILE * ═══════════════════════════════════════════════════════════════════════ */ static inline STFile *st_open(const char *path) { STFile *st = (STFile *)calloc(1, sizeof(STFile)); if (!st) return NULL; /* Open file */ st->fd = open(path, O_RDONLY); if (st->fd < 0) { fprintf(stderr, "st_open: cannot open '%s'\n", path); free(st); return NULL; } /* Get file size */ struct stat sb; if (fstat(st->fd, &sb) < 0) { close(st->fd); free(st); return NULL; } st->file_size = sb.st_size; /* Memory-map the entire file */ st->mmap_base = (uint8_t *)mmap(NULL, st->file_size, PROT_READ, MAP_PRIVATE, st->fd, 0); if (st->mmap_base == MAP_FAILED) { fprintf(stderr, "st_open: mmap failed for '%s'\n", path); close(st->fd); free(st); return NULL; } /* Read header size (first 8 bytes, little-endian uint64) */ memcpy(&st->header_size, st->mmap_base, sizeof(uint64_t)); if (st->header_size > ST_MAX_HEADER_SIZE || st->header_size + 8 > st->file_size) { fprintf(stderr, "st_open: invalid header size %lu\n", (unsigned long)st->header_size); munmap(st->mmap_base, st->file_size); close(st->fd); free(st); return NULL; } /* Copy header JSON and null-terminate for our parser */ st->header_json = (char *)malloc(st->header_size + 1); memcpy(st->header_json, st->mmap_base + 8, st->header_size); st->header_json[st->header_size] = '\0'; /* Data section starts right after header */ st->data_base = st->mmap_base + 8 + st->header_size; /* Parse the header */ if (st_parse_header(st) != 0) { fprintf(stderr, "st_open: failed to parse header of '%s'\n", path); free(st->header_json); munmap(st->mmap_base, st->file_size); close(st->fd); free(st); return NULL; } return st; } static inline void st_close(STFile *st) { if (!st) return; free(st->header_json); if (st->mmap_base && st->mmap_base != MAP_FAILED) munmap(st->mmap_base, st->file_size); if (st->fd >= 0) close(st->fd); free(st); } /* ═══════════════════════════════════════════════════════════════════════ * TENSOR DATA ACCESS * * Returns a raw pointer into the mmap'd region. * Caller must interpret the bytes according to the tensor's dtype. * ═══════════════════════════════════════════════════════════════════════ */ static inline const void *st_tensor_data(const STFile *st, int tensor_idx) { if (tensor_idx < 0 || tensor_idx >= st->n_tensors) return NULL; return st->data_base + st->tensors[tensor_idx].data_offset_begin; } /* ═══════════════════════════════════════════════════════════════════════ * TENSOR → FLOAT32 CONVERSION * * Converts tensor data to float32, handling FP16 and BF16 input. * Caller must free the returned buffer. * ═══════════════════════════════════════════════════════════════════════ */ /* Forward declaration of fp16/bf16 converters from gguf_format.h */ /* (Already included when both headers are used together) */ static inline float *st_tensor_to_f32(const STFile *st, int tensor_idx) { const STTensorInfo *ti = &st->tensors[tensor_idx]; const uint8_t *raw = (const uint8_t *)st_tensor_data(st, tensor_idx); if (!raw) return NULL; float *out = (float *)malloc(ti->n_elements * sizeof(float)); if (!out) return NULL; switch (ti->dtype) { case ST_DTYPE_F32: memcpy(out, raw, ti->n_elements * sizeof(float)); break; case ST_DTYPE_F16: { const uint16_t *fp16 = (const uint16_t *)raw; for (int64_t i = 0; i < ti->n_elements; i++) { out[i] = gguf_fp16_to_fp32(fp16[i]); } break; } case ST_DTYPE_BF16: { const uint16_t *bf16 = (const uint16_t *)raw; for (int64_t i = 0; i < ti->n_elements; i++) { out[i] = gguf_bf16_to_fp32(bf16[i]); } break; } case ST_DTYPE_F64: { const double *f64 = (const double *)raw; for (int64_t i = 0; i < ti->n_elements; i++) { out[i] = (float)f64[i]; } break; } default: /* For integer types, just cast */ for (int64_t i = 0; i < ti->n_elements; i++) { switch (ti->dtype) { case ST_DTYPE_I8: out[i] = (float)((int8_t *)raw)[i]; break; case ST_DTYPE_I16: out[i] = (float)((int16_t *)raw)[i]; break; case ST_DTYPE_I32: out[i] = (float)((int32_t *)raw)[i]; break; case ST_DTYPE_U8: out[i] = (float)raw[i]; break; default: out[i] = 0.0f; break; } } break; } return out; } /* ═══════════════════════════════════════════════════════════════════════ * FIND TENSOR BY NAME * ═══════════════════════════════════════════════════════════════════════ */ static inline int st_find_tensor(const STFile *st, const char *name) { for (int i = 0; i < st->n_tensors; i++) { if (strcmp(st->tensors[i].name, name) == 0) return i; } return -1; } /* ═══════════════════════════════════════════════════════════════════════ * DIAGNOSTICS * ═══════════════════════════════════════════════════════════════════════ */ static inline void st_print_summary(const STFile *st) { printf(" ╔═══════════════════════════════════════════════════════════════╗\n"); printf(" ║ SafeTensors File Summary ║\n"); printf(" ╠═══════════════════════════════════════════════════════════════╣\n"); printf(" ║ File size: %12lu bytes ║\n", (unsigned long)st->file_size); printf(" ║ Header size: %12lu bytes ║\n", (unsigned long)st->header_size); printf(" ║ Tensors: %12d ║\n", st->n_tensors); printf(" ╚═══════════════════════════════════════════════════════════════╝\n\n"); const char *dtype_names[] = { "F32", "F16", "BF16", "F64", "I8", "I16", "I32", "I64", "U8", "BOOL", "???" }; for (int i = 0; i < st->n_tensors; i++) { const STTensorInfo *ti = &st->tensors[i]; printf(" [%3d] %-50s %4s [", i, ti->name, dtype_names[ti->dtype < ST_DTYPE_UNKNOWN ? ti->dtype : ST_DTYPE_UNKNOWN]); for (int d = 0; d < ti->n_dims; d++) { printf("%ld%s", (long)ti->shape[d], d < ti->n_dims - 1 ? "×" : ""); } printf("] %lu bytes\n", (unsigned long)ti->data_size); } printf("\n"); } /* ═══════════════════════════════════════════════════════════════════════ * MULTI-SHARD SAFETENSORS SUPPORT * * Most models >3B parameters are split across multiple shards: * model-00001-of-00005.safetensors * model-00002-of-00005.safetensors * ... * * The mapping from tensor name → shard file is stored in: * model.safetensors.index.json * * This module provides a unified view across all shards. * ═══════════════════════════════════════════════════════════════════════ */ #include #define ST_MAX_SHARDS 256 typedef struct { STFile *shards[ST_MAX_SHARDS]; int n_shards; /* Unified tensor catalog — maps to (shard_idx, tensor_idx_in_shard) */ struct { char name[ST_MAX_NAME_LEN]; int shard_idx; int tensor_idx; } tensor_map[ST_MAX_TENSORS]; int n_tensors; } STMultiFile; /* Compare function for sorting filenames */ static int st_cmp_str(const void *a, const void *b) { return strcmp(*(const char **)a, *(const char **)b); } /* Open a model directory containing one or more .safetensors files. * If only a single model.safetensors exists, opens just that file. * If model.safetensors.index.json exists, reads all referenced shards. */ static STMultiFile *st_open_dir(const char *model_dir) { STMultiFile *mf = (STMultiFile *)calloc(1, sizeof(STMultiFile)); if (!mf) return NULL; /* Canonicalize directory path */ char dir[512]; strncpy(dir, model_dir, sizeof(dir) - 2); dir[sizeof(dir) - 2] = '\0'; int dlen = strlen(dir); if (dlen > 0 && dir[dlen - 1] != '/') { dir[dlen] = '/'; dir[dlen + 1] = '\0'; } /* Try single-file first */ char single_path[1024]; snprintf(single_path, sizeof(single_path), "%smodel.safetensors", dir); { FILE *check = fopen(single_path, "rb"); if (check) { fclose(check); STFile *sf = st_open(single_path); if (sf) { mf->shards[0] = sf; mf->n_shards = 1; /* Build tensor map from single shard */ for (int i = 0; i < sf->n_tensors && mf->n_tensors < ST_MAX_TENSORS; i++) { strncpy(mf->tensor_map[mf->n_tensors].name, sf->tensors[i].name, ST_MAX_NAME_LEN - 1); mf->tensor_map[mf->n_tensors].shard_idx = 0; mf->tensor_map[mf->n_tensors].tensor_idx = i; mf->n_tensors++; } return mf; } } } /* Scan for shard files matching *.safetensors */ DIR *d = opendir(model_dir); if (!d) { fprintf(stderr, " st_open_dir: cannot open directory '%s'\n", model_dir); free(mf); return NULL; } char *shard_names[ST_MAX_SHARDS]; int n_found = 0; struct dirent *de; while ((de = readdir(d)) != NULL && n_found < ST_MAX_SHARDS) { int nlen = strlen(de->d_name); if (nlen > 12 && strcmp(de->d_name + nlen - 12, ".safetensors") == 0) { /* Skip the index.json file itself */ if (strstr(de->d_name, ".index.json") != NULL) continue; shard_names[n_found] = strdup(de->d_name); n_found++; } } closedir(d); if (n_found == 0) { fprintf(stderr, " st_open_dir: no .safetensors files in '%s'\n", model_dir); free(mf); return NULL; } /* Sort for deterministic ordering */ qsort(shard_names, n_found, sizeof(char *), st_cmp_str); /* Open each shard */ for (int s = 0; s < n_found; s++) { char path[1024]; snprintf(path, sizeof(path), "%s%s", dir, shard_names[s]); STFile *sf = st_open(path); if (!sf) { fprintf(stderr, " st_open_dir: failed to open shard '%s'\n", path); free(shard_names[s]); continue; } int si = mf->n_shards; mf->shards[si] = sf; /* Add all tensors from this shard to unified map */ for (int i = 0; i < sf->n_tensors && mf->n_tensors < ST_MAX_TENSORS; i++) { strncpy(mf->tensor_map[mf->n_tensors].name, sf->tensors[i].name, ST_MAX_NAME_LEN - 1); mf->tensor_map[mf->n_tensors].shard_idx = si; mf->tensor_map[mf->n_tensors].tensor_idx = i; mf->n_tensors++; } mf->n_shards++; free(shard_names[s]); } if (mf->n_shards == 0) { free(mf); return NULL; } printf(" Opened %d shards, %d tensors total\n\n", mf->n_shards, mf->n_tensors); return mf; } /* Find a tensor across all shards. Returns a pointer to the unified map entry index, * or -1 if not found. */ static int st_multi_find_tensor(const STMultiFile *mf, const char *name) { for (int i = 0; i < mf->n_tensors; i++) { if (strcmp(mf->tensor_map[i].name, name) == 0) return i; } return -1; } /* Get the STTensorInfo for a unified map index */ static const STTensorInfo *st_multi_tensor_info(const STMultiFile *mf, int unified_idx) { if (unified_idx < 0 || unified_idx >= mf->n_tensors) return NULL; int si = mf->tensor_map[unified_idx].shard_idx; int ti = mf->tensor_map[unified_idx].tensor_idx; return &mf->shards[si]->tensors[ti]; } /* Convert a tensor to F32 from across shards */ static float *st_multi_tensor_to_f32(const STMultiFile *mf, int unified_idx) { if (unified_idx < 0 || unified_idx >= mf->n_tensors) return NULL; int si = mf->tensor_map[unified_idx].shard_idx; int ti = mf->tensor_map[unified_idx].tensor_idx; return st_tensor_to_f32(mf->shards[si], ti); } /* Get raw tensor data from across shards */ static const void *st_multi_tensor_data(const STMultiFile *mf, int unified_idx) { if (unified_idx < 0 || unified_idx >= mf->n_tensors) return NULL; int si = mf->tensor_map[unified_idx].shard_idx; int ti = mf->tensor_map[unified_idx].tensor_idx; return st_tensor_data(mf->shards[si], ti); } static void st_multi_close(STMultiFile *mf) { if (!mf) return; for (int i = 0; i < mf->n_shards; i++) st_close(mf->shards[i]); free(mf); } static void st_multi_print_summary(const STMultiFile *mf) { printf(" ╔═══════════════════════════════════════════════════════════════╗\n"); printf(" ║ SafeTensors Multi-Shard Summary ║\n"); printf(" ╠═══════════════════════════════════════════════════════════════╣\n"); printf(" ║ Shards: %12d ║\n", mf->n_shards); uint64_t total_size = 0; for (int s = 0; s < mf->n_shards; s++) total_size += mf->shards[s]->file_size; printf(" ║ Total size: %12lu bytes (%6.1f MB) ║\n", (unsigned long)total_size, (double)total_size / (1024.0 * 1024.0)); printf(" ║ Tensors: %12d ║\n", mf->n_tensors); printf(" ╚═══════════════════════════════════════════════════════════════╝\n\n"); const char *dtype_names[] = { "F32", "F16", "BF16", "F64", "I8", "I16", "I32", "I64", "U8", "BOOL", "???" }; for (int i = 0; i < mf->n_tensors; i++) { const STTensorInfo *ti = st_multi_tensor_info(mf, i); printf(" [%3d] s%-2d %-48s %4s [", i, mf->tensor_map[i].shard_idx, ti->name, dtype_names[ti->dtype < ST_DTYPE_UNKNOWN ? ti->dtype : ST_DTYPE_UNKNOWN]); for (int d = 0; d < ti->n_dims; d++) { printf("%ld%s", (long)ti->shape[d], d < ti->n_dims - 1 ? "×" : ""); } printf("] %lu bytes\n", (unsigned long)ti->data_size); } printf("\n"); } #endif /* SAFETENSORS_READER_H */