// test_safetensors.cpp — enumerate Qwen3-235B tensors and spot-check specific names. #include "safetensors_loader.h" #include #include #include #include #include int main(int argc, char** argv) { std::string dir = argc > 1 ? argv[1] : "/path/to/Qwen3-235B-A22B-Instruct-2507-BF16"; SafetensorsLoader loader; auto t0 = std::chrono::steady_clock::now(); if (!loader.open(dir)) { fprintf(stderr, "FAIL: open(%s)\n", dir.c_str()); return 1; } auto t1 = std::chrono::steady_clock::now(); double ms = std::chrono::duration(t1 - t0).count(); size_t n = loader.tensor_count(); size_t s = loader.shard_count(); size_t bytes = loader.total_bytes(); printf("Open took %.1f ms\n", ms); printf("Shards: %zu\n", s); printf("Tensors: %zu\n", n); printf("Total bytes: %.2f GB\n", bytes / 1e9); // Expect 94 layers × 384 MoE + attention + router + norms + embed/head // = 36945 (confirmed from index.json inspection) bool ok_count = (n == 36945); printf("Tensor count check: %s (expected 36945)\n", ok_count ? "OK" : "FAIL"); // Spot-check specific names auto check = [&](const std::string& name) { auto* m = loader.get(name); if (!m) { printf(" MISSING: %s\n", name.c_str()); return false; } printf(" OK: %s dtype=%s shape=[", name.c_str(), m->dtype.c_str()); for (size_t i = 0; i < m->shape.size(); i++) printf("%s%ld", i ? "," : "", m->shape[i]); printf("] shard=%d offset=%zu nbytes=%zu\n", m->shard_id, m->offset, m->nbytes); return true; }; bool ok_names = true; ok_names &= check("model.embed_tokens.weight"); // [vocab, D] ok_names &= check("model.layers.0.input_layernorm.weight"); // [D] ok_names &= check("model.layers.0.self_attn.q_proj.weight"); // [D, D] ok_names &= check("model.layers.0.self_attn.k_proj.weight"); // [n_kv*head_dim, D] ok_names &= check("model.layers.0.self_attn.o_proj.weight"); // [D, D] ok_names &= check("model.layers.0.post_attention_layernorm.weight"); ok_names &= check("model.layers.0.mlp.gate.weight"); // [E, D] ok_names &= check("model.layers.0.mlp.experts.0.gate_proj.weight"); // [I, D] ok_names &= check("model.layers.0.mlp.experts.127.down_proj.weight"); // [D, I] ok_names &= check("model.layers.93.self_attn.q_proj.weight"); // last layer ok_names &= check("model.norm.weight"); ok_names &= check("lm_head.weight"); // Spot-check data_ptr: read first few bytes of embed_tokens auto t_read0 = std::chrono::steady_clock::now(); const void* embed_ptr = loader.data_ptr("model.embed_tokens.weight"); if (embed_ptr) { const uint16_t* bf16 = (const uint16_t*)embed_ptr; printf("\nmodel.embed_tokens.weight first 8 BF16 raw: "); for (int i = 0; i < 8; i++) printf("0x%04x ", bf16[i]); printf("\n"); } auto t_read1 = std::chrono::steady_clock::now(); double read_ms = std::chrono::duration(t_read1 - t_read0).count(); printf("(first data_ptr() access including mmap: %.1f ms)\n", read_ms); // Count MoE expert tensors for layer 0 int expert_count = 0; for (auto& name : loader.list_tensor_names()) { if (name.find("layers.0.mlp.experts.") != std::string::npos) expert_count++; } bool ok_expert = (expert_count == 384); // 128 experts × 3 projs printf("\nLayer 0 expert tensor count: %d (expected 384) %s\n", expert_count, ok_expert ? "OK" : "FAIL"); bool pass = ok_count && ok_names && ok_expert && (embed_ptr != nullptr); printf("\n%s\n", pass ? "=== test_safetensors PASS ===" : "=== test_safetensors FAIL ==="); return pass ? 0 : 1; }