#!/usr/bin/env python3
"""
Thanatos-27B — verify the README "Architecture" forward-pass bullets against
the actual GGUF metadata.

Reads either the qwen35- or qwen36-stamped bundle (or any GGUF that declares
one of those `general.architecture` values), prints each README claim
alongside the metadata key it derives from, and exits non-zero if any value
mismatches the expected README claim. Useful as a manual audit after the
bundle is re-stamped or after upstream re-conversion.

Usage:
    python3 scripts/verify_arch.py                              # default bundle
    python3 scripts/verify_arch.py Thanatos-27B.Q4_K_M.gguf
    python3 scripts/verify_arch.py /path/to/some-other.gguf

Exit code 0 = all claims verify, 1 = at least one mismatch (or a missing /
unexpected `general.architecture`), 2 = usage or environment error (too many
arguments, no GGUF file found, `gguf` package not installed).

Note: this does NOT verify the 27B parameter count directly (no such KV in the
GGUF) — that comes from llama.cpp's `case 64: LLM_TYPE_27B` branch in
`src/models/qwen35.cpp`, not from the file itself.
"""
from __future__ import annotations

import sys
from pathlib import Path

try:
    from gguf import GGUFReader
except ImportError:
    # Deferred: main() reports the missing dependency with an install hint
    # instead of the script dying with a traceback at import time.
    GGUFReader = None

# Metadata key suffix -> (expected value, README claim it backs).
# main() prefixes each suffix with "<general.architecture>." to build the key.
EXPECTED = {
    "block_count": (64, "64 transformer layers"),
    "context_length": (262144, "262 144 native context"),
    "embedding_length": (5120, "Hidden size 5120"),
    "feed_forward_length": (17408, "FFN intermediate 17408"),
    "attention.head_count": (24, "Gated Attention: 24 Q-heads"),
    "attention.head_count_kv": (4, "Gated Attention: 4 KV-heads (GQA)"),
    "attention.key_length": (256, "Gated Attention: head_dim 256 (key)"),
    "attention.value_length": (256, "Gated Attention: head_dim 256 (value)"),
    "rope.dimension_count": (64, "Partial RoPE: 64 of 256 dims (factor 0.25)"),
    "full_attention_interval": (4, "Hybrid stack: every 4th layer is full attention (16 cycles)"),
    "ssm.state_size": (128, "Gated DeltaNet: head_dim 128"),
    "ssm.time_step_rank": (48, "Gated DeltaNet: 48 V-heads"),
    "ssm.group_count": (16, "Gated DeltaNet: 16 QK-heads"),
}
EXPECTED_VOCAB = 248320
EXPECTED_ARCHS = {"qwen35", "qwen36"}

# One output row per verified claim.
_ROW = " {marker} {claim:55s} {key:35s} = {actual}"


def read_scalar(reader: GGUFReader, key: str):
    """Return the value stored under metadata *key*, or None if unavailable.

    The value is taken from the part that the field's first data index points
    at. A one-element list is unwrapped to its single item; anything else is
    returned as-is (converted via ``tolist()`` when the part offers it).

    Returns None when the key is absent or the field carries no data indices.
    """
    field = reader.fields.get(key)
    if field is None or len(field.data) == 0:
        return None
    part = field.parts[field.data[0]]
    value = part.tolist() if hasattr(part, "tolist") else part
    if isinstance(value, list) and len(value) == 1:
        return value[0]
    return value


def read_arch(reader: GGUFReader) -> str:
    """Return the decoded ``general.architecture`` string.

    Raises:
        KeyError: the reader has no ``general.architecture`` field.
    """
    field = reader.fields["general.architecture"]
    return bytes(field.parts[field.data[0]]).decode()


def _report(claim: str, key: str, actual, expected) -> bool:
    """Print one ok/FAIL row for a claim and return whether it verified."""
    ok = actual == expected
    marker = "[ ok ]" if ok else "[FAIL]"
    print(_ROW.format(marker=marker, claim=claim, key=key, actual=actual))
    return ok


def main() -> int:
    """Verify the README claims against a GGUF file and return the exit code.

    Takes at most one command-line argument (the GGUF path). Without one, the
    first existing default bundle larger than 1024 bytes in the repo root
    (the parent of this script's directory) is used.

    Returns:
        0 if every claim matches, 1 on any mismatch or a missing/unexpected
        architecture, 2 on usage or environment errors.
    """
    if len(sys.argv) > 2:
        print(f"usage: {sys.argv[0]} [path/to/Thanatos-27B.Q4_K_M.gguf]", file=sys.stderr)
        return 2

    if len(sys.argv) == 2:
        path = Path(sys.argv[1])
        # Fail with a clear message rather than a traceback from the reader.
        if not path.is_file():
            print(f"[!] no such file: {path}", file=sys.stderr)
            return 2
    else:
        root = Path(__file__).resolve().parent.parent
        default_paths = [
            root / "Thanatos-27B.Q4_K_M.qwen35.gguf",
            root / "Thanatos-27B.Q4_K_M.qwen36.gguf",
            root / "Thanatos-27B.Q4_K_M.gguf",
        ]
        # The size floor skips candidates of 1024 bytes or less.
        path = next((p for p in default_paths if p.exists() and p.stat().st_size > 1024), None)
        if path is None:
            print("[!] no Thanatos-27B GGUF found in repo root; pass a path explicitly", file=sys.stderr)
            return 2

    if GGUFReader is None:
        print("[!] the 'gguf' Python package is required (pip install gguf)", file=sys.stderr)
        return 2

    print(f"[*] reading: {path}")
    reader = GGUFReader(str(path), "r")

    try:
        arch = read_arch(reader)
    except (KeyError, IndexError):
        print("[!] GGUF has no readable general.architecture value", file=sys.stderr)
        return 1
    if arch not in EXPECTED_ARCHS:
        # Sorted so the message is stable across runs (set order is not).
        wanted = ", ".join(sorted(EXPECTED_ARCHS))
        print(f"[!] unexpected general.architecture: {arch!r} (expected one of: {wanted})", file=sys.stderr)
        return 1
    print(f"[*] general.architecture: {arch}")
    print()

    mismatches = 0
    for suffix, (expected, claim) in EXPECTED.items():
        key = f"{arch}.{suffix}"
        if not _report(claim, key, read_scalar(reader, key), expected):
            mismatches += 1

    # Vocab count comes from the tokenizer tokens array length, not a scalar KV.
    tokens = reader.fields.get("tokenizer.ggml.tokens")
    vocab_actual = len(tokens.data) if tokens is not None else None
    if not _report(f"Vocab {EXPECTED_VOCAB}", "tokenizer.ggml.tokens (length)", vocab_actual, EXPECTED_VOCAB):
        mismatches += 1

    print()
    if mismatches:
        print(f"[!] {mismatches} mismatch(es) — README Architecture claims disagree with GGUF metadata.")
        return 1
    print("[+] all Architecture claims verify against GGUF metadata.")
    return 0


if __name__ == "__main__":
    sys.exit(main())