Thanatos-27B / scripts /verify_arch.py
FoolDev's picture
Rename back: Thanatos-27B-Heretic → Thanatos-27B (HF repo also renamed)
7197abd
#!/usr/bin/env python3
"""
Thanatos-27B — verify the README "Architecture" forward-pass bullets
against the actual GGUF metadata.
Reads either the qwen35- or qwen36-stamped bundle (or any GGUF that
declares one of those `general.architecture` values), prints each
README claim alongside the metadata key it derives from, and exits
non-zero if any value mismatches the expected README claim. Useful
as a manual audit after the bundle is re-stamped or after upstream
re-conversion.
Usage:
python3 scripts/verify_arch.py # default bundle
python3 scripts/verify_arch.py Thanatos-27B.Q4_K_M.gguf
python3 scripts/verify_arch.py /path/to/some-other.gguf
Exit code 0 = all claims verify, 1 = at least one mismatch (or an
unexpected `general.architecture`), 2 = usage error or no GGUF found.
Note: this does NOT verify the 27B parameter count directly (no such
KV in the GGUF) — that comes from llama.cpp's `case 64: LLM_TYPE_27B`
branch in `src/models/qwen35.cpp`, not from the file itself.
"""
from __future__ import annotations
import sys
from pathlib import Path
from gguf import GGUFReader
# README "Architecture" claims, keyed by GGUF metadata key suffix. main()
# prefixes each suffix with the file's `general.architecture` value, reads
# that key, and compares it to the expected value; the string is the README
# wording printed next to the result.
EXPECTED = {
    "block_count": (64, "64 transformer layers"),
    "context_length": (262144, "262 144 native context"),
    "embedding_length": (5120, "Hidden size 5120"),
    "feed_forward_length": (17408, "FFN intermediate 17408"),
    "attention.head_count": (24, "Gated Attention: 24 Q-heads"),
    "attention.head_count_kv": (4, "Gated Attention: 4 KV-heads (GQA)"),
    "attention.key_length": (256, "Gated Attention: head_dim 256 (key)"),
    "attention.value_length": (256, "Gated Attention: head_dim 256 (value)"),
    "rope.dimension_count": (64, "Partial RoPE: 64 of 256 dims (factor 0.25)"),
    "full_attention_interval": (4, "Hybrid stack: every 4th layer is full attention (16 cycles)"),
    "ssm.state_size": (128, "Gated DeltaNet: head_dim 128"),
    "ssm.time_step_rank": (48, "Gated DeltaNet: 48 V-heads"),
    "ssm.group_count": (16, "Gated DeltaNet: 16 QK-heads"),
}
# Expected number of entries in the `tokenizer.ggml.tokens` array; main()
# checks this separately because it is a length, not a scalar key.
EXPECTED_VOCAB = 248320
# Accepted `general.architecture` values; main() stops with exit code 1 on
# anything else.
EXPECTED_ARCHS = {"qwen35", "qwen36"}
def read_scalar(reader: GGUFReader, key: str):
    """Look up one metadata field and return its value in plain-Python form.

    Args:
        reader: Object exposing a ``fields`` mapping of key -> field, where
            each field has ``parts`` and ``data`` (indices into ``parts``).
        key: Full metadata key, e.g. ``"<arch>.block_count"``.

    Returns:
        ``None`` when the key is absent. Otherwise the part referenced by the
        field's first data index, converted with ``tolist()`` when available;
        a one-element list is unwrapped to its single item.
    """
    field = reader.fields.get(key)
    if field is None:
        return None

    # Only the first data index is consulted; that is where a scalar lives.
    payload = field.parts[field.data[0]]
    if hasattr(payload, "tolist"):
        value = payload.tolist()
    else:
        value = payload

    is_singleton = isinstance(value, list) and len(value) == 1
    return value[0] if is_singleton else value
def read_arch(reader: GGUFReader) -> str:
    """Return the file's ``general.architecture`` value as a string.

    The part referenced by the field's first data index is converted to
    ``bytes`` and decoded with the default codec.

    Raises:
        KeyError: If the reader has no ``general.architecture`` field.
    """
    field = reader.fields["general.architecture"]
    raw = field.parts[field.data[0]]
    return bytes(raw).decode()
def main() -> int:
    """Verify a GGUF's metadata against the README "Architecture" claims.

    Accepts at most one command-line argument: the path of the GGUF to check.
    Without it, the first default bundle name that exists in the repo root
    (the parent of this script's directory) and is larger than 1024 bytes is
    used.

    Prints one line per claim to stdout (``[ ok ]`` / ``[FAIL]``), followed by
    a summary line.

    Returns:
        0 if every claim matches; 1 if any value mismatches or the file's
        ``general.architecture`` is missing or not an expected value; 2 on a
        usage error or when the GGUF file cannot be found.
    """
    if len(sys.argv) > 2:
        print(f"usage: {sys.argv[0]} [path/to/Thanatos-27B.Q4_K_M.gguf]", file=sys.stderr)
        return 2

    if len(sys.argv) == 2:
        path = Path(sys.argv[1])
        # Fail cleanly on a bad explicit path instead of surfacing a raw
        # traceback from the reader; the default candidates get the same
        # existence check below.
        if not path.is_file():
            print(f"[!] GGUF file not found: {path}", file=sys.stderr)
            return 2
    else:
        root = Path(__file__).resolve().parent.parent
        default_paths = [
            root / "Thanatos-27B.Q4_K_M.qwen35.gguf",
            root / "Thanatos-27B.Q4_K_M.qwen36.gguf",
            root / "Thanatos-27B.Q4_K_M.gguf",
        ]
        # The size threshold presumably skips tiny placeholder files (e.g.
        # un-fetched LFS pointers) — TODO confirm.
        path = next((p for p in default_paths if p.exists() and p.stat().st_size > 1024), None)
        if path is None:
            print("[!] no Thanatos-27B GGUF found in repo root; pass a path explicitly", file=sys.stderr)
            return 2

    print(f"[*] reading: {path}")
    reader = GGUFReader(str(path), "r")

    try:
        arch = read_arch(reader)
    except KeyError:
        # Without the architecture there is no key prefix to verify against.
        print("[!] GGUF has no general.architecture key", file=sys.stderr)
        return 1
    if arch not in EXPECTED_ARCHS:
        print(f"[!] unexpected general.architecture: {arch!r} (expected one of {EXPECTED_ARCHS})", file=sys.stderr)
        return 1
    print(f"[*] general.architecture: {arch}")
    print()

    mismatches = 0
    fmt = " {marker} {claim:55s} {key:35s} = {actual}"
    for suffix, (expected, claim) in EXPECTED.items():
        # Metadata keys are namespaced by the architecture string.
        key = f"{arch}.{suffix}"
        actual = read_scalar(reader, key)
        # A missing key reads as None and therefore counts as a mismatch.
        ok = actual == expected
        marker = "[ ok ]" if ok else "[FAIL]"
        print(fmt.format(marker=marker, claim=claim, key=key, actual=actual))
        if not ok:
            mismatches += 1

    # Vocab count comes from the tokenizer tokens array length, not a scalar KV.
    f = reader.fields.get("tokenizer.ggml.tokens")
    vocab_actual = len(f.data) if f is not None else None
    ok = vocab_actual == EXPECTED_VOCAB
    marker = "[ ok ]" if ok else "[FAIL]"
    print(fmt.format(marker=marker, claim=f"Vocab {EXPECTED_VOCAB}", key="tokenizer.ggml.tokens (length)", actual=vocab_actual))
    if not ok:
        mismatches += 1

    print()
    if mismatches:
        print(f"[!] {mismatches} mismatch(es) — README Architecture claims disagree with GGUF metadata.")
        return 1
    print("[+] all Architecture claims verify against GGUF metadata.")
    return 0
# When run as a script, perform the verification and use main()'s return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())