"""Quantize the trained checkpoint to per-tensor symmetric int8 (gary-4 format:
each weight stored as int8 array + a float32 '.scale' scalar). Writes the release
dir: int8 + fp32 weights and config.json. Tokenizer-free — gary-neuron reads
digits directly, so there is no vocab to ship."""
import os, json, numpy as np

# Directory that contains this script; default input/output paths hang off it.
D = os.path.split(os.path.abspath(__file__))[0]
# Checkpoint to quantize. The CKPT environment variable overrides the default
# of final.npz next to this script.
CKPT = os.getenv("CKPT", f"{D}/final.npz")
# Release directory. The OUT environment variable overrides the default, which
# is the parent of the script directory (the release root).
OUT = os.getenv("OUT", os.path.abspath(f"{D}/../"))
# Create the release directory up front so the writes further down cannot fail
# on a missing path.
os.makedirs(OUT, exist_ok=True)

# Read everything we need from the checkpoint while the archive is open, then
# let the `with` block close it: np.load returns an NpzFile for .npz input,
# which holds an open file handle until it is closed.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code. Only run this on checkpoints you produced yourself.
# It may be droppable if "cfg" and "step" are stored as plain (non-object)
# arrays - confirm against the code that writes the checkpoint.
with np.load(CKPT, allow_pickle=True) as z:
    # Parameters are the entries whose key starts with "P/"; the prefix is
    # stripped and every tensor is converted to float32.
    P = {k[2:]: z[k].astype(np.float32) for k in z.files if k.startswith("P/")}
    # "cfg" holds the training configuration as JSON text.
    cfg = json.loads(str(z["cfg"]))
    # Training step recorded in the checkpoint.
    step = int(z["step"])

def _quantize_int8(w):
    """Quantize one float tensor to per-tensor symmetric int8.

    Returns ``(q, scale)``: ``q`` is an int8 array with values in [-127, 127]
    and ``scale`` is a Python float chosen so that ``q * scale`` approximates
    ``w``.
    """
    # A zero-size tensor has no max(), so treat its peak as 0. A peak of 0
    # (empty or all-zero tensor) would give scale 0 and a 0/0 division, so fall
    # back to a tiny positive scale instead.
    peak = float(np.abs(w).max()) if w.size else 0.0
    scale = peak / 127.0 or 1e-8
    q = np.clip(np.round(w / scale), -127, 127).astype(np.int8)
    return q, scale


# int8 archive contents: each tensor under its own name plus a float32
# "<name>.scale" scalar. total_int8 counts the raw (uncompressed) int8 bytes.
store = {}
total_int8 = 0
for k, Wt in P.items():
    # NaN/inf would poison the scale (a NaN scale is truthy, so the fallback
    # above never fires; an inf scale zeroes the whole tensor). Fail before any
    # artifact is written rather than ship a corrupted release.
    if not np.isfinite(Wt).all():
        raise ValueError(f"parameter {k!r} contains non-finite values; refusing to quantize")
    q, scale = _quantize_int8(Wt)
    store[k] = q
    store[k + ".scale"] = np.float32(scale)
    total_int8 += q.nbytes
# np.savez_compressed appends ".npz" when the file name does not already end
# with it, so these produce gary-neuron.int8.npz and gary-neuron.fp32.npz.
np.savez_compressed(f"{OUT}/gary-neuron.int8", **store)
np.savez_compressed(f"{OUT}/gary-neuron.fp32", **P)

# Total number of scalar weights across all parameter tensors.
nparams = int(sum(v.size for v in P.values()))
# Release metadata. Structural fields are copied from the checkpoint's training
# config (cfg) and from `step`.
# NOTE(review): the task description, the recommended inference settings and
# the exact_match_* figures are literals, not computed here - update them by
# hand when releasing a different checkpoint.
config = {
    "model_type": "gary-neuron",
    "architecture": (f"asynchronous Neural Cellular Automaton (1-D strip, {cfg['S']} cells) "
                     f"with a top-{cfg['topk']} Mixture-of-Experts (K={cfg['K']}) per-cell update rule"),
    "task": "reversed-digit integer addition (Lee et al. 2023 format), up to 7-digit operands",
    "S": cfg["S"], "state_dim": cfg["d"], "expert_hidden": cfg["he"],
    "n_experts": cfg["K"], "topk": cfg["topk"],
    "train_steps": cfg["steps"], "p_update": cfg["p_update"],
    "recommended_inference_steps": 24, "recommended_vote": 9,
    "n_params": nparams, "trained_step": step,
    "exact_match_heldout_singleorder": 0.9997,
    "exact_match_heldout_vote9": 1.0,
    "dependencies": "numpy",
}
# Write through a context manager so the file is flushed and closed before
# anything later in the script inspects the release directory.
with open(f"{OUT}/config.json", "w", encoding="utf-8") as fh:
    json.dump(config, fh, indent=1)
# One-line summary: raw int8 payload size, parameter count, checkpoint step.
print("int8 raw bytes:", total_int8, "| params:", nparams, "| step:", step)
# List every regular file in the release directory, in name order, with its
# size on disk. Sub-directories and other non-file entries are skipped.
for fname in sorted(os.listdir(OUT)):
    fpath = f"{OUT}/{fname}"
    if not os.path.isfile(fpath):
        continue
    nbytes = os.path.getsize(fpath)
    print(f"  {fname}: {nbytes} bytes")