#!/usr/bin/env python3
"""Non-regression check for the Luciole-1B-Base lm_head fix.

The fix removes the redundant, all-zero ``lm_head.weight`` tensor from
``model.safetensors``. Because ``tie_word_embeddings`` is true, the language
modelling head is reconstructed from ``model.embed_tokens.weight`` at load
time, so the model must behave *identically* before and after the fix.

Usage:
    # Compare two model folders (e.g. backup vs fixed):
    python check_nonregression.py --ref ../Luciole-1B-Base-backup --new .

    # Or snapshot a reference now, then compare later:
    python check_nonregression.py --ref . --save ref_outputs.pt
    # ... apply the fix ...
    python check_nonregression.py --new . --compare ref_outputs.pt

This script only does forward passes (greedy / logits), no sampling, so it is
fully deterministic and CPU-friendly.

Exit status: 0 when outputs are identical, 1 when they differ, 2 on a
command-line usage error.
"""
import argparse

import torch

PROMPTS = [
    "Bonjour, je m'appelle",
    "La capitale de la France est",
    "Il était une fois",
    "2 + 2 =",
]


def load(path: str):
    """Load the tokenizer and the float32 causal LM found at *path*.

    Returns a ``(tokenizer, model)`` tuple; the model is switched to eval mode.
    """
    # Imported here rather than at module level: ``load`` is the only user,
    # and deferring the heavy import keeps ``--help`` and usage errors fast.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained(path)
    model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float32)
    model.eval()
    return tok, model


@torch.no_grad()
def compute_outputs(path: str) -> dict:
    """Run every prompt in ``PROMPTS`` through the model stored at *path*.

    Returns a dict with:
      * ``"tied"``: whether the output head shares storage with the input
        embeddings (compared via ``data_ptr()``);
      * ``"results"``: one dict per prompt holding the prompt, the last-token
        logits, the greedy continuation ids and their decoded text.
    """
    tok, model = load(path)

    # Sanity: the head must be tied to the input embeddings.
    # A model exposing no output embeddings at all is reported as "not tied"
    # instead of crashing on ``None.weight``.
    out_emb = model.get_output_embeddings()
    tied = (
        out_emb is not None
        and out_emb.weight.data_ptr() == model.get_input_embeddings().weight.data_ptr()
    )

    results = []
    for prompt in PROMPTS:
        ids = tok(prompt, return_tensors="pt").input_ids
        logits = model(ids).logits[0, -1]  # last-token logits
        greedy = model.generate(ids, max_new_tokens=20, do_sample=False)[0]
        results.append({
            "prompt": prompt,
            "logits": logits.clone(),
            "greedy_ids": greedy.clone(),
            "greedy_text": tok.decode(greedy, skip_special_tokens=True),
        })
    return {"tied": tied, "results": results}


def compare(ref: dict, new: dict) -> bool:
    """Print a per-prompt report and return True only if *new* matches *ref*.

    Both arguments have the structure produced by ``compute_outputs``. The
    check fails when the new model's head is not tied, when the two result
    lists differ in length or in prompts, when any greedy id sequence differs,
    or when any last-token logit differs at all (exact equality is required).
    """
    ok = True
    if not new["tied"]:
        print(" [FAIL] output head is NOT tied to input embeddings in --new model")
        ok = False
    else:
        print(" [ ok ] output head is tied to input embeddings")

    ref_results = ref["results"]
    new_results = new["results"]

    # zip() below stops at the shorter list, so a length mismatch (e.g. a
    # stale reference snapshot) must be reported explicitly or it would pass.
    if len(ref_results) != len(new_results):
        print(
            f" [FAIL] number of results differs: "
            f"ref has {len(ref_results)}, new has {len(new_results)}"
        )
        ok = False

    for r, n in zip(ref_results, new_results):
        # Outputs are only comparable when they come from the same prompt.
        if r["prompt"] != n["prompt"]:
            print(f" [FAIL] prompt mismatch: ref {r['prompt']!r} vs new {n['prompt']!r}")
            ok = False
            continue

        same_ids = torch.equal(r["greedy_ids"], n["greedy_ids"])
        if r["logits"].shape == n["logits"].shape:
            max_logit_diff = (r["logits"] - n["logits"]).abs().max().item()
        else:
            # Different vocabulary sizes cannot be subtracted; count as a diff.
            max_logit_diff = float("inf")

        status = "ok" if (same_ids and max_logit_diff == 0.0) else "FAIL"
        if status == "FAIL":
            ok = False
        print(f" [{status:>4}] {r['prompt']!r}")
        print(f" max|Δlogits| = {max_logit_diff:.3e} greedy_ids_identical = {same_ids}")
        if not same_ids:
            print(f" ref : {r['greedy_text']!r}")
            print(f" new : {n['greedy_text']!r}")
    return ok


def main() -> None:
    """Parse the command line, run the requested mode and exit 0/1 on PASS/FAIL."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--ref", help="reference model folder")
    ap.add_argument("--new", help="new (fixed) model folder")
    ap.add_argument("--save", help="save reference outputs to this file")
    ap.add_argument("--compare", help="compare --new against saved reference file")
    args = ap.parse_args()

    # Argument validation goes through ap.error() rather than ``assert``:
    # asserts vanish under ``python -O``, and ap.error() exits with status 2,
    # which cannot be confused with status 1 ("regression found").
    if args.save:
        if not args.ref:
            ap.error("--save requires --ref")
        print(f"Computing reference outputs from {args.ref} ...")
        torch.save(compute_outputs(args.ref), args.save)
        print(f"Saved reference outputs to {args.save}")
        return

    if args.compare:
        if not args.new:
            ap.error("--compare requires --new")
        # NOTE(security): weights_only=False unpickles arbitrary objects.
        # Only load reference files produced by this script's --save mode.
        ref = torch.load(args.compare, weights_only=False)
        print(f"Computing outputs from {args.new} ...")
        new = compute_outputs(args.new)
        ok = compare(ref, new)
    else:
        if not (args.ref and args.new):
            ap.error("need --ref and --new (or --save / --compare)")
        print(f"Computing reference outputs from {args.ref} ...")
        ref = compute_outputs(args.ref)
        print(f"Computing outputs from {args.new} ...")
        new = compute_outputs(args.new)
        ok = compare(ref, new)

    print()
    print("RESULT:", "PASS - no regression ✓" if ok else "FAIL - outputs differ ✗")
    raise SystemExit(0 if ok else 1)


if __name__ == "__main__":
    main()