| |
| """Non-regression check for the Luciole-1B-Base lm_head fix. |
| |
| The fix removes the redundant, all-zero ``lm_head.weight`` tensor from |
| ``model.safetensors``. Because ``tie_word_embeddings`` is true, the language |
| modelling head is reconstructed from ``model.embed_tokens.weight`` at load |
| time, so the model must behave *identically* before and after the fix. |
| |
| Usage: |
| # Compare two model folders (e.g. backup vs fixed): |
| python check_nonregression.py --ref ../Luciole-1B-Base-backup --new . |
| |
| # Or snapshot a reference now, then compare later: |
| python check_nonregression.py --ref . --save ref_outputs.pt |
| # ... apply the fix ... |
| python check_nonregression.py --new . --compare ref_outputs.pt |
| |
| This script only does forward passes (greedy / logits), no sampling, so it is |
| fully deterministic and CPU-friendly. |
| """ |
| import argparse |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
# Fixed prompts run through the model by compute_outputs(); each prompt
# produces one entry in the "results" list of the returned snapshot.
PROMPTS = [
    "Bonjour, je m'appelle",
    "La capitale de la France est",
    "Il était une fois",
    "2 + 2 =",
]
|
|
|
|
def load(path):
    """Load the tokenizer and the causal language model stored at ``path``.

    The model is instantiated with float32 weights and switched to eval mode
    before it is returned.

    Args:
        path: Model folder passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(path)
    causal_lm = AutoModelForCausalLM.from_pretrained(
        path,
        torch_dtype=torch.float32,
    )
    # Inference only: put the module in evaluation mode.
    causal_lm.eval()
    return tokenizer, causal_lm
|
|
|
|
@torch.no_grad()
def compute_outputs(path):
    """Run every prompt in ``PROMPTS`` through the model found at ``path``.

    For each prompt the function records the logits at the last input
    position and a greedy generation (``do_sample=False``) of at most 20 new
    tokens, both as tensors and as decoded text.

    Args:
        path: Model folder handed to ``load``.

    Returns:
        A dict with two keys: ``"tied"`` (True when the output-embedding
        weight and the input-embedding weight report the same data pointer)
        and ``"results"`` (one dict per prompt with the keys ``prompt``,
        ``logits``, ``greedy_ids`` and ``greedy_text``).
    """
    tokenizer, model = load(path)

    # Identical data pointers are used as the evidence that the LM head
    # shares its weight tensor with the input embeddings.
    head_ptr = model.get_output_embeddings().weight.data_ptr()
    embed_ptr = model.get_input_embeddings().weight.data_ptr()

    def run_prompt(text):
        input_ids = tokenizer(text, return_tensors="pt").input_ids
        # First (only) batch element, last sequence position.
        last_logits = model(input_ids).logits[0, -1]
        generated = model.generate(
            input_ids, max_new_tokens=20, do_sample=False
        )[0]
        return {
            "prompt": text,
            "logits": last_logits.clone(),
            "greedy_ids": generated.clone(),
            "greedy_text": tokenizer.decode(generated, skip_special_tokens=True),
        }

    return {
        "tied": head_ptr == embed_ptr,
        "results": [run_prompt(text) for text in PROMPTS],
    }
|
|
|
|
def compare(ref, new):
    """Compare two output snapshots and print a per-prompt report.

    Both arguments are dicts shaped like the return value of
    ``compute_outputs``: a ``"tied"`` flag plus a ``"results"`` list whose
    entries carry ``prompt``, ``logits``, ``greedy_ids`` and ``greedy_text``.

    The comparison fails when any of the following holds:
      * ``new["tied"]`` is false (only the new model's flag is checked);
      * the two snapshots do not contain the same number of results;
      * results at the same index were produced from different prompts;
      * the logits differ in shape or in any value (exact match required);
      * the greedy token ids differ.

    Args:
        ref: Reference snapshot.
        new: Snapshot of the model under test.

    Returns:
        True when nothing differs, False otherwise.
    """
    ok = True
    if new["tied"]:
        print(" [ ok ] output head is tied to input embeddings")
    else:
        print(" [FAIL] output head is NOT tied to input embeddings in --new model")
        ok = False

    ref_results = ref["results"]
    new_results = new["results"]

    # zip() below stops at the shorter list, so a length mismatch (e.g. a
    # snapshot saved with a different prompt list) must be reported here or
    # the missing prompts would silently count as passing.
    if len(ref_results) != len(new_results):
        print(
            f" [FAIL] number of results differs: "
            f"ref has {len(ref_results)}, new has {len(new_results)}"
        )
        ok = False

    for r, n in zip(ref_results, new_results):
        # Outputs are only comparable when they come from the same prompt.
        if r["prompt"] != n["prompt"]:
            print(f" [FAIL] prompt mismatch: ref {r['prompt']!r} vs new {n['prompt']!r}")
            ok = False
            continue

        same_ids = torch.equal(r["greedy_ids"], n["greedy_ids"])

        same_shape = r["logits"].shape == n["logits"].shape
        if same_shape:
            max_logit_diff = (r["logits"] - n["logits"]).abs().max().item()
        else:
            # Subtracting tensors of different shapes would raise (or
            # broadcast); treat it as an unbounded difference instead.
            max_logit_diff = float("inf")

        # Bit-exact logits are required; a NaN difference also compares
        # unequal to 0.0 and therefore fails.
        status = "ok" if (same_ids and max_logit_diff == 0.0) else "FAIL"
        if status == "FAIL":
            ok = False
        print(f" [{status:>4}] {r['prompt']!r}")
        print(f" max|Δlogits| = {max_logit_diff:.3e} greedy_ids_identical = {same_ids}")
        if not same_shape:
            print(
                f" logits shape: ref {tuple(r['logits'].shape)} "
                f"vs new {tuple(n['logits'].shape)}"
            )
        if not same_ids:
            print(f" ref : {r['greedy_text']!r}")
            print(f" new : {n['greedy_text']!r}")
    return ok
|
|
|
|
def main():
    """Command-line entry point: snapshot model outputs or compare them.

    Modes, checked in this order:
      * ``--save FILE`` (needs ``--ref``): compute the reference outputs,
        write them to FILE with ``torch.save`` and return.
      * ``--compare FILE`` (needs ``--new``): load a saved snapshot and
        compare the ``--new`` model against it.
      * otherwise (needs ``--ref`` and ``--new``): compute both sets of
        outputs and compare them directly.

    Raises:
        SystemExit: with code 0 when the comparison passes, 1 when it fails,
            and 2 (via ``ArgumentParser.error``) when required options are
            missing.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ref", help="reference model folder")
    ap.add_argument("--new", help="new (fixed) model folder")
    ap.add_argument("--save", help="save reference outputs to this file")
    ap.add_argument("--compare", help="compare --new against saved reference file")
    args = ap.parse_args()

    # Option validation goes through ap.error() rather than assert: asserts
    # are removed under ``python -O`` and give a traceback instead of usage.
    if args.save:
        if not args.ref:
            ap.error("--save requires --ref")
        print(f"Computing reference outputs from {args.ref} ...")
        torch.save(compute_outputs(args.ref), args.save)
        print(f"Saved reference outputs to {args.save}")
        return

    if args.compare:
        if not args.new:
            ap.error("--compare requires --new")
        # NOTE(review): weights_only=False unpickles arbitrary objects, so
        # only snapshot files from a trusted source should be loaded here.
        ref = torch.load(args.compare, weights_only=False)
    else:
        if not (args.ref and args.new):
            ap.error("need --ref and --new (or --save / --compare)")
        print(f"Computing reference outputs from {args.ref} ...")
        ref = compute_outputs(args.ref)

    print(f"Computing outputs from {args.new} ...")
    new = compute_outputs(args.new)
    ok = compare(ref, new)

    print()
    print("RESULT:", "PASS - no regression ✓" if ok else "FAIL - outputs differ ✗")
    # Exit status mirrors the verdict so the script can gate CI jobs.
    raise SystemExit(0 if ok else 1)


if __name__ == "__main__":
    main()
|
|