Text Generation
Safetensors
nemotron
pretrained
openllm-france
Luciole-1B-Base / check_nonregression.py
Jeronymous's picture
Initial release of Luciole-1B-Base
50c5fef
#!/usr/bin/env python3
"""Non-regression check for the Luciole-1B-Base lm_head fix.
The fix removes the redundant, all-zero ``lm_head.weight`` tensor from
``model.safetensors``. Because ``tie_word_embeddings`` is true, the language
modelling head is reconstructed from ``model.embed_tokens.weight`` at load
time, so the model must behave *identically* before and after the fix.
Usage:
# Compare two model folders (e.g. backup vs fixed):
python check_nonregression.py --ref ../Luciole-1B-Base-backup --new .
# Or snapshot a reference now, then compare later:
python check_nonregression.py --ref . --save ref_outputs.pt
# ... apply the fix ...
python check_nonregression.py --new . --compare ref_outputs.pt
This script only does forward passes (greedy / logits), no sampling, so it is
fully deterministic and CPU-friendly.
"""
import argparse
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Fixed prompts fed to the model by compute_outputs(); the reference and the
# new model are compared on the outputs produced for each of them, in order.
PROMPTS = [
    "Bonjour, je m'appelle",
    "La capitale de la France est",
    "Il était une fois",
    "2 + 2 =",
]
def load(path):
    """Load the tokenizer and the causal language model found at *path*.

    The model weights are loaded as float32 and the model is put in
    evaluation mode before being returned.

    Args:
        path: Model location handed unchanged to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(path)
    lm = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float32)
    # nn.Module.eval() returns the module itself, so it can be returned directly.
    return tokenizer, lm.eval()
@torch.no_grad()
def compute_outputs(path):
    """Run every prompt in ``PROMPTS`` through the model stored at *path*.

    Args:
        path: Model location, forwarded to ``load``.

    Returns:
        A dict with two entries:

        * ``"tied"``: whether the output-embedding weight and the
          input-embedding weight report the same data pointer.
        * ``"results"``: one dict per prompt holding the prompt, the
          last-token logits, the greedy continuation ids (20 new tokens at
          most, no sampling) and the decoded greedy text.
    """
    tokenizer, model = load(path)

    # Sanity: the head must be tied to the input embeddings; this is checked
    # by comparing the data pointers of the two weight tensors.
    head_ptr = model.get_output_embeddings().weight.data_ptr()
    embed_ptr = model.get_input_embeddings().weight.data_ptr()

    def run_prompt(prompt):
        """Collect the logits and the greedy continuation for one prompt."""
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        last_logits = model(input_ids).logits[0, -1]  # last-token logits
        continuation = model.generate(
            input_ids, max_new_tokens=20, do_sample=False
        )[0]
        return {
            "prompt": prompt,
            "logits": last_logits.clone(),
            "greedy_ids": continuation.clone(),
            "greedy_text": tokenizer.decode(continuation, skip_special_tokens=True),
        }

    return {
        "tied": head_ptr == embed_ptr,
        "results": [run_prompt(prompt) for prompt in PROMPTS],
    }
def compare(ref, new):
    """Compare reference outputs against new outputs and print a report.

    Both arguments use the layout produced by ``compute_outputs``: a dict
    with a ``"tied"`` flag and a ``"results"`` list of per-prompt dicts
    (``"prompt"``, ``"logits"``, ``"greedy_ids"``, ``"greedy_text"``).

    A prompt passes only when the prompt text is the same on both sides, the
    greedy ids are identical and the logits are bit-for-bit equal.

    Args:
        ref: Outputs of the reference model.
        new: Outputs of the new model.

    Returns:
        True when every check passes, False otherwise.
    """
    ok = True

    if not new["tied"]:
        print(" [FAIL] output head is NOT tied to input embeddings in --new model")
        ok = False
    else:
        print(" [ ok ] output head is tied to input embeddings")

    ref_results = ref["results"]
    new_results = new["results"]

    # zip() stops at the shorter list, so a length mismatch (for instance a
    # reference file saved with a different prompt list) must be reported
    # explicitly instead of being silently ignored.
    if len(ref_results) != len(new_results):
        print(
            f" [FAIL] number of results differs: "
            f"ref has {len(ref_results)}, new has {len(new_results)}"
        )
        ok = False

    for r, n in zip(ref_results, new_results):
        same_prompt = r["prompt"] == n["prompt"]
        same_ids = torch.equal(r["greedy_ids"], n["greedy_ids"])

        # Subtracting tensors of different shapes would either raise or
        # broadcast; treat a shape change as an infinite difference instead.
        if r["logits"].shape == n["logits"].shape:
            max_logit_diff = (r["logits"] - n["logits"]).abs().max().item()
        else:
            max_logit_diff = float("inf")

        passed = same_prompt and same_ids and max_logit_diff == 0.0
        status = "ok" if passed else "FAIL"
        if not passed:
            ok = False

        print(f" [{status:>4}] {r['prompt']!r}")
        print(f" max|Δlogits| = {max_logit_diff:.3e} greedy_ids_identical = {same_ids}")
        if not same_prompt:
            print(f" prompt mismatch, new side used {n['prompt']!r}")
        if not same_ids:
            print(f" ref : {r['greedy_text']!r}")
            print(f" new : {n['greedy_text']!r}")

    return ok
def main():
    """Parse the command line, run the requested mode and report the result.

    Three modes are supported:

    * ``--ref R --save F``: compute the outputs of model R, save them to F
      and return without comparing anything.
    * ``--new N --compare F``: load reference outputs from F and compare the
      outputs of model N against them.
    * ``--ref R --new N``: compute both sets of outputs and compare them.

    Raises:
        SystemExit: with code 0 when the comparison passes and 1 when it
            fails; with code 2 (raised by argparse) when a required option
            is missing for the selected mode.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ref", help="reference model folder")
    ap.add_argument("--new", help="new (fixed) model folder")
    ap.add_argument("--save", help="save reference outputs to this file")
    ap.add_argument("--compare", help="compare --new against saved reference file")
    args = ap.parse_args()

    # Option combinations are validated with ap.error() rather than assert:
    # assert statements disappear under ``python -O`` and would otherwise
    # show a traceback instead of a usage message.
    if args.save:
        if not args.ref:
            ap.error("--save requires --ref")
        print(f"Computing reference outputs from {args.ref} ...")
        torch.save(compute_outputs(args.ref), args.save)
        print(f"Saved reference outputs to {args.save}")
        return

    if args.compare:
        if not args.new:
            ap.error("--compare requires --new")
        # NOTE(security): weights_only=False unpickles arbitrary objects, so
        # only pass files created by this script's own --save mode.
        ref = torch.load(args.compare, weights_only=False)
    else:
        if not (args.ref and args.new):
            ap.error("need --ref and --new (or --save / --compare)")
        print(f"Computing reference outputs from {args.ref} ...")
        ref = compute_outputs(args.ref)

    print(f"Computing outputs from {args.new} ...")
    new = compute_outputs(args.new)
    ok = compare(ref, new)

    print()
    print("RESULT:", "PASS - no regression ✓" if ok else "FAIL - outputs differ ✗")
    raise SystemExit(0 if ok else 1)
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()