Luciole-1B-Base / check_nonregression.py

Initial release of Luciole-1B-Base

50c5fef 2 days ago

4.27 kB

	#!/usr/bin/env python3
	"""Non-regression check for the Luciole-1B-Base lm_head fix.

	The fix removes the redundant, all-zero ``lm_head.weight`` tensor from
	``model.safetensors``. Because ``tie_word_embeddings`` is true, the language
	modelling head is reconstructed from ``model.embed_tokens.weight`` at load
	time, so the model must behave identically before and after the fix.

	Usage:
	# Compare two model folders (e.g. backup vs fixed):
	python check_nonregression.py --ref ../Luciole-1B-Base-backup --new .

	# Or snapshot a reference now, then compare later:
	python check_nonregression.py --ref . --save ref_outputs.pt
	# ... apply the fix ...
	python check_nonregression.py --new . --compare ref_outputs.pt

	This script only does forward passes (greedy / logits), no sampling, so it is
	fully deterministic and CPU-friendly.
	"""
	import argparse
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Fixed prompts fed to every model under test; compute_outputs() produces
	# one result record per entry, in this order.
	PROMPTS = [
	    "Bonjour, je m'appelle",
	    "La capitale de la France est",
	    "Il était une fois",
	    "2 + 2 =",
	]


	def load(path):
	    """Load the tokenizer and the causal LM stored at *path*.

	    The model weights are loaded as float32 and the model is switched to
	    evaluation mode before being returned.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(path)
	    # nn.Module.eval() returns the module itself, so it can be chained.
	    net = AutoModelForCausalLM.from_pretrained(
	        path, torch_dtype=torch.float32
	    ).eval()
	    return tokenizer, net


	@torch.no_grad()
	def compute_outputs(path):
	    """Run every prompt in ``PROMPTS`` through the model stored at *path*.

	    Returns:
	        A dict with two keys:

	        * ``"tied"`` -- True when the output-embedding weight and the
	          input-embedding weight report the same ``data_ptr()``.
	        * ``"results"`` -- one dict per prompt holding the prompt text, the
	          logits at the last prompt position, the greedy continuation ids
	          (up to 20 new tokens) and their decoded text.
	    """
	    tokenizer, net = load(path)

	    # Sanity: the head must share storage with the input embeddings.
	    head_ptr = net.get_output_embeddings().weight.data_ptr()
	    embed_ptr = net.get_input_embeddings().weight.data_ptr()
	    is_tied = head_ptr == embed_ptr

	    def _record(text):
	        """Build the result record for a single prompt."""
	        input_ids = tokenizer(text, return_tensors="pt").input_ids
	        last_logits = net(input_ids).logits[0, -1]  # last-token logits
	        continuation = net.generate(
	            input_ids, max_new_tokens=20, do_sample=False
	        )[0]
	        return {
	            "prompt": text,
	            "logits": last_logits.clone(),
	            "greedy_ids": continuation.clone(),
	            "greedy_text": tokenizer.decode(
	                continuation, skip_special_tokens=True
	            ),
	        }

	    return {"tied": is_tied, "results": [_record(text) for text in PROMPTS]}


	def compare(ref, new):
	    """Compare reference outputs with new outputs and print a report.

	    Both arguments are dicts shaped like the return value of
	    ``compute_outputs``: a ``"tied"`` flag plus a ``"results"`` list whose
	    entries hold ``"prompt"``, ``"logits"``, ``"greedy_ids"`` and
	    ``"greedy_text"``.

	    Args:
	        ref: outputs of the reference model.
	        new: outputs of the new (fixed) model.

	    Returns:
	        True only when ``new["tied"]`` is true, both sides contain the same
	        number of results for the same prompts, and for every prompt the
	        greedy ids are identical and the last-token logits are bit-for-bit
	        equal. False otherwise.
	    """
	    ok = True
	    if not new["tied"]:
	        print(" [FAIL] output head is NOT tied to input embeddings in --new model")
	        ok = False
	    else:
	        print(" [ ok ] output head is tied to input embeddings")

	    ref_results = ref["results"]
	    new_results = new["results"]

	    # zip() stops at the shorter list, so a stale reference file (saved with
	    # a different prompt set) would otherwise be compared on a subset only.
	    if len(ref_results) != len(new_results):
	        print(
	            f" [FAIL] number of results differs: "
	            f"ref has {len(ref_results)}, new has {len(new_results)}"
	        )
	        ok = False

	    for r, n in zip(ref_results, new_results):
	        same_prompt = r["prompt"] == n["prompt"]
	        same_ids = torch.equal(r["greedy_ids"], n["greedy_ids"])
	        if r["logits"].shape == n["logits"].shape:
	            max_logit_diff = (r["logits"] - n["logits"]).abs().max().item()
	        else:
	            # Different vocabulary size: subtraction would raise or
	            # broadcast, so report it as an unbounded difference instead.
	            max_logit_diff = float("inf")

	        passed = same_prompt and same_ids and max_logit_diff == 0.0
	        status = "ok" if passed else "FAIL"
	        if not passed:
	            ok = False

	        print(f" [{status:>4}] {r['prompt']!r}")
	        print(f" max|Δlogits| = {max_logit_diff:.3e} greedy_ids_identical = {same_ids}")
	        if not same_prompt:
	            print(f" ref prompt : {r['prompt']!r}")
	            print(f" new prompt : {n['prompt']!r}")
	        if not same_ids:
	            print(f" ref : {r['greedy_text']!r}")
	            print(f" new : {n['greedy_text']!r}")
	    return ok


	def main():
	    """Command-line entry point.

	    Three modes, selected by the flags given:

	    * ``--ref R --save F``: compute outputs for R, store them in F, return.
	    * ``--new N --compare F``: load outputs from F and compare N to them.
	    * ``--ref R --new N``: compute both and compare directly.

	    Raises:
	        SystemExit: code 0 when the comparison passes, 1 when it fails, and
	            2 (raised by argparse) when the flag combination is invalid.
	    """
	    ap = argparse.ArgumentParser()
	    ap.add_argument("--ref", help="reference model folder")
	    ap.add_argument("--new", help="new (fixed) model folder")
	    ap.add_argument("--save", help="save reference outputs to this file")
	    ap.add_argument("--compare", help="compare --new against saved reference file")
	    args = ap.parse_args()

	    # Argument checks use ap.error() rather than assert: asserts vanish
	    # under ``python -O`` and would exit with the same code as a regression.
	    if args.save:
	        if not args.ref:
	            ap.error("--save requires --ref")
	        print(f"Computing reference outputs from {args.ref} ...")
	        torch.save(compute_outputs(args.ref), args.save)
	        print(f"Saved reference outputs to {args.save}")
	        return

	    if args.compare:
	        if not args.new:
	            ap.error("--compare requires --new")
	        # NOTE(security): weights_only=False unpickles arbitrary objects;
	        # only point --compare at files produced by this script's --save.
	        ref = torch.load(args.compare, weights_only=False)
	    else:
	        if not (args.ref and args.new):
	            ap.error("need --ref and --new (or --save / --compare)")
	        print(f"Computing reference outputs from {args.ref} ...")
	        ref = compute_outputs(args.ref)

	    print(f"Computing outputs from {args.new} ...")
	    new = compute_outputs(args.new)
	    ok = compare(ref, new)

	    print()
	    print("RESULT:", "PASS - no regression ✓" if ok else "FAIL - outputs differ ✗")
	    raise SystemExit(0 if ok else 1)


	# Run the CLI only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()