# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "lighteval>=0.6.0",
#     "torch>=2.0.0",
#     "transformers>=4.40.0",
#     "accelerate>=0.30.0",
#     "peft>=0.10.0",
# ]
# ///
"""
v2 Finetuned: All 6 benchmarks (MMLU, GSM8K, ARC-C, Winogrande, TruthfulQA, HellaSwag).
Merges LoRA adapter before evaluation.

Note: only five MMLU subjects are listed in the task batches below.
"""

import gc
import glob
import os
import subprocess

# Hugging Face repo ids of the base model and of the LoRA adapter merged into it.
_BASE_MODEL = "LiquidAI/LFM2.5-1.2B-Instruct"
_ADAPTER = "wheattoast11/agent-zero-lfm-1.2b-v2"

# Directory the merged model and its tokenizer are saved to.
_MERGED_PATH = "/tmp/merged_model_v2"

# Run in two batches to manage memory
_TASK_BATCHES = (
    (
        "leaderboard|mmlu:abstract_algebra|5,"
        "leaderboard|mmlu:anatomy|5,"
        "leaderboard|mmlu:astronomy|5,"
        "leaderboard|mmlu:business_ethics|5,"
        "leaderboard|mmlu:clinical_knowledge|5,"
        "leaderboard|gsm8k|5"
    ),
    (
        "leaderboard|hellaswag|0,"
        "leaderboard|arc:challenge|25,"
        "leaderboard|truthfulqa:mc|0,"
        "leaderboard|winogrande|5"
    ),
)

# Glob matching every JSON file below any batch output directory.
# NOTE(review): nothing in this script clears these directories, so files left
# by an earlier run are matched (and printed) as well.
_RESULT_GLOB = "/tmp/results_v2_*/**/*.json"

# Maximum number of characters printed per result file.
_MAX_RESULT_CHARS = 10000


def _merge_adapter(merged_path: str) -> None:
    """Merge the LoRA adapter into the base model and save it to *merged_path*.

    The merged weights and the adapter repo's tokenizer are both written to
    *merged_path*. The model is loaded with ``device_map="cpu"`` in float16.
    """
    # Imported lazily so the environment variables set by main() are already
    # in place when torch/transformers are first loaded.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    import torch

    print("Merging v2 adapter...")
    model = AutoModelForCausalLM.from_pretrained(
        _BASE_MODEL,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="cpu",
    )
    model = PeftModel.from_pretrained(model, _ADAPTER)
    model = model.merge_and_unload()
    model.save_pretrained(merged_path)

    tokenizer = AutoTokenizer.from_pretrained(
        _ADAPTER,
        trust_remote_code=True,
    )
    tokenizer.save_pretrained(merged_path)

    # Drop the in-process copies before the evaluation subprocesses start.
    del model, tokenizer
    gc.collect()
    print("Adapter merged.")


def _model_args(merged_path: str) -> str:
    """Return the lighteval model-argument string for the merged model."""
    return (
        f"model_name={merged_path},trust_remote_code=True,"
        "dtype=float16,max_length=2048"
    )


def _lighteval_command(model_args: str, tasks: str, out_dir: str) -> list[str]:
    """Return the argv list for one ``lighteval accelerate`` invocation."""
    return ["lighteval", "accelerate", model_args, tasks, "--output-dir", out_dir]


def _run_batches(model_args: str) -> None:
    """Run every task batch in order, each into its own output directory.

    Raises:
        subprocess.CalledProcessError: if a lighteval run exits non-zero
            (``check=True``); later batches are then not started.
    """
    for i, tasks in enumerate(_TASK_BATCHES):
        out_dir = f"/tmp/results_v2_batch{i}"
        cmd = _lighteval_command(model_args, tasks, out_dir)
        print(f"\nBatch {i}: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)


def _read_head(path: str, limit: int) -> str:
    """Return at most *limit* characters from the start of the text file *path*.

    The file is decoded as UTF-8 with undecodable bytes replaced, so a stray
    byte cannot abort the final results dump. Only *limit* characters are
    requested instead of reading the whole file and slicing it.
    """
    with open(path, encoding="utf-8", errors="replace") as fh:
        return fh.read(limit)


def _print_results() -> None:
    """Print the head of every result JSON found under the output directories."""
    print("\n=== ALL RESULTS ===")
    for path in sorted(glob.glob(_RESULT_GLOB, recursive=True)):
        print(f"\n=== {path} ===")
        print(_read_head(path, _MAX_RESULT_CHARS))


def main() -> None:
    """Merge the adapter, run all lighteval batches, then print the results."""
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        # Mirror the token under the other variable names without overriding
        # values that are already set.
        os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token)
        os.environ.setdefault("HF_HUB_TOKEN", hf_token)

    # Set before torch is imported; the lighteval subprocesses inherit it too.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    _merge_adapter(_MERGED_PATH)
    _run_batches(_model_args(_MERGED_PATH))
    _print_results()


if __name__ == "__main__":
    main()