# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "lighteval>=0.6.0",
#     "torch>=2.0.0",
#     "transformers>=4.40.0",
#     "accelerate>=0.30.0",
#     "peft>=0.7.0",
# ]
# ///
"""Evaluate fine-tuned wheattoast11/agent-zero-lfm-1.2b-v1 on standard benchmarks.

Launches ``lighteval accelerate`` in a child process using the current Python
interpreter, lets the child write directly to this process's stdout/stderr,
and exits with the child's return code.
"""

import shlex
import subprocess
import sys

# Model specification string handed to lighteval as its first positional argument.
model_args = "model_name=wheattoast11/agent-zero-lfm-1.2b-v1,trust_remote_code=True"

# Comma-separated task specifications handed to lighteval as its second
# positional argument.
tasks = ",".join(
    [
        "leaderboard|mmlu|5|0",
        "leaderboard|arc:challenge|0|0",
        "leaderboard|truthfulqa:mc|0|0",
    ]
)

# Argument vector (no shell involved); running through ``sys.executable -m``
# keeps the child in the same interpreter/environment as this script.
cmd = [
    sys.executable,
    "-m",
    "lighteval",
    "accelerate",
    model_args,
    tasks,
    "--output-dir",
    "./eval_results_finetuned",
]


def main() -> int:
    """Run the evaluation command and return the child's exit code.

    Returns:
        The return code of the ``lighteval`` child process.
    """
    # shlex.join quotes arguments containing shell metacharacters (the task
    # list contains ``|``), so the echoed line can be pasted into a shell.
    print(f"Running: {shlex.join(cmd)}")
    result = subprocess.run(cmd, check=False)
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())