File size: 1,952 Bytes
027b87b 4c6bb3c 027b87b 09732e8 027b87b 09732e8 027b87b 09732e8 f3db6e2 09732e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "lighteval>=0.6.0",
# "torch>=2.0.0",
# "transformers>=4.40.0",
# "accelerate>=0.30.0",
# ]
# ///
"""Evaluate baseline LiquidAI/LFM2.5-1.2B-Instruct — list tasks first, then run."""
import subprocess
import sys
import json
# Lower-case substrings; a task-list line containing any of them
# (case-insensitively) is reported as a candidate task.
TASK_KEYWORDS = ("leaderboard", "mmlu", "arc", "truthful")

# Model argument string handed to `lighteval accelerate` as its first positional.
MODEL_ARGS = "model_name=LiquidAI/LFM2.5-1.2B-Instruct,trust_remote_code=True"

# Candidate task specs, tried in order until one run exits with status 0.
# NOTE(review): presumably "suite|task|few_shot|truncate" — confirm against the
# installed lighteval version.
TASK_FORMATS = (
    "leaderboard|mmlu|5|0",
    "community|mmlu|5|0",
    "lighteval|mmlu|5|0",
    "original|mmlu|5|0",
)

OUTPUT_DIR = "./eval_results_baseline"


def tail(text: str, limit: int) -> str:
    """Return at most the last *limit* characters of *text*.

    A negative slice already yields the whole string when it is shorter than
    *limit*, so no length check is needed. ``limit <= 0`` returns ``""``
    (a plain ``text[-0:]`` would return everything).
    """
    return text[-limit:] if limit > 0 else ""


def filter_task_lines(lines, keywords=TASK_KEYWORDS) -> list[str]:
    """Return the stripped lines that contain any of *keywords*.

    Matching is a case-insensitive substring test, so *keywords* must be
    lower-case. Input order is preserved.
    """
    matches = []
    for line in lines:
        lowered = line.lower()
        if any(keyword in lowered for keyword in keywords):
            matches.append(line.strip())
    return matches


def build_eval_command(tasks: str) -> list[str]:
    """Build the argv for one `lighteval accelerate` run over *tasks*."""
    return [
        sys.executable, "-m", "lighteval", "accelerate",
        MODEL_ARGS, tasks,
        "--output-dir", OUTPUT_DIR,
    ]


def main() -> int:
    """List lighteval tasks, then try each task spec until one run succeeds.

    Returns:
        0 as soon as one evaluation run exits successfully, 1 if every
        candidate in ``TASK_FORMATS`` fails.
    """
    # First, list available tasks to find the right names.
    print("=== Listing available leaderboard tasks ===")
    listing = subprocess.run(
        [sys.executable, "-m", "lighteval", "tasks", "list"],
        capture_output=True,
        text=True,
    )
    if listing.returncode != 0:
        # Without this, a broken install just looks like "0 matching tasks".
        print(
            f"WARNING: task listing exited with code {listing.returncode}: "
            f"{tail(listing.stderr, 500)}",
            file=sys.stderr,
        )

    # Split once; reused for both the filtered view and the debug dump.
    task_lines = listing.stdout.split("\n")

    leaderboard_tasks = filter_task_lines(task_lines)
    for task_line in leaderboard_tasks:
        print(task_line)
    print(f"\n=== Found {len(leaderboard_tasks)} matching tasks ===")

    # Print first 50 of all tasks for debugging.
    print("\n=== First 50 tasks from full list ===")
    for line in task_lines[:50]:
        print(line)

    # Try multiple task name formats; stop at the first one that works.
    for tasks in TASK_FORMATS:
        print(f"\n=== Trying task format: {tasks} ===")
        run = subprocess.run(
            build_eval_command(tasks), capture_output=True, text=True
        )
        if run.returncode == 0:
            print("SUCCESS!")
            print(tail(run.stdout, 2000))
            return 0
        print(f"Failed: {tail(run.stderr, 500)}")

    print("\nAll task formats failed. Dumping full task list to stdout.")
    print(listing.stdout)
    return 1


if __name__ == "__main__":
    sys.exit(main())
|