"""Evaluate baseline LiquidAI/LFM2.5-1.2B-Instruct — list tasks first, then run."""
| |
|
| | import subprocess |
| | import sys |
| | import json |
| |
|
| | |
# Ask lighteval for its full task catalogue; the output is parsed below.
print("=== Listing available leaderboard tasks ===")
list_cmd = [sys.executable, "-m", "lighteval", "tasks", "list"]
result = subprocess.run(list_cmd, capture_output=True, text=True)
# Warn loudly if the listing itself failed — otherwise an empty stdout
# would silently yield zero matching tasks in the filter step below.
if result.returncode != 0:
    print(f"WARNING: task listing exited with {result.returncode}: {result.stderr[-500:]}")
|
| | |
# Keywords that identify Open-LLM-Leaderboard-style tasks in the listing.
_TASK_KEYWORDS = ("leaderboard", "mmlu", "arc", "truthful")

# Collect (and echo) every listing line mentioning one of the keywords.
leaderboard_tasks = []
for line in result.stdout.split("\n"):
    lowered = line.lower()  # lowercase once instead of once per keyword
    if any(keyword in lowered for keyword in _TASK_KEYWORDS):
        leaderboard_tasks.append(line.strip())
        print(line.strip())

print(f"\n=== Found {len(leaderboard_tasks)} matching tasks ===")
| |
|
| | |
# Show the head of the raw task listing for manual inspection.
print("\n=== First 50 tasks from full list ===")
head = result.stdout.split("\n")[:50]
print("\n".join(head))
| |
|
| | |
model_args = "model_name=LiquidAI/LFM2.5-1.2B-Instruct,trust_remote_code=True"

# lighteval's task-spec syntax (suite|task|num_fewshot|truncate) varies
# across versions; try the known suite prefixes until one is accepted.
for task_spec in [
    "leaderboard|mmlu|5|0",
    "community|mmlu|5|0",
    "lighteval|mmlu|5|0",
    "original|mmlu|5|0",
]:
    print(f"\n=== Trying task format: {task_spec} ===")
    cmd = [
        sys.executable, "-m", "lighteval", "accelerate",
        model_args, task_spec,
        "--output-dir", "./eval_results_baseline",
    ]
    r = subprocess.run(cmd, capture_output=True, text=True)
    if r.returncode == 0:
        print("SUCCESS!")
        # Negative slicing already copes with short strings — the original
        # length check before slicing was redundant.
        print(r.stdout[-2000:])
        sys.exit(0)
    else:
        print(f"Failed: {r.stderr[-500:]}")

# Every known task-spec format was rejected; dump the raw listing for debugging.
print("\nAll task formats failed. Dumping full task list to stdout.")
print(result.stdout)
sys.exit(1)
| |
|