wheattoast11 committed on
Commit
b87b103
·
verified ·
1 Parent(s): 0de8aad

Upload eval_baseline_v2.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. eval_baseline_v2.py +33 -0
eval_baseline_v2.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "lighteval[vllm]>=0.6.0",
#     "torch>=2.0.0",
#     "transformers>=4.40.0",
#     "accelerate>=0.30.0",
#     "vllm>=0.4.0",
# ]
# ///

"""Evaluate baseline LiquidAI/LFM2.5-1.2B-Instruct with lighteval vllm.

The script first dumps ``lighteval vllm --help`` so the options accepted by
the installed version are visible in the job log, then launches the
evaluation and exits with the evaluation's return code.
"""

import shlex
import shutil
import subprocess
import sys

MODEL = "LiquidAI/LFM2.5-1.2B-Instruct"
TASKS = "leaderboard|mmlu|5,leaderboard|arc:challenge|25,leaderboard|truthfulqa:mc|0"
OUTPUT_DIR = "./eval_baseline"


def build_command(
    model: str = MODEL,
    tasks: str = TASKS,
    output_dir: str = OUTPUT_DIR,
) -> list[str]:
    """Return the ``lighteval vllm`` argument vector for one evaluation run.

    Args:
        model: Model identifier passed as the first positional argument.
        tasks: Comma-separated task specification passed as the second
            positional argument.
        output_dir: Directory given to ``--output-dir``.

    Returns:
        The command as a list of arguments, suitable for ``subprocess.run``
        without a shell.
    """
    # NOTE(review): the positional formats and flags are passed exactly as
    # the original script wrote them; confirm them against the help output
    # printed by main() for the installed lighteval version.
    return [
        "lighteval",
        "vllm",
        model,
        tasks,
        "--trust-remote-code",
        "--use-chat-template",
        "--output-dir",
        output_dir,
    ]


def main() -> int:
    """Print the CLI help, run the evaluation, and return its exit code.

    Returns:
        The return code of the ``lighteval vllm`` evaluation process, or 127
        when the ``lighteval`` executable cannot be found on PATH.
    """
    # Fail with a readable message instead of a FileNotFoundError traceback.
    if shutil.which("lighteval") is None:
        print("error: 'lighteval' executable not found on PATH", file=sys.stderr)
        return 127

    # First check lighteval vllm --help to see valid options.
    help_result = subprocess.run(
        ["lighteval", "vllm", "--help"],
        capture_output=True,
        text=True,
    )
    print("=== lighteval vllm --help ===")
    print(help_result.stdout)
    print(help_result.stderr)

    cmd = build_command()
    # shlex.join quotes the '|' characters in the task spec, so the echoed
    # command can be pasted into a shell unchanged.
    print(f"\n=== Running: {shlex.join(cmd)} ===")
    # The child inherits stdout; flush so the banner precedes its output
    # even when stdout is a pipe or file.
    sys.stdout.flush()

    result = subprocess.run(cmd)
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())