wheattoast11 committed on
Commit
062a37b
·
verified ·
1 Parent(s): d05142f

Upload eval_v2_baseline_mmlu_gsm8k.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. eval_v2_baseline_mmlu_gsm8k.py +35 -0
eval_v2_baseline_mmlu_gsm8k.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "lighteval>=0.6.0",
5
+ # "torch>=2.0.0",
6
+ # "transformers>=4.40.0",
7
+ # "accelerate>=0.30.0",
8
+ # ]
9
+ # ///
10
+ """Baseline: MMLU + GSM8K."""
11
+
12
+ import os, subprocess, glob
13
+
14
def main():
    """Run a lighteval baseline (5 MMLU subsets + GSM8K, 5-shot) and preview results.

    Propagates ``HF_TOKEN`` (if set) to the env-var names Hub libraries look
    for, launches ``lighteval accelerate`` as a subprocess against
    LiquidAI/LFM2.5-1.2B-Instruct, then prints a truncated preview of every
    result JSON written under ``/tmp/results``.

    Raises:
        subprocess.CalledProcessError: if the lighteval run exits non-zero
            (``check=True``).
    """
    # Mirror HF_TOKEN under both legacy/alternate names; setdefault so an
    # explicitly exported value is never clobbered.
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token)
        os.environ.setdefault("HF_HUB_TOKEN", hf_token)
    # Reduce CUDA allocator fragmentation for the fp16 1.2B model.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    model_args = "model_name=LiquidAI/LFM2.5-1.2B-Instruct,trust_remote_code=True,dtype=float16,max_length=2048"
    tasks = "leaderboard|mmlu:abstract_algebra|5,leaderboard|mmlu:anatomy|5,leaderboard|mmlu:astronomy|5,leaderboard|mmlu:business_ethics|5,leaderboard|mmlu:clinical_knowledge|5,leaderboard|gsm8k|5"

    cmd = ["lighteval", "accelerate", model_args, tasks, "--output-dir", "/tmp/results"]
    print(f"Running: {' '.join(cmd)}")
    subprocess.run(cmd, check=True)
    print("DONE")

    # Preview each result file.  fh.read(10000) reads at most 10k chars,
    # instead of the original fh.read()[:10000], which loaded the whole
    # (possibly large) JSON into memory just to slice it.  Explicit UTF-8
    # avoids depending on the platform default encoding.
    for f in glob.glob("/tmp/results/**/*.json", recursive=True):
        print(f"\n=== {f} ===")
        with open(f, encoding="utf-8") as fh:
            print(fh.read(10000))
33
+
34
# Entry-point guard: run the eval only when this file is executed directly
# (e.g. via `uv run`), not when it is imported as a module.
if __name__ == "__main__":
    main()