FuryAssassin committed on
Commit
560bae7
·
verified ·
1 Parent(s): 01b6eca

Upload evaluation/benchmarks/creative_writing/eval.py with huggingface_hub

Browse files
evaluation/benchmarks/creative_writing/eval.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import sys
4
+
5
+ # Add parent directory to path to import utils
6
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
7
+ from utils.benchmark_utils import get_benchmark_score
8
+
9
def main(argv=None):
    """Print the creative_writing benchmark score for a checkpoint directory.

    The step number is taken from the text after the last underscore in the
    directory's base name (for example, a directory named ``step_100`` yields
    100) and is passed to ``get_benchmark_score("creative_writing", step)``.
    The returned value is printed to stdout.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None`` (the default), argparse reads
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Raises:
        SystemExit: With status 1 when the path is not a directory, when the
            step number cannot be parsed from the directory name, or when
            ``get_benchmark_score`` returns ``None``. argparse also exits on
            malformed arguments.
    """
    parser = argparse.ArgumentParser(description="Evaluate creative_writing")
    parser.add_argument("model_path", type=str, help="Path to model checkpoint")
    args = parser.parse_args(argv)

    if not os.path.isdir(args.model_path):
        print(f"Error: Directory not found at '{args.model_path}'", file=sys.stderr)
        sys.exit(1)

    # normpath strips a trailing separator, so "ckpt_5/" still yields "ckpt_5".
    checkpoint_name = os.path.basename(os.path.normpath(args.model_path))
    try:
        # Splitting always yields at least one piece, so only int() can fail.
        step_number = int(checkpoint_name.rsplit("_", 1)[-1])
    except ValueError:
        print(f"Error: Cannot parse step number from '{checkpoint_name}'", file=sys.stderr)
        sys.exit(1)

    result = get_benchmark_score("creative_writing", step_number)
    if result is None:
        print(f"Error: Invalid step number {step_number}", file=sys.stderr)
        sys.exit(1)

    print(result)
31
+
32
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()