File size: 1,116 Bytes
f09153a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import argparse
import os
import sys

# Add the directory two levels above this file's directory to sys.path so that utils can be imported
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
from utils.benchmark_utils import get_benchmark_score

def _exit_with_error(message):
    """Print *message* to stderr and terminate the process with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def main():
    """Print the logical_reasoning benchmark score for a checkpoint directory.

    Reads a single positional command-line argument, ``model_path``. The
    path must be an existing directory whose base name ends in
    ``_<integer>``; that integer is taken as the step number and handed to
    ``get_benchmark_score`` together with the benchmark name.

    The returned score is printed to stdout. If the directory does not
    exist, the step number cannot be parsed, or ``get_benchmark_score``
    returns ``None``, an error message is written to stderr and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Evaluate logical_reasoning")
    parser.add_argument("model_path", type=str, help="Path to model checkpoint")
    args = parser.parse_args()

    if not os.path.isdir(args.model_path):
        _exit_with_error(f"Error: Directory not found at '{args.model_path}'")

    # normpath strips any trailing separator so basename yields the directory name.
    normalized_path = os.path.normpath(args.model_path)
    checkpoint_name = os.path.basename(normalized_path)

    # Only the text after the last underscore is relevant for the step number.
    step_text = checkpoint_name.rsplit('_', 1)[-1]
    try:
        step_number = int(step_text)
    except (ValueError, IndexError):
        _exit_with_error(f"Error: Cannot parse step number from '{checkpoint_name}'")

    score = get_benchmark_score("logical_reasoning", step_number)
    if score is None:
        _exit_with_error(f"Error: Invalid step number {step_number}")

    print(score)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()