File size: 1,116 Bytes
f09153a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | import argparse
import os
import sys
# Add parent directory to path to import utils
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
from utils.benchmark_utils import get_benchmark_score
def main():
parser = argparse.ArgumentParser(description="Evaluate logical_reasoning")
parser.add_argument("model_path", type=str, help="Path to model checkpoint")
args = parser.parse_args()
if not os.path.isdir(args.model_path):
print(f"Error: Directory not found at '{args.model_path}'", file=sys.stderr)
sys.exit(1)
checkpoint_name = os.path.basename(os.path.normpath(args.model_path))
try:
step_number = int(checkpoint_name.split('_')[-1])
except (ValueError, IndexError):
print(f"Error: Cannot parse step number from '{checkpoint_name}'", file=sys.stderr)
sys.exit(1)
result = get_benchmark_score("logical_reasoning", step_number)
if result is None:
print(f"Error: Invalid step number {step_number}", file=sys.stderr)
sys.exit(1)
print(result)
if __name__ == "__main__":
main()
|