| import argparse | |
| import os | |
| import sys | |
| # Add parent directory to path to import utils | |
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) | |
| from utils.benchmark_utils import get_benchmark_score | |
| def main(): | |
| parser = argparse.ArgumentParser(description="Evaluate creative_writing") | |
| parser.add_argument("model_path", type=str, help="Path to model checkpoint") | |
| args = parser.parse_args() | |
| if not os.path.isdir(args.model_path): | |
| print(f"Error: Directory not found at '{args.model_path}'", file=sys.stderr) | |
| sys.exit(1) | |
| checkpoint_name = os.path.basename(os.path.normpath(args.model_path)) | |
| try: | |
| step_number = int(checkpoint_name.split('_')[-1]) | |
| except (ValueError, IndexError): | |
| print(f"Error: Cannot parse step number from '{checkpoint_name}'", file=sys.stderr) | |
| sys.exit(1) | |
| result = get_benchmark_score("creative_writing", step_number) | |
| if result is None: | |
| print(f"Error: Invalid step number {step_number}", file=sys.stderr) | |
| sys.exit(1) | |
| print(result) | |
| if __name__ == "__main__": | |
| main() | |