Spaces:

dvilasuero
/

midicaps-logs

Running

File size: 5,217 Bytes

316475b

#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "inspect-ai",
#     "datasets",
#     "openai",
#     "transformers",
#     "accelerate",
#     "huggingface_hub",
# ]
# ///
"""
Wrapper script that runs an arbitrary Inspect eval and uploads logs to a Space.
This script is meant to be run on HF Jobs.
"""
import os
import sys
import tempfile
from pathlib import Path
from huggingface_hub import HfApi
from inspect_ai import eval
from inspect_ai.log import bundle_log_dir


def bundle_and_upload_to_space(log_dir: str, hf_space_id: str, hf_token: str) -> None:
    """Bundle the logs in ``log_dir`` and upload the bundle to a HF Space.

    The logs are bundled with ``inspect_ai.log.bundle_log_dir`` into a
    temporary directory, the Space is created if needed (SDK ``static``),
    and the bundle is pushed to the Space.

    Args:
        log_dir: Directory containing the eval logs to bundle.
        hf_space_id: Repo id of the target Space (used in the
            ``https://huggingface.co/spaces/<id>`` URL printed at the end).
        hf_token: Token handed to ``HfApi`` for authentication.

    Raises:
        ValueError: If ``log_dir`` does not exist.
    """
    if not Path(log_dir).exists():
        raise ValueError(f"Log directory '{log_dir}' does not exist")

    # The bundle only needs to live long enough to be uploaded.
    with tempfile.TemporaryDirectory() as temp_bundle_dir:
        bundle_output_dir = os.path.join(temp_bundle_dir, "bundle")

        print(f"Bundling logs from {log_dir}...")
        bundle_log_dir(log_dir=log_dir, output_dir=bundle_output_dir, overwrite=True)
        print("Bundle created successfully")

        api = HfApi(token=hf_token)

        # Create Space if it doesn't exist. This is deliberately best-effort:
        # if the Space really is unusable, the upload below will raise.
        try:
            api.create_repo(
                repo_id=hf_space_id,
                repo_type="space",
                exist_ok=True,
                space_sdk="static",
            )
            print(f"Space {hf_space_id} is ready")
        except Exception as e:
            print(f"Warning: Could not create/verify Space: {e}")

        # Upload the whole bundle in one call instead of one request (and one
        # commit) per file; relative paths inside the bundle are preserved.
        print(f"Uploading bundle to Space {hf_space_id}...")
        uploaded_count = sum(
            len(files) for _root, _dirs, files in os.walk(bundle_output_dir)
        )
        api.upload_folder(
            folder_path=bundle_output_dir,
            repo_id=hf_space_id,
            repo_type="space",
        )

        print(f"Successfully uploaded {uploaded_count} files")
        print(f"View at: https://huggingface.co/spaces/{hf_space_id}")


def _find_task_candidates(module) -> list[str]:
    """Return names in ``module`` that may be task functions.

    Public callables (excluding ``record_to_*`` helpers) are considered.
    Callables whose ``__module__`` matches the module's own name are
    preferred, so that names merely imported by the eval script are not
    mistaken for tasks. If no callable is defined in the module itself, the
    full list of public callables is returned as a fallback.
    """
    public_callables = [
        name
        for name in dir(module)
        if not name.startswith(("_", "record_to_"))
        and callable(getattr(module, name))
    ]
    defined_here = [
        name
        for name in public_callables
        if getattr(getattr(module, name), "__module__", None) == module.__name__
    ]
    return defined_here or public_callables


if __name__ == "__main__":
    # Usage: eval_runner.py <eval_script_url> <task_name> <model> <hf_space_id> [log_dir]
    # Pass "auto" as <task_name> to auto-detect a single task in the script.
    if len(sys.argv) < 5:
        print("Usage: eval_runner.py <eval_script_url> <task_name> <model> <hf_space_id> [log_dir]")
        sys.exit(1)

    eval_script_url, task_name, model, hf_space_id = sys.argv[1:5]
    log_dir = sys.argv[5] if len(sys.argv) > 5 else "./logs"

    # Download eval script
    # SECURITY NOTE(review): the downloaded code is executed below with the
    # job's full privileges; only point this at URLs you trust.
    print(f"Downloading eval script from {eval_script_url}...")
    import urllib.request
    with urllib.request.urlopen(eval_script_url) as response:
        eval_code = response.read().decode('utf-8')

    # Write eval code to a temporary file and import it. The file is written
    # as UTF-8 explicitly so it round-trips the decoded source regardless of
    # the platform's default encoding.
    print("Loading eval...")
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.py', delete=False, encoding='utf-8'
    ) as f:
        f.write(eval_code)
        temp_eval_file = f.name

    try:
        import importlib.util
        spec = importlib.util.spec_from_file_location("user_eval", temp_eval_file)
        if spec is None or spec.loader is None:
            print(f"Error: Could not load eval script from {eval_script_url}")
            sys.exit(1)
        user_eval_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(user_eval_module)
    finally:
        # Clean up temp file (also runs when exiting via sys.exit above)
        os.unlink(temp_eval_file)

    # Get the task function
    if task_name == "auto":
        # Auto-detect: heuristic search for the single task in the script
        task_candidates = _find_task_candidates(user_eval_module)

        if not task_candidates:
            print(f"Error: No tasks found in {eval_script_url}")
            sys.exit(1)
        elif len(task_candidates) > 1:
            print(f"Error: Multiple tasks found: {task_candidates}")
            print("Please pass an explicit <task_name> instead of 'auto'")
            sys.exit(1)

        task_name = task_candidates[0]
        print(f"Auto-detected task: {task_name}")

    if not hasattr(user_eval_module, task_name):
        print(f"Error: Task '{task_name}' not found in {eval_script_url}")
        available = [name for name in dir(user_eval_module) if not name.startswith('_')]
        print(f"Available: {available}")
        sys.exit(1)

    task_fn = getattr(user_eval_module, task_name)

    # Run evaluation (``eval`` here is inspect_ai.eval, imported at file top)
    print(f"Running eval: {task_name} with model {model}")
    eval(task_fn(), model=model, max_tokens=4096, log_dir=log_dir)

    # Upload logs if space specified
    if hf_space_id:
        print(f"Uploading logs to {hf_space_id}...")
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print("Warning: HF_TOKEN not set, skipping upload")
        else:
            bundle_and_upload_to_space(log_dir, hf_space_id, hf_token)
    else:
        print("No Space ID provided, logs remain in job")