File size: 3,640 Bytes
de39720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "inspect-ai",
#     "datasets",
#     "openai",
#     "transformers",
#     "accelerate",
#     "huggingface_hub",
#     "inspect_evals",
# ]
# ///
"""Runner that downloads an eval script and executes it using inspect CLI."""
import os
import sys
import subprocess
import tempfile
import urllib.request
from pathlib import Path
from huggingface_hub import HfApi
from inspect_ai.log import bundle_log_dir


def bundle_and_upload_to_space(log_dir: str, hf_space_id: str, hf_token: str) -> None:
    """Bundle the logs in ``log_dir`` and upload the bundle to a Hugging Face Space.

    The bundle is produced by ``inspect_ai.log.bundle_log_dir`` into a temporary
    directory, which is removed again once the upload has finished.

    Args:
        log_dir: Directory containing the eval logs to bundle.
        hf_space_id: Target Space repository id (e.g. ``"user/space-name"``).
        hf_token: Hugging Face token used to authenticate the upload.

    Returns:
        None. If ``log_dir`` does not exist a warning is printed and nothing is
        bundled or uploaded.
    """
    log_path = Path(log_dir)
    if not log_path.exists():
        print(f"Warning: Log directory '{log_dir}' does not exist")
        return

    with tempfile.TemporaryDirectory() as temp_bundle_dir:
        bundle_output_dir = os.path.join(temp_bundle_dir, "bundle")

        print(f"Bundling logs from {log_dir}...")
        bundle_log_dir(log_dir=log_dir, output_dir=bundle_output_dir, overwrite=True)

        # Count the bundled files up front; this is only used for the summary line.
        bundled_file_count = sum(
            len(files) for _, _, files in os.walk(bundle_output_dir)
        )

        api = HfApi(token=hf_token)

        print(f"Uploading to Space {hf_space_id}...")
        # Upload the whole bundle in one call instead of one upload_file() call
        # (and therefore one commit) per file. The bundle's relative layout is
        # preserved at the root of the Space repository.
        api.upload_folder(
            folder_path=bundle_output_dir,
            repo_id=hf_space_id,
            repo_type="space",
        )

        print(f"Uploaded {bundled_file_count} files")
        print(f"View at: https://huggingface.co/spaces/{hf_space_id}")


def parse_cli_args(argv):
    """Split the runner's command line into its components.

    Expected layout:
    ``<prog> <eval_ref> <model> <space_id> [log_dir] [--inspect-evals] [extra_args...]``

    Args:
        argv: Full argument vector including the program name; must contain at
            least the three positional arguments.

    Returns:
        A tuple ``(eval_ref, model, hf_space_id, log_dir, is_inspect_evals,
        extra_args)``. ``log_dir`` falls back to ``"./logs"`` when the fourth
        positional argument is missing or starts with ``--``. ``extra_args``
        holds every remaining argument except the ``--inspect-evals`` flag.
    """
    eval_ref, model, hf_space_id = argv[1:4]

    remaining = list(argv[4:])
    if remaining and not remaining[0].startswith("--"):
        log_dir = remaining.pop(0)
    else:
        # No log_dir given: argv[4] (if any) is already an option and must be
        # kept as part of the extra arguments rather than skipped.
        log_dir = "./logs"

    is_inspect_evals = "--inspect-evals" in remaining
    extra_args = [arg for arg in remaining if arg != "--inspect-evals"]
    return eval_ref, model, hf_space_id, log_dir, is_inspect_evals, extra_args


if __name__ == "__main__":
    if len(sys.argv) < 4:
        print("Usage: eval_runner.py <eval_ref> <model> <space_id> [log_dir] [--inspect-evals] [extra_args...]")
        sys.exit(1)

    (
        eval_ref,
        model,
        hf_space_id,
        log_dir,
        is_inspect_evals,
        extra_args,
    ) = parse_cli_args(sys.argv)

    # Set only once a local file may exist, so the finally-clause knows whether
    # there is anything to remove.
    cleanup_file = None
    try:
        if is_inspect_evals:
            # Use inspect_evals path directly
            print(f"Using inspect_evals: {eval_ref}")
            eval_target = eval_ref
        else:
            # Download custom eval script.
            # NOTE(security): this fetches code from a caller-supplied URL and
            # then executes it via `inspect eval`; only use trusted sources.
            print(f"Downloading eval from {eval_ref}...")
            with urllib.request.urlopen(eval_ref) as response:
                eval_code = response.read().decode('utf-8')

            eval_filename = "downloaded_eval.py"
            cleanup_file = eval_filename
            # Write with the same encoding the payload was decoded with, rather
            # than the platform default, so non-ASCII source survives intact.
            with open(eval_filename, 'w', encoding='utf-8') as f:
                f.write(eval_code)

            eval_target = eval_filename

        print(f"Running inspect eval with model {model}...")
        cmd = [
            "inspect", "eval", eval_target,
            "--model", model,
            "--log-dir", log_dir,
        ]
        cmd.extend(extra_args)

        print(f"Command: {' '.join(cmd)}")
        # check=True: a failing eval raises CalledProcessError and skips the upload.
        subprocess.run(cmd, check=True)

        print(f"\nUploading logs to {hf_space_id}...")
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print("Warning: HF_TOKEN not set, skipping upload")
        else:
            bundle_and_upload_to_space(log_dir, hf_space_id, hf_token)

    finally:
        # Remove the downloaded script even when the download write or the
        # eval run failed.
        if cleanup_file and os.path.exists(cleanup_file):
            os.unlink(cleanup_file)