File size: 5,217 Bytes
316475b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "inspect-ai",
#     "datasets",
#     "openai",
#     "transformers",
#     "accelerate",
#     "huggingface_hub",
# ]
# ///
"""
Wrapper script that runs an arbitrary Inspect eval and uploads logs to a Space.
This script is meant to be run on HF Jobs.
"""
import os
import sys
import tempfile
from pathlib import Path
from huggingface_hub import HfApi
from inspect_ai import eval
from inspect_ai.log import bundle_log_dir


def bundle_and_upload_to_space(log_dir: str, hf_space_id: str, hf_token: str):
    """Bundle the logs in *log_dir* and upload the bundle to a Hugging Face Space.

    The logs are bundled with ``inspect_ai.log.bundle_log_dir`` into a
    temporary directory, the Space is created (as a ``static`` Space) if it
    does not exist yet, and the whole bundle is pushed in a single commit.

    Args:
        log_dir: Directory containing the eval logs to bundle.
        hf_space_id: Target Space repo id, e.g. ``"user/space-name"``.
        hf_token: Hugging Face token used to authenticate the API client.

    Raises:
        ValueError: If *log_dir* does not exist or is not a directory.
    """
    log_path = Path(log_dir)
    if not log_path.exists():
        raise ValueError(f"Log directory '{log_dir}' does not exist")
    if not log_path.is_dir():
        # Fail early with a clear message instead of deep inside bundling.
        raise ValueError(f"Log path '{log_dir}' is not a directory")

    with tempfile.TemporaryDirectory() as temp_bundle_dir:
        bundle_output_dir = os.path.join(temp_bundle_dir, "bundle")

        print(f"Bundling logs from {log_dir}...")
        bundle_log_dir(log_dir=log_dir, output_dir=bundle_output_dir, overwrite=True)
        print("Bundle created successfully")

        api = HfApi(token=hf_token)

        # Create Space if it doesn't exist. This is deliberately best-effort:
        # the Space may already exist under permissions that forbid
        # create_repo but still allow uploads, so only warn on failure.
        try:
            api.create_repo(
                repo_id=hf_space_id,
                repo_type="space",
                exist_ok=True,
                space_sdk="static",
            )
            print(f"Space {hf_space_id} is ready")
        except Exception as e:
            print(f"Warning: Could not create/verify Space: {e}")

        # Count the bundled files up front so the summary line stays accurate.
        uploaded_count = sum(len(files) for _, _, files in os.walk(bundle_output_dir))

        # Upload the whole bundle as ONE commit. Uploading file-by-file makes
        # one commit (and one Space rebuild) per file, which is slow and can
        # run into Hub rate limits for large bundles.
        print(f"Uploading bundle to Space {hf_space_id}...")
        api.upload_folder(
            folder_path=bundle_output_dir,
            repo_id=hf_space_id,
            repo_type="space",
        )

        print(f"Successfully uploaded {uploaded_count} files")
        print(f"View at: https://huggingface.co/spaces/{hf_space_id}")


def _download_eval_source(eval_script_url):
    """Fetch the eval script at *eval_script_url* and return it as UTF-8 text."""
    import urllib.request

    # SECURITY NOTE: the downloaded code is executed by this runner without
    # any sandboxing. Only pass URLs whose content you trust.
    with urllib.request.urlopen(eval_script_url) as response:
        return response.read().decode("utf-8")


def _load_eval_module(eval_code, module_name="user_eval"):
    """Write *eval_code* to a temporary ``.py`` file and import it as a module."""
    import importlib.util

    # The source was decoded as UTF-8, so write it back as UTF-8 rather than
    # with the locale's default encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".py", delete=False, encoding="utf-8"
    ) as f:
        f.write(eval_code)
        temp_eval_file = f.name

    try:
        spec = importlib.util.spec_from_file_location(module_name, temp_eval_file)
        module = importlib.util.module_from_spec(spec)
        # Register before executing (the importlib recipe for importing a
        # source file) so code that looks itself up via sys.modules works.
        sys.modules[module_name] = module
        spec.loader.exec_module(module)
    finally:
        # Clean up temp file
        os.unlink(temp_eval_file)
    return module


def _resolve_task_name(user_eval_module, task_name, eval_script_url):
    """Return the attribute name of the task to run, exiting with 1 on failure.

    When *task_name* is ``"auto"`` the single public callable of the module
    is picked; otherwise *task_name* must be an attribute of the module.
    """
    if task_name == "auto":
        # Heuristic: every public callable that is not a record_to_* helper.
        tasks = [
            name
            for name in dir(user_eval_module)
            if not name.startswith("_") and callable(getattr(user_eval_module, name))
        ]
        task_candidates = [t for t in tasks if not t.startswith("record_to_")]

        # Imported helpers (Task, task, solvers, ...) are public callables too
        # and would otherwise always trigger "Multiple tasks found". Prefer
        # callables defined in the downloaded module itself; if none match,
        # keep the original candidate list.
        # NOTE(review): assumes decorated tasks keep the defining module in
        # __module__ (functools.wraps behaviour) - confirm against inspect_ai.
        defined_here = [
            t
            for t in task_candidates
            if getattr(getattr(user_eval_module, t), "__module__", None)
            == user_eval_module.__name__
        ]
        if defined_here:
            task_candidates = defined_here

        if not task_candidates:
            print(f"Error: No tasks found in {eval_script_url}")
            sys.exit(1)
        if len(task_candidates) > 1:
            print(f"Error: Multiple tasks found: {task_candidates}")
            print("Please pass an explicit <task_name> instead of 'auto'")
            sys.exit(1)

        task_name = task_candidates[0]
        print(f"Auto-detected task: {task_name}")

    if not hasattr(user_eval_module, task_name):
        print(f"Error: Task '{task_name}' not found in {eval_script_url}")
        available = [name for name in dir(user_eval_module) if not name.startswith("_")]
        print(f"Available: {available}")
        sys.exit(1)

    return task_name


def main(argv=None):
    """Download an eval script, run one of its tasks, and upload the logs.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``. Expected layout:
            ``<eval_script_url> <task_name> <model> <hf_space_id> [log_dir]``.

    Exits with status 1 on a usage error or when the task cannot be resolved.
    """
    argv = sys.argv if argv is None else argv

    # Usage: eval_runner.py <eval_script_url> <task_name> <model> <hf_space_id> [log_dir]
    if len(argv) < 5:
        print("Usage: eval_runner.py <eval_script_url> <task_name> <model> <hf_space_id> [log_dir]")
        sys.exit(1)

    eval_script_url = argv[1]
    task_name = argv[2]
    model = argv[3]
    hf_space_id = argv[4]
    log_dir = argv[5] if len(argv) > 5 else "./logs"

    # Download eval script
    print(f"Downloading eval script from {eval_script_url}...")
    eval_code = _download_eval_source(eval_script_url)

    # Write eval code to a temporary file and import it
    print("Loading eval...")
    user_eval_module = _load_eval_module(eval_code)

    # Get the task function
    task_name = _resolve_task_name(user_eval_module, task_name, eval_script_url)
    task_fn = getattr(user_eval_module, task_name)

    # Run evaluation. `eval` here is inspect_ai.eval (imported at file top),
    # not the Python builtin.
    print(f"Running eval: {task_name} with model {model}")
    eval(task_fn(), model=model, max_tokens=4096, log_dir=log_dir)

    # Upload logs if space specified
    if hf_space_id:
        print(f"Uploading logs to {hf_space_id}...")
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print("Warning: HF_TOKEN not set, skipping upload")
        else:
            bundle_and_upload_to_space(log_dir, hf_space_id, hf_token)
    else:
        print("No Space ID provided, logs remain in job")


if __name__ == "__main__":
    main()