Spaces:
Running
Running
File size: 5,217 Bytes
316475b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "inspect-ai",
# "datasets",
# "openai",
# "transformers",
# "accelerate",
# "huggingface_hub",
# ]
# ///
"""
Wrapper script that runs an arbitrary Inspect eval and uploads logs to a Space.
This script is meant to be run on HF Jobs.
"""
import os
import sys
import tempfile
from pathlib import Path
from huggingface_hub import HfApi
from inspect_ai import eval
from inspect_ai.log import bundle_log_dir
def bundle_and_upload_to_space(log_dir: str, hf_space_id: str, hf_token: str):
    """Bundle the logs in ``log_dir`` and upload the bundle to an HF Space.

    The logs are bundled with ``inspect_ai.log.bundle_log_dir`` into a
    temporary directory, the Space is created (as a ``static`` Space) if it
    does not exist yet, and the bundle is then pushed to the Space.

    Args:
        log_dir: Path of the directory holding the eval logs.
        hf_space_id: Repo id of the target Space.
        hf_token: Token used to authenticate against the Hugging Face Hub.

    Raises:
        ValueError: If ``log_dir`` does not exist or is not a directory.
    """
    log_path = Path(log_dir)
    if not log_path.exists():
        raise ValueError(f"Log directory '{log_dir}' does not exist")
    if not log_path.is_dir():
        # Fail early with a clear message instead of deep inside the bundler.
        raise ValueError(f"Log path '{log_dir}' is not a directory")

    # Everything below must stay inside the ``with``: the bundle lives in the
    # temporary directory, which is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as temp_bundle_dir:
        bundle_output_dir = os.path.join(temp_bundle_dir, "bundle")
        print(f"Bundling logs from {log_dir}...")
        bundle_log_dir(log_dir=log_dir, output_dir=bundle_output_dir, overwrite=True)
        print("Bundle created successfully")

        api = HfApi(token=hf_token)

        # Create Space if it doesn't exist. Best effort: a failure here is
        # only reported, and the upload below is still attempted.
        try:
            api.create_repo(
                repo_id=hf_space_id,
                repo_type="space",
                exist_ok=True,
                space_sdk="static",
            )
            print(f"Space {hf_space_id} is ready")
        except Exception as e:
            print(f"Warning: Could not create/verify Space: {e}")

        # Upload the whole bundle in a single commit rather than one commit
        # per file.
        print(f"Uploading bundle to Space {hf_space_id}...")
        uploaded_count = sum(
            len(files) for _root, _dirs, files in os.walk(bundle_output_dir)
        )
        api.upload_folder(
            folder_path=bundle_output_dir,
            repo_id=hf_space_id,
            repo_type="space",
        )
        print(f"Successfully uploaded {uploaded_count} files")
        print(f"View at: https://huggingface.co/spaces/{hf_space_id}")
def find_task_candidates(module) -> list[str]:
    """Return the names in ``module`` that look like task functions.

    Candidates are public callables whose name does not start with
    ``record_to_``. Callables defined in ``module`` itself (their
    ``__module__`` matches the module's name) are preferred, so that names
    the script merely imported are not mistaken for tasks. If no callable is
    defined locally, every public callable is returned instead.
    """
    public_callables = [
        name
        for name in dir(module)
        if not name.startswith("_") and callable(getattr(module, name))
    ]
    candidates = [
        name for name in public_callables if not name.startswith("record_to_")
    ]
    defined_here = [
        name
        for name in candidates
        if getattr(getattr(module, name), "__module__", None) == module.__name__
    ]
    return defined_here or candidates


if __name__ == "__main__":
    # Usage: eval_runner.py <eval_script_url> <task_name> <model> <hf_space_id> [log_dir]
    if len(sys.argv) < 5:
        print("Usage: eval_runner.py <eval_script_url> <task_name> <model> <hf_space_id> [log_dir]")
        sys.exit(1)

    eval_script_url = sys.argv[1]
    task_name = sys.argv[2]
    model = sys.argv[3]
    hf_space_id = sys.argv[4]
    log_dir = sys.argv[5] if len(sys.argv) > 5 else "./logs"

    # Download eval script
    # NOTE(security): the downloaded code is executed below with this
    # process's privileges and environment (including HF_TOKEN). Only pass
    # URLs you trust.
    print(f"Downloading eval script from {eval_script_url}...")
    import urllib.request

    with urllib.request.urlopen(eval_script_url) as response:
        eval_code = response.read().decode("utf-8")

    # Write eval code to a temporary file and import it
    print("Loading eval...")
    # Write as UTF-8 explicitly: the text was decoded as UTF-8, and Python
    # reads source files as UTF-8 by default.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".py", delete=False, encoding="utf-8"
    ) as f:
        f.write(eval_code)
        temp_eval_file = f.name

    try:
        import importlib.util

        spec = importlib.util.spec_from_file_location("user_eval", temp_eval_file)
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not load eval script from {eval_script_url}")
        user_eval_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(user_eval_module)
    finally:
        # Clean up temp file
        os.unlink(temp_eval_file)

    # Get the task function
    if task_name == "auto":
        # Auto-detect: pick the single task-like callable in the script
        task_candidates = find_task_candidates(user_eval_module)
        if not task_candidates:
            print(f"Error: No tasks found in {eval_script_url}")
            sys.exit(1)
        elif len(task_candidates) > 1:
            print(f"Error: Multiple tasks found: {task_candidates}")
            print("Please specify <task_name> explicitly instead of 'auto'")
            sys.exit(1)
        task_name = task_candidates[0]
        print(f"Auto-detected task: {task_name}")

    if not hasattr(user_eval_module, task_name):
        print(f"Error: Task '{task_name}' not found in {eval_script_url}")
        available = [name for name in dir(user_eval_module) if not name.startswith("_")]
        print(f"Available: {available}")
        sys.exit(1)

    task_fn = getattr(user_eval_module, task_name)

    # Run evaluation (``eval`` here is inspect_ai's eval, not the builtin)
    print(f"Running eval: {task_name} with model {model}")
    eval(task_fn(), model=model, max_tokens=4096, log_dir=log_dir)

    # Upload logs if space specified
    if hf_space_id:
        print(f"Uploading logs to {hf_space_id}...")
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print("Warning: HF_TOKEN not set, skipping upload")
        else:
            bundle_and_upload_to_space(log_dir, hf_space_id, hf_token)
    else:
        print("No Space ID provided, logs remain in job")
|