#!/usr/bin/env python3
# Source: midicaps-logs / eval_runner_v2.py
# Uploaded by dvilasuero with huggingface_hub (commit 0ef5ea2, verified).
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "inspect-ai",
# "datasets",
# "openai",
# "transformers",
# "accelerate",
# "huggingface_hub",
# ]
# ///
"""
Simple runner that downloads an eval script, runs it with the inspect CLI,
and uploads the bundled logs to a Hugging Face Space.
"""
import os
import sys
import subprocess
import tempfile
import urllib.request
from pathlib import Path
from huggingface_hub import HfApi
from inspect_ai.log import bundle_log_dir
def bundle_and_upload_to_space(log_dir: str, hf_space_id: str, hf_token: str) -> None:
    """Bundle the logs in ``log_dir`` and upload the bundle to an HF Space.

    The logs are bundled with ``inspect_ai.log.bundle_log_dir`` into a
    temporary directory, and the resulting tree is pushed to the Space in a
    single commit.

    Args:
        log_dir: Directory containing the logs to bundle. If it does not
            exist, a warning is printed and nothing is uploaded.
        hf_space_id: Repo id of the target Space.
        hf_token: Token passed to ``HfApi`` to authenticate the upload.
    """
    if not Path(log_dir).exists():
        print(f"Warning: Log directory '{log_dir}' does not exist, skipping upload")
        return

    with tempfile.TemporaryDirectory() as temp_bundle_dir:
        bundle_output_dir = os.path.join(temp_bundle_dir, "bundle")

        print(f"Bundling logs from {log_dir}...")
        bundle_log_dir(log_dir=log_dir, output_dir=bundle_output_dir, overwrite=True)

        # Count the bundled files up front so the summary line stays accurate.
        uploaded_count = sum(
            len(files) for _root, _dirs, files in os.walk(bundle_output_dir)
        )

        print(f"Uploading bundle to Space {hf_space_id}...")
        if uploaded_count:
            # One upload_folder call produces a single commit instead of one
            # commit (and one round-trip) per file; paths inside the repo
            # mirror the layout of the bundle directory.
            HfApi(token=hf_token).upload_folder(
                folder_path=bundle_output_dir,
                repo_id=hf_space_id,
                repo_type="space",
            )

    print(f"Successfully uploaded {uploaded_count} files")
    print(f"View at: https://huggingface.co/spaces/{hf_space_id}")
if __name__ == "__main__":
    # Usage: eval_runner_v2.py <eval_script_url> <model> <hf_space_id> [log_dir] [extra_args...]
    if len(sys.argv) < 4:
        print("Usage: eval_runner_v2.py <eval_script_url> <model> <hf_space_id> [log_dir] [extra_args...]")
        sys.exit(1)

    eval_script_url, model, hf_space_id = sys.argv[1:4]
    log_dir = sys.argv[4] if len(sys.argv) > 4 else "./logs"
    # Everything after log_dir is forwarded verbatim to `inspect eval`;
    # slicing past the end of argv simply yields an empty list.
    extra_args = sys.argv[5:]

    # NOTE(security): the downloaded file is handed to `inspect eval`, which
    # executes it. Only point this runner at URLs you trust.
    print(f"Downloading eval script from {eval_script_url}...")
    with urllib.request.urlopen(eval_script_url) as response:
        eval_code = response.read().decode("utf-8")

    # Write eval code to a file in current directory (inspect needs relative paths)
    eval_filename = "downloaded_eval.py"

    try:
        # Written inside the try so a partially written file is still cleaned
        # up, and with an explicit encoding so the UTF-8 text decoded above
        # round-trips regardless of the platform's default locale encoding.
        with open(eval_filename, "w", encoding="utf-8") as f:
            f.write(eval_code)

        # Run inspect eval with the downloaded script
        print(f"Running inspect eval with model {model}...")
        cmd = [
            "inspect", "eval", eval_filename,
            "--model", model,
            "--log-dir", log_dir,
            *extra_args,
        ]
        print(f"Command: {' '.join(cmd)}")
        # check=True raises CalledProcessError on a non-zero exit, so the
        # upload below only happens after a successful eval run.
        subprocess.run(cmd, check=True)

        # Upload logs to space
        print(f"\nUploading logs to {hf_space_id}...")
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print("Warning: HF_TOKEN not set, skipping log upload")
        else:
            bundle_and_upload_to_space(log_dir, hf_space_id, hf_token)
    finally:
        # Clean up eval file
        Path(eval_filename).unlink(missing_ok=True)