Spaces:

dvilasuero
/

inspect_evals_mask

Running

App Files Files Community

dvilasuero committed on Nov 10, 2025

Commit

3befd84

verified ·

1 Parent(s): 61f9680

Upload runner.py with huggingface_hub

Browse files

Files changed (1) hide show

runner.py +113 -0

runner.py ADDED Viewed

	@@ -0,0 +1,113 @@

+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "inspect-ai",
+#     "datasets",
+#     "openai",
+#     "transformers",
+#     "accelerate",
+#     "huggingface_hub",
+#     "inspect_evals",
+# ]
+# ///
+"""Runner that downloads an eval script and executes it using inspect CLI."""
+import os
+import sys
+import subprocess
+import tempfile
+import urllib.request
+from pathlib import Path
+from huggingface_hub import HfApi
+from inspect_ai.log import bundle_log_dir
+def bundle_and_upload_to_space(log_dir: str, hf_space_id: str, hf_token: str):
+    """Bundle logs and upload to HF Space."""
+    log_path = Path(log_dir)
+    if not log_path.exists():
+        print(f"Warning: Log directory '{log_dir}' does not exist")
+        return
+    with tempfile.TemporaryDirectory() as temp_bundle_dir:
+        bundle_output_dir = os.path.join(temp_bundle_dir, "bundle")
+        print(f"Bundling logs from {log_dir}...")
+        bundle_log_dir(log_dir=log_dir, output_dir=bundle_output_dir, overwrite=True)
+        api = HfApi(token=hf_token)
+        print(f"Uploading to Space {hf_space_id}...")
+        uploaded_count = 0
+        for root, _, files in os.walk(bundle_output_dir):
+            for file in files:
+                local_path = os.path.join(root, file)
+                rel_path = os.path.relpath(local_path, bundle_output_dir)
+                path_in_repo = rel_path.replace(os.sep, "/")
+                api.upload_file(
+                    path_or_fileobj=local_path,
+                    path_in_repo=path_in_repo,
+                    repo_id=hf_space_id,
+                    repo_type="space",
+                )
+                uploaded_count += 1
+        print(f"Uploaded {uploaded_count} files")
+        print(f"View at: https://huggingface.co/spaces/{hf_space_id}")
+if __name__ == "__main__":
+    if len(sys.argv) < 4:
+        print("Usage: eval_runner.py <eval_ref> <model> <space_id> [log_dir] [--inspect-evals] [extra_args...]")
+        sys.exit(1)
+    eval_ref = sys.argv[1]
+    model = sys.argv[2]
+    hf_space_id = sys.argv[3]
+    log_dir = sys.argv[4] if len(sys.argv) > 4 and not sys.argv[4].startswith("--") else "./logs"
+    # Check if this is an inspect_evals path
+    is_inspect_evals = "--inspect-evals" in sys.argv
+    extra_args = [arg for arg in sys.argv[5:] if arg != "--inspect-evals"]
+    if is_inspect_evals:
+        # Use inspect_evals path directly
+        print(f"Using inspect_evals: {eval_ref}")
+        eval_target = eval_ref
+        cleanup_file = None
+    else:
+        # Download custom eval script
+        print(f"Downloading eval from {eval_ref}...")
+        with urllib.request.urlopen(eval_ref) as response:
+            eval_code = response.read().decode('utf-8')
+        eval_filename = "downloaded_eval.py"
+        with open(eval_filename, 'w') as f:
+            f.write(eval_code)
+        eval_target = eval_filename
+        cleanup_file = eval_filename
+    try:
+        print(f"Running inspect eval with model {model}...")
+        cmd = [
+            "inspect", "eval", eval_target,
+            "--model", model,
+            "--log-dir", log_dir,
+        ]
+        cmd.extend(extra_args)
+        print(f"Command: {' '.join(cmd)}")
+        subprocess.run(cmd, check=True)
+        print(f"\nUploading logs to {hf_space_id}...")
+        hf_token = os.getenv("HF_TOKEN")
+        if not hf_token:
+            print("Warning: HF_TOKEN not set, skipping upload")
+        else:
+            bundle_and_upload_to_space(log_dir, hf_space_id, hf_token)
+    finally:
+        if cleanup_file and os.path.exists(cleanup_file):
+            os.unlink(cleanup_file)