arc-easy

Sleeping

App Files Files Community

dvilasuero committed on Nov 17, 2025

Commit

d376ca2

verified ·

1 Parent(s): 2aaca4e

Upload runner.py with huggingface_hub

Browse files

Files changed (1) hide show

runner.py +89 -0

runner.py ADDED Viewed

	@@ -0,0 +1,89 @@

+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "inspect-ai",
+#     "datasets",
+#     "openai",
+#     "transformers",
+#     "accelerate",
+#     "huggingface_hub",
+#     "inspect_evals",
+# ]
+# ///
+"""Runner that downloads an eval script and executes it using inspect CLI with HF filesystem logging."""
+import os
+import sys
+import subprocess
+import urllib.request
+if __name__ == "__main__":
+    if len(sys.argv) < 4:
+        print(
+            "Usage: eval_runner.py <eval_ref> <model> <dataset_repo> [--inspect-evals] [extra_args...]"
+        )
+        sys.exit(1)
+    eval_ref = sys.argv[1]
+    model = sys.argv[2]
+    dataset_repo = sys.argv[3]  # Changed from space_id to dataset_repo
+    # Check if this is an inspect_evals path
+    is_inspect_evals = "--inspect-evals" in sys.argv
+    extra_args = [arg for arg in sys.argv[4:] if arg != "--inspect-evals"]
+    # Construct log directory path for HF filesystem
+    if not dataset_repo.startswith("datasets/"):
+        dataset_repo = f"datasets/{dataset_repo}"
+    log_dir = f"hf://{dataset_repo}/logs"
+    if is_inspect_evals:
+        # Use inspect_evals path directly
+        print(f"Using inspect_evals: {eval_ref}")
+        eval_target = eval_ref
+        cleanup_file = None
+    else:
+        # Download custom eval script
+        print(f"Downloading eval from {eval_ref}...")
+        with urllib.request.urlopen(eval_ref) as response:
+            eval_code = response.read().decode("utf-8")
+        eval_filename = "downloaded_eval.py"
+        with open(eval_filename, "w") as f:
+            f.write(eval_code)
+        eval_target = eval_filename
+        cleanup_file = eval_filename
+    try:
+        print(f"Running inspect eval with model {model}...")
+        print(f"Logs will be written to: {log_dir}")
+        # Build command with HF filesystem logging parameters
+        cmd = [
+            "inspect",
+            "eval",
+            eval_target,
+            "--model",
+            model,
+            "--log-dir",
+            log_dir,
+            "--log-shared",  # Enable shared logging for remote filesystems
+            "--log-buffer",
+            "100",  # Buffer size for stable ZIP files
+        ]
+        cmd.extend(extra_args)
+        print(f"Command: {' '.join(cmd)}")
+        subprocess.run(cmd, check=True)
+        print(f"\n✓ Eval completed!")
+        print(
+            f"Logs are available at: https://huggingface.co/{dataset_repo}/tree/main/logs"
+        )
+    finally:
+        if cleanup_file and os.path.exists(cleanup_file):
+            os.unlink(cleanup_file)