Spaces:
Runtime error
Runtime error
initial commit: Dockerfile + eval_job.py + README
Browse files- .gitignore +5 -0
- Dockerfile +74 -0
- README.md +39 -6
- eval_job.py +245 -0
.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
.DS_Store
|
| 4 |
+
.venv/
|
| 5 |
+
.env
|
Dockerfile
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1.7
|
| 2 |
+
#
|
| 3 |
+
# HF Space at HuggingAI4Engineering/cadgenbench-eval-gpu.
|
| 4 |
+
# Provides the Docker image consumed by the leaderboard's HF Jobs
|
| 5 |
+
# eval pipeline (see space-setup/jobs-migration.md). The Space
|
| 6 |
+
# itself is not run as a Gradio app; the image exists only to be
|
| 7 |
+
# pulled by `hf jobs run --image hf.co/spaces/...`. Pause the
|
| 8 |
+
# Space after the first successful build so no idle hardware cost
|
| 9 |
+
# accrues; the built image stays available to Jobs while paused.
|
| 10 |
+
#
|
| 11 |
+
# Local smoke test (slow on Apple Silicon under Rosetta):
|
| 12 |
+
#
|
| 13 |
+
# docker buildx build --platform linux/amd64 \
|
| 14 |
+
# -t cadgenbench-eval-gpu-test .
|
| 15 |
+
|
| 16 |
+
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04
|
| 17 |
+
|
| 18 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 19 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 20 |
+
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
| 21 |
+
DEBIAN_FRONTEND=noninteractive
|
| 22 |
+
|
| 23 |
+
# Python 3.12 from deadsnakes (Ubuntu 22.04 ships 3.10 by default)
|
| 24 |
+
# plus the apt runtime deps shared with the leaderboard Dockerfile
|
| 25 |
+
# (OCP / build123d / Pillow / VTK). libegl1 + libegl-mesa0 provide
|
| 26 |
+
# the EGL surface vtk-egl binds to; on this CUDA-base image the
|
| 27 |
+
# NVIDIA driver supplies hardware OpenGL, no Mesa fallback path.
|
| 28 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 29 |
+
software-properties-common \
|
| 30 |
+
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
| 31 |
+
&& apt-get update && apt-get install -y --no-install-recommends \
|
| 32 |
+
python3.12 python3.12-venv python3.12-dev \
|
| 33 |
+
python3-pip \
|
| 34 |
+
git ca-certificates \
|
| 35 |
+
libglib2.0-0 libsm6 libxext6 libgomp1 libfontconfig1 \
|
| 36 |
+
libgl1 libegl1 libegl-mesa0 libxrender1 \
|
| 37 |
+
&& rm -rf /var/lib/apt/lists/* \
|
| 38 |
+
&& ln -sf /usr/bin/python3.12 /usr/local/bin/python \
|
| 39 |
+
&& ln -sf /usr/bin/python3.12 /usr/local/bin/python3
|
| 40 |
+
|
| 41 |
+
# cadgenbench from the public GitHub repo, same convention and
|
| 42 |
+
# ARG name as the leaderboard Dockerfile. Bump CADGENBENCH_SHA in
|
| 43 |
+
# lockstep with cadgenbench releases.
|
| 44 |
+
ARG CADGENBENCH_SHA=b22a53c
|
| 45 |
+
RUN python -m pip install --no-cache-dir \
|
| 46 |
+
"cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
|
| 47 |
+
|
| 48 |
+
# The cadgenbench wheel pulls vanilla `vtk` from PyPI (built with
|
| 49 |
+
# vtkXOpenGLRenderWindow, needs an X server). Swap for vtk-egl:
|
| 50 |
+
# same VTK, compiled against EGL so it acquires an off-screen GL
|
| 51 |
+
# context against the NVIDIA driver on this CUDA-base image.
|
| 52 |
+
# PyVista picks up whichever `vtk` dist is installed; no
|
| 53 |
+
# cadgenbench code change. Same shape as the leaderboard's
|
| 54 |
+
# vtk-osmesa swap, just the GPU counterpart.
|
| 55 |
+
RUN python -m pip uninstall -y vtk \
|
| 56 |
+
&& python -m pip install --no-cache-dir \
|
| 57 |
+
--extra-index-url https://wheels.vtk.org vtk-egl
|
| 58 |
+
|
| 59 |
+
# In-job entrypoint. Invoked by:
|
| 60 |
+
#
|
| 61 |
+
# hf jobs run --image hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu \
|
| 62 |
+
# --flavor a10g-large --secrets HF_TOKEN \
|
| 63 |
+
# python /opt/eval_job.py <submission_id> <zip_url>
|
| 64 |
+
COPY eval_job.py /opt/eval_job.py
|
| 65 |
+
|
| 66 |
+
# Drop privileges. HF Spaces conventionally run as uid 1000.
|
| 67 |
+
RUN useradd -m -u 1000 user
|
| 68 |
+
USER user
|
| 69 |
+
WORKDIR /home/user
|
| 70 |
+
|
| 71 |
+
# Idle CMD so the Space's runtime starts without restart-flapping.
|
| 72 |
+
# Pause the Space via HF UI or HfApi().pause_space() after the
|
| 73 |
+
# first green build; the cached image stays available to Jobs.
|
| 74 |
+
CMD ["sleep", "infinity"]
|
README.md
CHANGED
|
@@ -1,11 +1,44 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
colorTo: indigo
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
-
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: CADGenBench eval (GPU)
|
| 3 |
+
colorFrom: gray
|
| 4 |
+
colorTo: gray
|
|
|
|
| 5 |
sdk: docker
|
| 6 |
pinned: false
|
| 7 |
+
license: apache-2.0
|
| 8 |
+
short_description: GPU image for the CADGenBench eval HF Jobs pipeline.
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# cadgenbench-eval-gpu
|
| 12 |
+
|
| 13 |
+
Image-only Docker Space. Provides the GPU container that the
|
| 14 |
+
[CADGenBench leaderboard
|
| 15 |
+
Space](https://huggingface.co/spaces/HuggingAI4Engineering/cadgenbench-leaderboard)
|
| 16 |
+
pulls via `hf jobs run` to run the eval pipeline (alignment + render +
|
| 17 |
+
metrics) for each submission.
|
| 18 |
+
|
| 19 |
+
Not intended to be run as a Gradio / web app. The `CMD ["sleep",
|
| 20 |
+
"infinity"]` only exists so the Space runtime starts without
|
| 21 |
+
restart-flapping after the build; pause the Space after the first
|
| 22 |
+
green build to avoid idle hardware cost. The built image stays cached
|
| 23 |
+
on HF and remains pullable by Jobs while paused.
|
| 24 |
+
|
| 25 |
+
Design + integration details:
|
| 26 |
+
[`space-setup/jobs-migration.md`](https://github.com/huggingface/cadgenbench)
|
| 27 |
+
(in the umbrella `cadgenbench` working tree). The leaderboard Space's
|
| 28 |
+
worker dispatches `python /opt/eval_job.py <submission_id> <zip_url>`
|
| 29 |
+
against this image on `a10g-large` and polls for completion.
|
| 30 |
+
|
| 31 |
+
## Image contents
|
| 32 |
+
|
| 33 |
+
- `nvidia/cuda:12.4.1-runtime-ubuntu22.04` base.
|
| 34 |
+
- Python 3.12 via deadsnakes.
|
| 35 |
+
- Apt runtime deps for OCP / build123d / VTK (shared with the
|
| 36 |
+
leaderboard Dockerfile) plus `libegl1 libegl-mesa0` for the EGL
|
| 37 |
+
context.
|
| 38 |
+
- `cadgenbench @ git+https://github.com/huggingface/cadgenbench@<sha>`,
|
| 39 |
+
pinned via `ARG CADGENBENCH_SHA`.
|
| 40 |
+
- `vtk-egl` swapped in for the PyPI `vtk` wheel (same swap shape as
|
| 41 |
+
the leaderboard's `vtk-osmesa`; the GPU counterpart). PyVista
|
| 42 |
+
picks up whichever `vtk` dist is installed; no cadgenbench code
|
| 43 |
+
change needed.
|
| 44 |
+
- `/opt/eval_job.py` entrypoint script.
|
eval_job.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""In-job entrypoint for the CADGenBench eval on HF Jobs.
|
| 2 |
+
|
| 3 |
+
Invoked by the leaderboard Space's worker (see
|
| 4 |
+
``AI4Engineering/submit.py``) via::
|
| 5 |
+
|
| 6 |
+
hf jobs run --image hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu \\
|
| 7 |
+
--flavor a10g-large \\
|
| 8 |
+
--env CADGENBENCH_DATA_REPO=HuggingAI4Engineering/cadgenbench-data \\
|
| 9 |
+
--env CADGENBENCH_DATA_GT_REPO=HuggingAI4Engineering/cadgenbench-data-gt \\
|
| 10 |
+
--env HF_SUBMISSIONS_REPO=HuggingAI4Engineering/cadgenbench-submissions \\
|
| 11 |
+
--env EVAL_WORKER_COUNT=8 \\
|
| 12 |
+
--secrets HF_TOKEN \\
|
| 13 |
+
python /opt/eval_job.py <submission_id> <zip_url>
|
| 14 |
+
|
| 15 |
+
Pipeline, in order. Synchronous, no fallbacks. Any failure raises
|
| 16 |
+
and the container exits non-zero; the Space's poller catches the
|
| 17 |
+
ERROR stage and flips the submission row to ``failed``.
|
| 18 |
+
|
| 19 |
+
1. Download ``submissions/<id>.zip`` from the submissions dataset
|
| 20 |
+
via ``hf_hub_download`` (auth via ``HF_TOKEN``).
|
| 21 |
+
2. Unpack into ``/tmp/run/``.
|
| 22 |
+
3. ``cadgenbench evaluate /tmp/run --workers <n>`` (subprocess).
|
| 23 |
+
4. ``cadgenbench report single /tmp/run -o /tmp/<id>.html``
|
| 24 |
+
(subprocess).
|
| 25 |
+
5. Build ``report.json`` bundling ``run_summary.json`` + every
|
| 26 |
+
per-fixture ``result.json`` (mirror of submit.py's
|
| 27 |
+
``_build_report_json``).
|
| 28 |
+
6. Upload ``reports/<id>.html`` + ``reports/<id>.json`` back to the
|
| 29 |
+
submissions dataset via ``HfApi.upload_file``.
|
| 30 |
+
7. Exit 0.
|
| 31 |
+
|
| 32 |
+
The Space-side worker then downloads ``reports/<id>.json``, reads
|
| 33 |
+
``run_summary`` out of it, and flips the row to ``completed``.
|
| 34 |
+
"""
|
| 35 |
+
from __future__ import annotations
|
| 36 |
+
|
| 37 |
+
import argparse
|
| 38 |
+
import json
|
| 39 |
+
import os
|
| 40 |
+
import shutil
|
| 41 |
+
import subprocess
|
| 42 |
+
import sys
|
| 43 |
+
import zipfile
|
| 44 |
+
from pathlib import Path
|
| 45 |
+
from typing import Any
|
| 46 |
+
|
| 47 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# Scratch locations inside the job container. The per-submission HTML
# report is written directly under REPORT_HTML_DIR; the unpacked
# submission lives in RUN_DIR, which is wiped and recreated on each run.
REPORT_HTML_DIR = Path("/tmp")
RUN_DIR = REPORT_HTML_DIR / "run"

# Wall-clock budgets, in seconds, handed to ``subprocess.run(timeout=...)``
# for the evaluate and report steps respectively.
EVAL_TIMEOUT_SECONDS = 30 * 60
REPORT_TIMEOUT_SECONDS = 5 * 60

# Folder inside the submissions dataset that receives the uploaded reports.
REPORTS_DIR_IN_REPO = "reports"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def main() -> int:
    """Run the eval pipeline for one submission and return the exit code.

    Reads ``submission_id`` and ``zip_url`` from the command line and
    ``HF_TOKEN``, ``HF_SUBMISSIONS_REPO`` and ``EVAL_WORKER_COUNT`` from
    the environment, then runs the stages in order: download + unpack,
    evaluate, render the HTML report, bundle the JSON report, upload both.

    Returns:
        ``0`` on success. Failures are never signalled through the return
        value; they raise and propagate out of the process.

    Raises:
        RuntimeError: If a required environment variable is missing or
            empty, or if ``EVAL_WORKER_COUNT`` is set to something that is
            not an integer. Exceptions raised by the individual stages
            propagate unchanged.
    """
    parser = argparse.ArgumentParser(
        description="Run the CADGenBench eval pipeline on an HF Job.",
    )
    parser.add_argument(
        "submission_id",
        help="Filesystem-safe slug minted by the Space's submit handler.",
    )
    parser.add_argument(
        "zip_url",
        help=(
            "Canonical Hub blob URL of submissions/<id>.zip "
            "(submission_blob_url from the row)."
        ),
    )
    args = parser.parse_args()

    submission_id: str = args.submission_id
    zip_url: str = args.zip_url

    token = _require_env("HF_TOKEN")
    submissions_repo = _require_env("HF_SUBMISSIONS_REPO")

    # A variable that is set but empty (e.g. ``--env EVAL_WORKER_COUNT=``)
    # is treated like an unset one, the same way _require_env treats empty
    # strings, instead of crashing inside int("").
    raw_workers = os.environ.get("EVAL_WORKER_COUNT", "").strip() or "8"
    try:
        worker_count = int(raw_workers)
    except ValueError as err:
        raise RuntimeError(
            f"EVAL_WORKER_COUNT must be an integer, got {raw_workers!r}."
        ) from err

    print(
        f"[eval_job] submission_id={submission_id} "
        f"workers={worker_count} repo={submissions_repo}",
        flush=True,
    )

    _prepare_run_dir(submission_id, zip_url, submissions_repo, token)
    _run_eval(RUN_DIR, worker_count)
    html_path = REPORT_HTML_DIR / f"{submission_id}.html"
    _run_report(RUN_DIR, html_path)
    report_json = _build_report_json(RUN_DIR)
    _upload_reports(
        submission_id, html_path, report_json, submissions_repo, token,
    )
    print(f"[eval_job] done: {submission_id}", flush=True)
    return 0
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, stripped.

    Surrounding whitespace is removed before the emptiness check, so a
    value consisting only of whitespace (for example a secret that is just
    a trailing newline) is rejected the same way as an unset variable, and
    callers never receive padding around a token or repo id.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value with leading/trailing whitespace removed.

    Raises:
        RuntimeError: If the variable is unset, empty, or whitespace-only.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            f"Required environment variable {name!r} is unset or empty."
        )
    return value
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _prepare_run_dir(
    submission_id: str,
    zip_url: str,
    submissions_repo: str,
    token: str,
) -> None:
    """Download the submission zip and unpack it into a fresh ``RUN_DIR``.

    The in-repo path is always ``submissions/<submission_id>.zip``: it is
    built from *submission_id*, not parsed out of *zip_url*. *zip_url* is
    only cross-checked against that path; when it does not end with it a
    warning is printed so a mismatched dispatch shows up in the job log,
    and the download still proceeds by *submission_id*.

    Any existing ``RUN_DIR`` is removed before the new one is created.

    Args:
        submission_id: Slug identifying the submission; names the zip.
        zip_url: URL of the zip as recorded by the caller. Expected to end
            in ``submissions/<submission_id>.zip``; used for the
            consistency warning only.
        submissions_repo: Dataset repo id to download from.
        token: Token passed to ``hf_hub_download``.
    """
    if RUN_DIR.exists():
        shutil.rmtree(RUN_DIR)
    RUN_DIR.mkdir(parents=True)

    in_repo_path = f"submissions/{submission_id}.zip"

    # Ignore any query string when comparing, then surface (not fail on) a
    # URL that does not match the path we are about to download.
    url_path = zip_url.split("?", 1)[0]
    if not url_path.endswith(in_repo_path):
        print(
            f"[eval_job] warning: zip_url {zip_url!r} does not end with "
            f"{in_repo_path!r}; downloading by submission_id",
            flush=True,
        )

    print(
        f"[eval_job] downloading {submissions_repo}:{in_repo_path}",
        flush=True,
    )
    local_zip = hf_hub_download(
        repo_id=submissions_repo,
        filename=in_repo_path,
        repo_type="dataset",
        token=token,
    )

    # Per the original author's note, the Space gate-checks the zip
    # contents before upload (submit.py's _extract_zip), so members are
    # extracted directly here without re-validating zip-slip / symlinks.
    with zipfile.ZipFile(local_zip) as zf:
        zf.extractall(RUN_DIR)
    print(f"[eval_job] unpacked into {RUN_DIR}", flush=True)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def _run_eval(run_dir: Path, workers: int) -> None:
    """Run ``cadgenbench evaluate`` on *run_dir* using *workers* workers.

    The CLI is started as ``python -m cadgenbench.cli`` with the current
    interpreter and a copy of the current environment, and is bounded by
    ``EVAL_TIMEOUT_SECONDS``.

    Raises:
        RuntimeError: If the subprocess exits with a non-zero status.
    """
    argv = [sys.executable, "-m", "cadgenbench.cli", "evaluate"]
    argv += [str(run_dir), "--workers", str(workers)]
    print("[eval_job] " + " ".join(argv), flush=True)

    completed = subprocess.run(
        argv,
        env=dict(os.environ),
        timeout=EVAL_TIMEOUT_SECONDS,
        check=False,
    )
    exit_code = completed.returncode
    if exit_code == 0:
        return
    raise RuntimeError(f"cadgenbench evaluate exited {exit_code}")
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _run_report(run_dir: Path, html_out: Path) -> None:
    """Render the single-run HTML report for *run_dir* into *html_out*.

    Runs ``python -m cadgenbench.cli report single`` with the current
    interpreter and a copy of the current environment, bounded by
    ``REPORT_TIMEOUT_SECONDS``.

    Raises:
        RuntimeError: If the subprocess exits non-zero, or exits zero but
            *html_out* does not exist as a file afterwards.
    """
    argv = [sys.executable, "-m", "cadgenbench.cli", "report", "single"]
    argv += [str(run_dir), "-o", str(html_out)]
    print("[eval_job] " + " ".join(argv), flush=True)

    completed = subprocess.run(
        argv,
        env=dict(os.environ),
        timeout=REPORT_TIMEOUT_SECONDS,
        check=False,
    )
    # Success needs both a clean exit and the report actually on disk.
    if completed.returncode == 0 and html_out.is_file():
        return
    raise RuntimeError(
        f"cadgenbench report single exited {completed.returncode} "
        f"(html exists={html_out.is_file()})"
    )
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def _build_report_json(run_dir: Path) -> dict[str, Any]:
    """Collect the run summary and all per-fixture results into one dict.

    Reads ``run_summary.json`` from *run_dir* and, for every immediate
    subdirectory (visited in sorted order) that contains a
    ``result.json``, the parsed contents of that file keyed by the
    subdirectory name. Subdirectories without a ``result.json`` are
    skipped.

    Returns:
        ``{"run_summary": <summary>, "per_fixture_results": {<name>: ...}}``.

    Raises:
        RuntimeError: If ``run_summary.json`` is not a file under *run_dir*.
    """
    summary_file = run_dir / "run_summary.json"
    if not summary_file.is_file():
        raise RuntimeError(
            f"run_summary.json not produced under {run_dir} (eval issue?)"
        )
    with summary_file.open(encoding="utf-8") as handle:
        run_summary = json.load(handle)

    subdirs = sorted(entry for entry in run_dir.iterdir() if entry.is_dir())
    candidate_files = (subdir / "result.json" for subdir in subdirs)
    fixture_results = {
        result_file.parent.name: json.loads(
            result_file.read_text(encoding="utf-8")
        )
        for result_file in candidate_files
        if result_file.is_file()
    }
    return {"run_summary": run_summary, "per_fixture_results": fixture_results}
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def _upload_reports(
    submission_id: str,
    html_path: Path,
    report_json: dict[str, Any],
    submissions_repo: str,
    token: str,
) -> None:
    """Push the HTML and JSON reports for *submission_id* to the Hub.

    Two separate ``upload_file`` commits are made against the
    *submissions_repo* dataset: first ``reports/<id>.html`` from the file
    at *html_path*, then ``reports/<id>.json`` holding *report_json*
    serialized as indented UTF-8 JSON.
    """
    hub = HfApi(token=token)
    destination = {"repo_id": submissions_repo, "repo_type": "dataset"}
    remote_stem = f"{REPORTS_DIR_IN_REPO}/{submission_id}"

    hub.upload_file(
        path_or_fileobj=str(html_path),
        path_in_repo=f"{remote_stem}.html",
        commit_message=f"add HTML report for {submission_id}",
        **destination,
    )

    # Serialize only after the HTML upload, as the JSON goes up second.
    json_text = json.dumps(report_json, ensure_ascii=False, indent=2)
    hub.upload_file(
        path_or_fileobj=json_text.encode("utf-8"),
        path_in_repo=f"{remote_stem}.json",
        commit_message=f"add JSON report for {submission_id}",
        **destination,
    )

    print(
        f"[eval_job] uploaded reports/{submission_id}.{{html,json}}",
        flush=True,
    )
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
|