Michael Rabinovich Cursor committed on
Commit ·
07430f9
1
Parent(s): b224eee
submit: publish merged report + gallery in one commit
Browse files
Replace the per-file gallery-thumbnail uploads (one upload_file commit
per fixture, ~80+ for a full fan-out) with a single create_commit that
stages reports/<id>.{html,json} plus every renders/<id>/<fixture>.png at
once. Collapses finalize from ~86 commits to one publish commit, cutting
post-shard finalize from minutes to well under a minute and removing the
429 "concurrency queue" failures that stranded earlier submissions.
Co-authored-by: Cursor <cursoragent@cursor.com>
submit.py
CHANGED
|
@@ -105,6 +105,7 @@ import gradio as gr
|
|
| 105 |
from cadgenbench.common.paths import data_inputs_dir
|
| 106 |
from cadgenbench.common.validity import parse_step
|
| 107 |
from huggingface_hub import (
|
|
|
|
| 108 |
HfApi,
|
| 109 |
fetch_job_logs,
|
| 110 |
hf_hub_download,
|
|
@@ -1295,8 +1296,9 @@ def _merge_shards_and_publish(
|
|
| 1295 |
html_path = tmp / f"{submission_id}.html"
|
| 1296 |
html_path.write_text(html, encoding="utf-8")
|
| 1297 |
|
| 1298 |
-
|
| 1299 |
-
|
|
|
|
| 1300 |
return report_json["run_summary"]
|
| 1301 |
finally:
|
| 1302 |
shutil.rmtree(tmp, ignore_errors=True)
|
|
@@ -1326,83 +1328,70 @@ def _build_report_json(run_dir: Path) -> dict[str, Any]:
|
|
| 1326 |
return {"run_summary": summary, "per_fixture_results": per_fixture}
|
| 1327 |
|
| 1328 |
|
| 1329 |
-
def
|
| 1330 |
submission_id: str,
|
| 1331 |
html_path: Path,
|
| 1332 |
report_json: dict[str, Any],
|
|
|
|
| 1333 |
) -> None:
|
| 1334 |
-
"""
|
| 1335 |
-
|
| 1336 |
-
|
| 1337 |
-
|
| 1338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1339 |
"""
|
| 1340 |
-
|
| 1341 |
-
|
| 1342 |
-
path_or_fileobj=str(html_path),
|
| 1343 |
path_in_repo=f"{REPORTS_DIR}/{submission_id}.html",
|
| 1344 |
-
|
| 1345 |
-
repo_type="dataset",
|
| 1346 |
-
commit_message=f"add merged HTML report for {submission_id}",
|
| 1347 |
),
|
| 1348 |
-
|
| 1349 |
-
|
| 1350 |
-
_with_hub_retries(
|
| 1351 |
-
lambda: _HF_API.upload_file(
|
| 1352 |
path_or_fileobj=json.dumps(
|
| 1353 |
report_json, ensure_ascii=False, indent=2,
|
| 1354 |
).encode("utf-8"),
|
| 1355 |
-
path_in_repo=f"{REPORTS_DIR}/{submission_id}.json",
|
| 1356 |
-
repo_id=HF_SUBMISSIONS_REPO,
|
| 1357 |
-
repo_type="dataset",
|
| 1358 |
-
commit_message=f"add merged JSON report for {submission_id}",
|
| 1359 |
),
|
| 1360 |
-
|
| 1361 |
-
|
| 1362 |
-
logger.info("Uploaded merged reports/%s.{html,json}", submission_id)
|
| 1363 |
-
|
| 1364 |
-
|
| 1365 |
-
def _upload_gallery_renders_from_dir(
|
| 1366 |
-
submission_id: str,
|
| 1367 |
-
run_dir: Path,
|
| 1368 |
-
) -> None:
|
| 1369 |
-
"""Stage one ``iso`` thumbnail per fixture for the leaderboard gallery.
|
| 1370 |
-
|
| 1371 |
-
Mirrors ``eval_job.py``'s ``_upload_gallery_renders`` but reads from
|
| 1372 |
-
the merged run dir: every ``<run_dir>/<fixture>/renders/iso.png``
|
| 1373 |
-
becomes ``renders/<id>/<fixture>.png``. A fixture with no ``iso.png``
|
| 1374 |
-
(missing output / render that never ran) is skipped, matching the
|
| 1375 |
-
single-job behaviour; the gallery draws the dashed "invalid" cell
|
| 1376 |
-
from the row, so an absent thumbnail is not an error.
|
| 1377 |
-
"""
|
| 1378 |
-
staged: list[tuple[Path, str]] = []
|
| 1379 |
for fixture_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
|
| 1380 |
iso_png = fixture_dir / "renders" / f"{GALLERY_THUMB_VIEW}.png"
|
| 1381 |
if iso_png.is_file():
|
| 1382 |
-
|
| 1383 |
-
|
| 1384 |
-
logger.info("No gallery renders to upload for %s", submission_id)
|
| 1385 |
-
return
|
| 1386 |
-
for iso_png, fixture_name in staged:
|
| 1387 |
-
_with_hub_retries(
|
| 1388 |
-
lambda iso_png=iso_png, fixture_name=fixture_name: (
|
| 1389 |
-
_HF_API.upload_file(
|
| 1390 |
-
path_or_fileobj=str(iso_png),
|
| 1391 |
path_in_repo=(
|
| 1392 |
-
f"{RENDERS_DIR}/{submission_id}/{
|
| 1393 |
-
),
|
| 1394 |
-
repo_id=HF_SUBMISSIONS_REPO,
|
| 1395 |
-
repo_type="dataset",
|
| 1396 |
-
commit_message=(
|
| 1397 |
-
f"add gallery render {fixture_name} for {submission_id}"
|
| 1398 |
),
|
|
|
|
| 1399 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1400 |
),
|
| 1401 |
-
|
| 1402 |
-
|
|
|
|
| 1403 |
logger.info(
|
| 1404 |
-
"
|
| 1405 |
-
|
| 1406 |
)
|
| 1407 |
|
| 1408 |
|
|
|
|
| 105 |
from cadgenbench.common.paths import data_inputs_dir
|
| 106 |
from cadgenbench.common.validity import parse_step
|
| 107 |
from huggingface_hub import (
|
| 108 |
+
CommitOperationAdd,
|
| 109 |
HfApi,
|
| 110 |
fetch_job_logs,
|
| 111 |
hf_hub_download,
|
|
|
|
| 1296 |
html_path = tmp / f"{submission_id}.html"
|
| 1297 |
html_path.write_text(html, encoding="utf-8")
|
| 1298 |
|
| 1299 |
+
_publish_reports_and_gallery(
|
| 1300 |
+
submission_id, html_path, report_json, merged_run,
|
| 1301 |
+
)
|
| 1302 |
return report_json["run_summary"]
|
| 1303 |
finally:
|
| 1304 |
shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
| 1328 |
return {"run_summary": summary, "per_fixture_results": per_fixture}
|
| 1329 |
|
| 1330 |
|
| 1331 |
+
def _publish_reports_and_gallery(
    submission_id: str,
    html_path: Path,
    report_json: dict[str, Any],
    run_dir: Path,
) -> None:
    """Push the merged report and all gallery thumbnails as one Hub commit.

    Staged files, in order:

    * ``<REPORTS_DIR>/<submission_id>.html`` -- read from ``html_path``.
    * ``<REPORTS_DIR>/<submission_id>.json`` -- ``report_json`` serialised
      as indented UTF-8 JSON (non-ASCII characters kept verbatim).
    * ``<RENDERS_DIR>/<submission_id>/<fixture>.png`` -- one per immediate
      sub-directory of ``run_dir`` that contains
      ``renders/<GALLERY_THUMB_VIEW>.png``, visited in sorted order.

    A fixture directory without that thumbnail is silently skipped; it is
    not treated as an error.

    Everything is sent through a single ``create_commit`` call (wrapped in
    ``_with_hub_retries``) instead of one ``upload_file`` commit per file,
    so the whole publish lands as one commit on the dataset repo.

    Args:
        submission_id: Identifier used to build every destination path.
        html_path: Local path of the rendered HTML report.
        report_json: Report payload to serialise and upload.
        run_dir: Merged run directory whose sub-directories are fixtures.
    """
    # The two report files are staged first, HTML before JSON.
    html_op = CommitOperationAdd(
        path_in_repo=f"{REPORTS_DIR}/{submission_id}.html",
        path_or_fileobj=str(html_path),
    )
    json_payload = json.dumps(
        report_json, ensure_ascii=False, indent=2,
    ).encode("utf-8")
    json_op = CommitOperationAdd(
        path_in_repo=f"{REPORTS_DIR}/{submission_id}.json",
        path_or_fileobj=json_payload,
    )

    # One thumbnail per fixture directory that actually produced one.
    thumbnail_ops: list[CommitOperationAdd] = []
    fixture_dirs = sorted(
        entry for entry in run_dir.iterdir() if entry.is_dir()
    )
    for fixture_dir in fixture_dirs:
        thumbnail = fixture_dir / "renders" / f"{GALLERY_THUMB_VIEW}.png"
        if not thumbnail.is_file():
            continue
        destination = f"{RENDERS_DIR}/{submission_id}/{fixture_dir.name}.png"
        thumbnail_ops.append(
            CommitOperationAdd(
                path_in_repo=destination,
                path_or_fileobj=str(thumbnail),
            )
        )

    operations: list[CommitOperationAdd] = [html_op, json_op, *thumbnail_ops]
    render_count = len(thumbnail_ops)
    commit_message = (
        f"publish merged report + {render_count} gallery render(s) "
        f"for {submission_id}"
    )

    def _commit_all():
        # Re-invoked as-is by the retry wrapper; the staged operations
        # list is shared across attempts.
        return _HF_API.create_commit(
            repo_id=HF_SUBMISSIONS_REPO,
            repo_type="dataset",
            operations=operations,
            commit_message=commit_message,
        )

    _with_hub_retries(_commit_all, what="merged report+gallery publish")
    logger.info(
        "Published reports/%s.{html,json} + %d gallery render(s) in one commit",
        submission_id, render_count,
    )
|
| 1396 |
|
| 1397 |
|