"""Unit tests for the submit tab's helpers.

Covers the Hub retry wrapper (``_with_hub_retries``), eval-job dispatch,
polling and failure diagnostics, shard bucket staging, the pending-row
builder, and the cheap STEP parse check.

C4 contract: ``_build_pending_row`` defaults the three Bundle 1+2
schema fields (``validation_status="unvalidated"``,
``validation_method=None``, ``hf_username=None``) and keeps the
existing metadata + sha256 fields intact.

Hub I/O (``_resolve_data_revision`` reads from
``HfApi.dataset_info``) is monkeypatched out, so the suite has zero
network traffic.
"""
| from __future__ import annotations |
|
|
| import importlib.util |
| from pathlib import Path |
| from types import SimpleNamespace |
|
|
| import pytest |
|
|
| import submit |
|
|
|
|
def _hub_http_error(status: int, headers: dict | None = None) -> submit.HfHubHTTPError:
    """Build an ``HfHubHTTPError`` whose stand-in response reports *status*.

    No Hub request is made: a ``SimpleNamespace`` plays the role of the
    httpx response so ``_with_hub_retries`` can read
    ``response.status_code`` / ``response.headers``.

    Args:
        status: HTTP status code exposed as ``response.status_code``.
        headers: Optional response headers; a falsy value becomes ``{}``.

    Returns:
        The constructed error with ``.response`` set to the stand-in.
    """
    fake_response = SimpleNamespace(
        status_code=status,
        headers=headers or {},
        request=None,
    )
    # Newer ``huggingface_hub`` requires ``response`` as a keyword-only
    # constructor argument, so it is supplied at construction time ...
    error = submit.HfHubHTTPError(f"HTTP {status}", response=fake_response)
    # ... and re-assigned afterwards for the older positional-optional
    # signature too.
    error.response = fake_response
    return error
|
|
|
|
def test_with_hub_retries_recovers_after_transient(monkeypatch):
    """Transient 429 and 503 failures are retried until the call succeeds."""
    # Keep the test instant regardless of how long the wrapper wants to wait.
    monkeypatch.setattr(submit.time, "sleep", lambda *_: None)
    pending_failures = [429, 503]
    attempts = []

    def flaky():
        # Fail once per queued status, then succeed.
        attempts.append(None)
        if pending_failures:
            raise _hub_http_error(pending_failures.pop(0))
        return "ok"

    result = submit._with_hub_retries(flaky, what="test")

    assert result == "ok"
    # Two failures plus the successful third attempt.
    assert len(attempts) == 3
|
|
|
|
def test_with_hub_retries_reraises_non_retryable(monkeypatch):
    """A 403 propagates after a single attempt instead of being retried."""
    monkeypatch.setattr(submit.time, "sleep", lambda *_: None)
    attempts = []

    def forbidden():
        attempts.append(403)
        raise _hub_http_error(403)

    with pytest.raises(submit.HfHubHTTPError):
        submit._with_hub_retries(forbidden, what="test")

    # Exactly one call: the error surfaced on the first try.
    assert len(attempts) == 1
|
|
|
|
def test_with_hub_retries_gives_up_after_wall_cap(monkeypatch):
    """Once the wall-clock cap is exceeded, a persistent 429 is re-raised."""
    monkeypatch.setattr(submit.time, "sleep", lambda *_: None)

    # Fake clock: the first reading is 0.0, every later reading is already
    # past ``HUB_RETRY_MAX_SECONDS``.
    clock_reads = []

    def fake_monotonic():
        clock_reads.append(None)
        if len(clock_reads) == 1:
            return 0.0
        return submit.HUB_RETRY_MAX_SECONDS + 1

    monkeypatch.setattr(submit.time, "monotonic", fake_monotonic)
    attempts = []

    def always_429():
        attempts.append(429)
        raise _hub_http_error(429)

    with pytest.raises(submit.HfHubHTTPError):
        submit._with_hub_retries(always_429, what="test")

    # The cap was already blown after the first failure, so no retry ran.
    assert len(attempts) == 1
|
|
|
|
def test_retry_after_header_is_honored(monkeypatch):
    """The first sleep is at least as long as the ``Retry-After`` value."""
    delays: list[float] = []

    def record_sleep(delay):
        # Capture the requested delay instead of actually sleeping.
        delays.append(delay)

    monkeypatch.setattr(submit.time, "sleep", record_sleep)
    attempts = []

    def flaky():
        # Only the first attempt fails, carrying a 7-second Retry-After.
        attempts.append(None)
        if len(attempts) > 1:
            return "ok"
        raise _hub_http_error(429, headers={"Retry-After": "7"})

    outcome = submit._with_hub_retries(flaky, what="test")

    assert outcome == "ok"
    assert delays
    assert delays[0] >= 7.0
|
|
|
|
def test_dispatch_shard_passes_bucket_env(monkeypatch):
    """A shard job dispatched with a bucket configured carries the bucket env.

    The stubbed ``run_job`` records its keyword arguments so the test can
    inspect the environment handed to the job and confirm no ``volumes``
    argument was passed.
    """
    run_job_kwargs: dict = {}

    def fake_run_job(**kwargs):
        run_job_kwargs.update(kwargs)
        return SimpleNamespace(id="job-123")

    monkeypatch.setenv("HF_TOKEN", "hf_test")
    bucket_setting = "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging"
    monkeypatch.setattr(submit, "SHARD_BUCKET", bucket_setting)
    monkeypatch.setattr(submit, "SHARD_BUCKET_PREFIX", "submissions")
    monkeypatch.setattr(submit, "run_job", fake_run_job)

    shard_args = ["--shard-id", "shard_000", "--fixtures", "101,102"]
    job_id = submit._dispatch_eval_command(
        "sub-1",
        "https://example.test/sub-1.zip",
        shard_args,
    )

    assert job_id == "job-123"
    job_env = run_job_kwargs["env"]
    # The env carries the bare bucket id, without the ``hf://buckets/`` scheme.
    assert job_env["CADGENBENCH_SHARD_BUCKET"] == (
        "HuggingAI4Engineering/cadgenbench-eval-staging"
    )
    assert job_env["CADGENBENCH_SHARD_BUCKET_PREFIX"] == "submissions"
    # Bucket mode must not request a volume mount.
    assert "volumes" not in run_job_kwargs
|
|
|
|
def test_dispatch_whole_submission_no_bucket_env(monkeypatch):
    """With no shard arguments, a configured bucket is not passed to the job."""
    run_job_kwargs: dict = {}

    def fake_run_job(**kwargs):
        run_job_kwargs.update(kwargs)
        return SimpleNamespace(id="job-456")

    monkeypatch.setenv("HF_TOKEN", "hf_test")
    monkeypatch.setattr(submit, "SHARD_BUCKET", "org/bucket")
    monkeypatch.setattr(submit, "run_job", fake_run_job)

    # An empty extra-argument list means a whole-submission (unsharded) run.
    no_shard_args = []
    job_id = submit._dispatch_eval_command(
        "sub-1",
        "https://example.test/sub-1.zip",
        no_shard_args,
    )

    assert job_id == "job-456"
    assert "volumes" not in run_job_kwargs
    job_env = run_job_kwargs["env"]
    assert "CADGENBENCH_SHARD_BUCKET" not in job_env
|
|
|
|
def test_shard_bucket_uri_built_from_id_and_prefix(monkeypatch):
    """Bucket id drops the ``hf://`` scheme; the URI nests prefix/submission/shards."""
    configured_bucket = "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging"
    monkeypatch.setattr(submit, "SHARD_BUCKET", configured_bucket)
    monkeypatch.setattr(submit, "SHARD_BUCKET_PREFIX", "submissions")

    bucket_id = submit._shard_bucket_id()
    assert bucket_id == "HuggingAI4Engineering/cadgenbench-eval-staging"

    shard_uri = submit._shard_bucket_uri("sub-1")
    assert shard_uri == (
        "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging/"
        "submissions/sub-1/shards"
    )
|
|
|
|
def test_eval_job_syncs_shard_to_bucket(tmp_path: Path, monkeypatch):
    """With the bucket env set, shard artifacts are synced to the shard's bucket URI."""
    # Load ``eval_job.py`` straight from its file path under a private
    # module name, relative to this test file's location.
    base_dir = Path(__file__).resolve().parents[2]
    eval_job_path = base_dir / "cadgenbench-eval-gpu" / "eval_job.py"
    spec = importlib.util.spec_from_file_location("eval_job_for_test", eval_job_path)
    assert spec and spec.loader
    eval_job = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(eval_job)

    # Minimal run directory: one fixture folder holding an empty result.
    run_dir = tmp_path / "run"
    result_dir = run_dir / "101"
    result_dir.mkdir(parents=True)
    result_file = result_dir / "result.json"
    result_file.write_text("{}", encoding="utf-8")

    sync_call: dict = {}

    def fake_sync_bucket(self, *, source, dest, token=None):
        # Record where the sync would have gone instead of touching the Hub.
        sync_call["source"] = source
        sync_call["dest"] = dest

    monkeypatch.setattr(eval_job.HfApi, "sync_bucket", fake_sync_bucket)
    monkeypatch.setenv(
        eval_job.SHARD_BUCKET_ENV,
        "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging",
    )
    monkeypatch.setenv(eval_job.SHARD_BUCKET_PREFIX_ENV, "submissions")

    eval_job._upload_shard_artifacts(
        "sub-1",
        "shard_000",
        run_dir,
        "ignored/submissions",
        "ignored-token",
    )

    expected_dest = (
        "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging/"
        "submissions/sub-1/shards/shard_000"
    )
    assert sync_call["source"] == str(run_dir)
    assert sync_call["dest"] == expected_dest
|
|
|
|
def test_poll_until_done_uses_jobs_namespace_and_token(monkeypatch):
    """``_poll_until_done`` inspects the job with the eval namespace and HF token."""
    inspect_kwargs: dict = {}

    def fake_inspect_job(**kwargs):
        # Report the job as finished on the very first poll.
        inspect_kwargs.update(kwargs)
        finished = SimpleNamespace(stage="COMPLETED", message=None)
        return SimpleNamespace(status=finished)

    monkeypatch.setenv("HF_TOKEN", "hf_test")
    monkeypatch.setattr(submit, "inspect_job", fake_inspect_job)

    outcome = submit._poll_until_done("job-123", "sub-1")

    assert outcome == ("COMPLETED", None)
    expected_kwargs = {
        "job_id": "job-123",
        "namespace": submit.EVAL_JOB_NAMESPACE,
        "token": "hf_test",
    }
    assert inspect_kwargs == expected_kwargs
|
|
|
|
def test_shard_poll_uses_jobs_namespace_and_token(monkeypatch):
    """``_poll_shards_until_done`` inspects jobs with the eval namespace and HF token."""
    inspect_kwargs: dict = {}

    def fake_inspect_job(**kwargs):
        # Report the shard job as finished on the very first poll.
        inspect_kwargs.update(kwargs)
        finished = SimpleNamespace(stage="COMPLETED", message=None)
        return SimpleNamespace(status=finished)

    monkeypatch.setenv("HF_TOKEN", "hf_test")
    monkeypatch.setattr(submit, "inspect_job", fake_inspect_job)
    monkeypatch.setattr(submit.time, "sleep", lambda *_: None)

    shard_jobs = {
        "shard_000": {"job_id": "job-123", "stage": None, "message": None},
    }
    failures = submit._poll_shards_until_done(
        "sub-1",
        "https://example.test/sub-1.zip",
        shard_jobs,
    )

    assert failures == []
    expected_kwargs = {
        "job_id": "job-123",
        "namespace": submit.EVAL_JOB_NAMESPACE,
        "token": "hf_test",
    }
    assert inspect_kwargs == expected_kwargs
|
|
|
|
def test_job_failure_reason_fetches_logs_with_namespace_and_token(monkeypatch):
    """``_job_failure_reason`` fetches logs with the eval namespace and HF token."""
    fetch_kwargs: dict = {}
    stub_log_lines = ["line 1\n", "line 2\n"]

    def fake_fetch_job_logs(**kwargs):
        fetch_kwargs.update(kwargs)
        # Hand back a fresh list so each call sees unmodified log lines.
        return list(stub_log_lines)

    monkeypatch.setenv("HF_TOKEN", "hf_test")
    monkeypatch.setattr(submit, "fetch_job_logs", fake_fetch_job_logs)

    reason = submit._job_failure_reason("job-123", "ERROR", "boom")

    # The fetched log text must show up in the reported reason.
    assert "line 2" in reason
    expected_kwargs = {
        "job_id": "job-123",
        "namespace": submit.EVAL_JOB_NAMESPACE,
        "token": "hf_test",
    }
    assert fetch_kwargs == expected_kwargs
|
|
|
|
| def _stub_meta() -> dict: |
| """Minimum meta.json shape that survives ``_load_and_validate_meta``.""" |
| return { |
| "submitter_name": "team-test", |
| "submission_name": "Stub Agent v1", |
| "agent_url": "https://github.com/example/stub-agent", |
| "notes": "test row, not a real submission", |
| "agree_to_publish": True, |
| } |
|
|
|
|
def test_pending_row_defaults_new_fields(monkeypatch):
    """The three Bundle 1+2 fields take their schema defaults on a new row."""
    # Stub the revision lookup so no Hub call is made.
    monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev")
    builder_kwargs = {
        "submission_id": "sub-test-x",
        "meta": _stub_meta(),
        "blob_url": "https://huggingface.co/datasets/example/sub-test-x.zip",
        "submission_sha256": "a" * 64,
    }

    row = submit._build_pending_row(**builder_kwargs)

    assert row["validation_status"] == "unvalidated"
    assert row["validation_method"] is None
    assert row["hf_username"] is None
|
|
|
|
def test_pending_row_preserves_sha256(monkeypatch):
    """The sha256 passed to the builder is stamped on the row unchanged.

    This is the row-level half of the existing dedup path.
    """
    monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev")
    digest = "f" * 64
    builder_kwargs = {
        "submission_id": "sub-test-x",
        "meta": _stub_meta(),
        "blob_url": "https://huggingface.co/datasets/example/sub-test-x.zip",
        "submission_sha256": digest,
    }

    row = submit._build_pending_row(**builder_kwargs)

    assert row["submission_sha256"] == digest
|
|
|
|
def test_pending_row_populates_hf_username_when_provided(monkeypatch):
    """C10 OAuth path: a supplied ``hf_username`` lands on the row.

    The submit handler reads ``gr.OAuthProfile`` (injected by Gradio)
    and forwards ``profile.username`` as a keyword argument. Only the
    row builder's side of that handoff is exercised here, so a refactor
    that drops the kwarg gets caught.
    """
    monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev")
    builder_kwargs = {
        "submission_id": "sub-test-x",
        "meta": _stub_meta(),
        "blob_url": "https://huggingface.co/datasets/example/sub-test-x.zip",
        "submission_sha256": "a" * 64,
        "hf_username": "alice",
    }

    row = submit._build_pending_row(**builder_kwargs)

    assert row["hf_username"] == "alice"
|
|
|
|
def test_pending_row_hf_username_defaults_to_none(monkeypatch):
    """Leaving out the ``hf_username`` kwarg yields a null ``hf_username``.

    Covers callers with no OAuth profile in scope (test fixtures,
    scripts): pre-C10 row writers and any future non-OAuth caller
    default cleanly.
    """
    monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev")
    # Deliberately no ``hf_username`` key here.
    builder_kwargs = {
        "submission_id": "sub-test-x",
        "meta": _stub_meta(),
        "blob_url": "https://huggingface.co/datasets/example/sub-test-x.zip",
        "submission_sha256": "a" * 64,
    }

    row = submit._build_pending_row(**builder_kwargs)

    assert row["hf_username"] is None
|
|
|
|
def test_pending_row_preserves_existing_metadata(monkeypatch):
    """Pre-Bundle-1+2 row fields still come through from meta and arguments.

    Regression guard: if a refactor of ``_build_pending_row`` dropped
    one of these keys, the schema of every row the Space writes would
    change silently.
    """
    monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev")
    meta = _stub_meta()
    blob_url = "https://example.test/sub-test-x.zip"

    row = submit._build_pending_row(
        submission_id="sub-test-x",
        meta=meta,
        blob_url=blob_url,
        submission_sha256="0" * 64,
    )

    assert row["submission_id"] == "sub-test-x"
    assert row["status"] == "pending"
    assert row["failure_reason"] is None
    # Fields copied verbatim from meta.json.
    for meta_key in ("submitter_name", "submission_name", "agent_url", "notes"):
        assert row[meta_key] == meta[meta_key]
    assert row["submission_blob_url"] == blob_url
    assert row["cadgenbench_data_revision"] == "test-rev"

    # Score fields are present but still unset on a pending row.
    score_fields = (
        "aggregate_score",
        "validity_rate",
        "score_by_task_type",
        "per_task_scores",
        "per_fixture_scores",
        "per_fixture_breakdown",
    )
    already_set = [name for name in score_fields if row[name] is not None]
    assert already_set == []
|
|
|
|
def test_validate_steps_allows_missing_output_step(tmp_path: Path, monkeypatch):
    """Fixture folders with no output file pass validation without any parse call.

    Missing outputs are accepted here; the evaluator scores them as missing.
    """
    for fixture_id in ("101", "102"):
        (tmp_path / fixture_id).mkdir()
    parsed: list[Path] = []

    def record_parse(path):
        # Stand-in for ``parse_step`` that only records what it was asked to parse.
        parsed.append(path)

    monkeypatch.setattr(submit, "parse_step", record_parse)

    submit._validate_steps_parseable(tmp_path, {"101", "102"})

    assert parsed == []
|
|
|
|
def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
    """A present ``output.stp`` is still passed to the cheap parse check."""
    fixture = tmp_path / "101"
    fixture.mkdir()
    candidate = fixture / "output.stp"
    # Explicit encoding keeps the write independent of the platform's
    # default locale and matches the other ``write_text`` call in this file.
    candidate.write_text("ISO-10303-21;\n", encoding="utf-8")
    calls: list[Path] = []
    # Stand-in for ``parse_step`` that records each path it receives.
    monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))

    submit._validate_steps_parseable(tmp_path, {"101"})

    # Exactly the one candidate file was handed to the parser.
    assert calls == [candidate]
|
|