"""Unit tests for the submit-tab pending-row builder. C4 contract: ``_build_pending_row`` defaults the three Bundle 1+2 schema fields (``validation_status="unvalidated"``, ``validation_method=None``, ``hf_username=None``) and keeps the existing metadata + sha256 fields intact. Hub I/O (``_resolve_data_revision`` reads from ``HfApi.dataset_info``) is monkeypatched out, so the suite has zero network traffic. """ from __future__ import annotations import importlib.util from pathlib import Path from types import SimpleNamespace import pytest import submit def _hub_http_error(status: int, headers: dict | None = None) -> submit.HfHubHTTPError: """An ``HfHubHTTPError`` with a minimal response carrying *status*. Built without going through the real Hub: a ``SimpleNamespace`` stands in for the httpx response so ``_with_hub_retries`` can read ``response.status_code`` / ``response.headers``. Newer ``huggingface_hub`` makes ``response`` a required keyword-only constructor argument, so it's passed in directly (and re-assigned afterwards for the older positional-optional signature too). """ response = SimpleNamespace( status_code=status, headers=headers or {}, request=None, ) err = submit.HfHubHTTPError(f"HTTP {status}", response=response) err.response = response return err def test_with_hub_retries_recovers_after_transient(monkeypatch): """A 429 (then a 503) is retried; the eventual success is returned.""" monkeypatch.setattr(submit.time, "sleep", lambda *_: None) statuses = iter([429, 503]) calls = {"n": 0} def flaky(): calls["n"] += 1 status = next(statuses, None) if status is not None: raise _hub_http_error(status) return "ok" assert submit._with_hub_retries(flaky, what="test") == "ok" assert calls["n"] == 3 def test_with_hub_retries_reraises_non_retryable(monkeypatch): """A 403 is not in the retry set, so it propagates on the first try.""" monkeypatch.setattr(submit.time, "sleep", lambda *_: None) calls = {"n": 0} def forbidden(): calls["n"] += 1 raise _hub_http_error(403) with pytest.raises(submit.HfHubHTTPError): submit._with_hub_retries(forbidden, what="test") assert calls["n"] == 1 def test_with_hub_retries_gives_up_after_wall_cap(monkeypatch): """Past the wall cap, a persistent 429 stops being retried and raises.""" monkeypatch.setattr(submit.time, "sleep", lambda *_: None) # Force the deadline check to trip after the first failure: the # second monotonic() read (post-failure) already exceeds the cap. ticks = iter([0.0, submit.HUB_RETRY_MAX_SECONDS + 1]) monkeypatch.setattr( submit.time, "monotonic", lambda: next(ticks, submit.HUB_RETRY_MAX_SECONDS + 1) ) calls = {"n": 0} def always_429(): calls["n"] += 1 raise _hub_http_error(429) with pytest.raises(submit.HfHubHTTPError): submit._with_hub_retries(always_429, what="test") assert calls["n"] == 1 def test_retry_after_header_is_honored(monkeypatch): """A ``Retry-After`` seconds value sets the floor for the sleep delay.""" slept: list[float] = [] monkeypatch.setattr(submit.time, "sleep", lambda d: slept.append(d)) calls = {"n": 0} def flaky(): calls["n"] += 1 if calls["n"] == 1: raise _hub_http_error(429, headers={"Retry-After": "7"}) return "ok" assert submit._with_hub_retries(flaky, what="test") == "ok" assert slept and slept[0] >= 7.0 def test_dispatch_shard_passes_bucket_env(monkeypatch): """Bucket-configured shard jobs get the bucket env, no volume mount.""" captured: dict = {} def fake_run_job(**kwargs): captured.update(kwargs) return SimpleNamespace(id="job-123") monkeypatch.setenv("HF_TOKEN", "hf_test") monkeypatch.setattr( submit, "SHARD_BUCKET", "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging", ) monkeypatch.setattr(submit, "SHARD_BUCKET_PREFIX", "submissions") monkeypatch.setattr(submit, "run_job", fake_run_job) job_id = submit._dispatch_eval_command( "sub-1", "https://example.test/sub-1.zip", ["--shard-id", "shard_000", "--fixtures", "101,102"], ) assert job_id == "job-123" assert captured["env"]["CADGENBENCH_SHARD_BUCKET"] == ( "HuggingAI4Engineering/cadgenbench-eval-staging" ) assert captured["env"]["CADGENBENCH_SHARD_BUCKET_PREFIX"] == "submissions" # Mount-free: no volume is attached to the job. assert "volumes" not in captured def test_dispatch_whole_submission_no_bucket_env(monkeypatch): """Configured bucket staging is only for sharded eval jobs.""" captured: dict = {} def fake_run_job(**kwargs): captured.update(kwargs) return SimpleNamespace(id="job-456") monkeypatch.setenv("HF_TOKEN", "hf_test") monkeypatch.setattr(submit, "SHARD_BUCKET", "org/bucket") monkeypatch.setattr(submit, "run_job", fake_run_job) job_id = submit._dispatch_eval_command( "sub-1", "https://example.test/sub-1.zip", [], ) assert job_id == "job-456" assert "volumes" not in captured assert "CADGENBENCH_SHARD_BUCKET" not in captured["env"] def test_shard_bucket_uri_built_from_id_and_prefix(monkeypatch): """The bucket URI strips any hf:// prefix and nests submission/shards.""" monkeypatch.setattr( submit, "SHARD_BUCKET", "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging", ) monkeypatch.setattr(submit, "SHARD_BUCKET_PREFIX", "submissions") assert submit._shard_bucket_id() == ( "HuggingAI4Engineering/cadgenbench-eval-staging" ) assert submit._shard_bucket_uri("sub-1") == ( "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging/" "submissions/sub-1/shards" ) def test_eval_job_syncs_shard_to_bucket(tmp_path: Path, monkeypatch): """In bucket mode the eval job syncs shard outputs to the bucket URI.""" eval_job_path = ( Path(__file__).resolve().parents[2] / "cadgenbench-eval-gpu" / "eval_job.py" ) spec = importlib.util.spec_from_file_location("eval_job_for_test", eval_job_path) assert spec and spec.loader eval_job = importlib.util.module_from_spec(spec) spec.loader.exec_module(eval_job) run_dir = tmp_path / "run" fixture_dir = run_dir / "101" fixture_dir.mkdir(parents=True) (fixture_dir / "result.json").write_text("{}", encoding="utf-8") captured: dict = {} def fake_sync_bucket(self, *, source, dest, token=None): captured.update(source=source, dest=dest) monkeypatch.setattr(eval_job.HfApi, "sync_bucket", fake_sync_bucket) monkeypatch.setenv( eval_job.SHARD_BUCKET_ENV, "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging", ) monkeypatch.setenv(eval_job.SHARD_BUCKET_PREFIX_ENV, "submissions") eval_job._upload_shard_artifacts( "sub-1", "shard_000", run_dir, "ignored/submissions", "ignored-token", ) assert captured["source"] == str(run_dir) assert captured["dest"] == ( "hf://buckets/HuggingAI4Engineering/cadgenbench-eval-staging/" "submissions/sub-1/shards/shard_000" ) def test_poll_until_done_uses_jobs_namespace_and_token(monkeypatch): """Polling must target the namespace where Jobs were dispatched.""" captured: dict = {} def fake_inspect_job(**kwargs): captured.update(kwargs) return SimpleNamespace( status=SimpleNamespace(stage="COMPLETED", message=None), ) monkeypatch.setenv("HF_TOKEN", "hf_test") monkeypatch.setattr(submit, "inspect_job", fake_inspect_job) assert submit._poll_until_done("job-123", "sub-1") == ("COMPLETED", None) assert captured == { "job_id": "job-123", "namespace": submit.EVAL_JOB_NAMESPACE, "token": "hf_test", } def test_shard_poll_uses_jobs_namespace_and_token(monkeypatch): """Sharded polling uses the same Jobs namespace/token as dispatch.""" captured: dict = {} def fake_inspect_job(**kwargs): captured.update(kwargs) return SimpleNamespace( status=SimpleNamespace(stage="COMPLETED", message=None), ) monkeypatch.setenv("HF_TOKEN", "hf_test") monkeypatch.setattr(submit, "inspect_job", fake_inspect_job) monkeypatch.setattr(submit.time, "sleep", lambda *_: None) failures = submit._poll_shards_until_done( "sub-1", "https://example.test/sub-1.zip", {"shard_000": {"job_id": "job-123", "stage": None, "message": None}}, ) assert failures == [] assert captured == { "job_id": "job-123", "namespace": submit.EVAL_JOB_NAMESPACE, "token": "hf_test", } def test_job_failure_reason_fetches_logs_with_namespace_and_token(monkeypatch): """Failure diagnostics fetch logs from the same Jobs namespace.""" captured: dict = {} def fake_fetch_job_logs(**kwargs): captured.update(kwargs) return ["line 1\n", "line 2\n"] monkeypatch.setenv("HF_TOKEN", "hf_test") monkeypatch.setattr(submit, "fetch_job_logs", fake_fetch_job_logs) reason = submit._job_failure_reason("job-123", "ERROR", "boom") assert "line 2" in reason assert captured == { "job_id": "job-123", "namespace": submit.EVAL_JOB_NAMESPACE, "token": "hf_test", } def _stub_meta() -> dict: """Minimum meta.json shape that survives ``_load_and_validate_meta``.""" return { "submitter_name": "team-test", "submission_name": "Stub Agent v1", "agent_url": "https://github.com/example/stub-agent", "notes": "test row, not a real submission", "agree_to_publish": True, } def test_pending_row_defaults_new_fields(monkeypatch): """Three Bundle 1+2 fields land with their schema defaults.""" monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev") row = submit._build_pending_row( submission_id="sub-test-x", meta=_stub_meta(), blob_url="https://huggingface.co/datasets/example/sub-test-x.zip", submission_sha256="a" * 64, ) assert row["validation_status"] == "unvalidated" assert row["validation_method"] is None assert row["hf_username"] is None def test_pending_row_preserves_sha256(monkeypatch): """Existing dedup path's row-level half: sha256 still gets stamped on the row.""" monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev") expected_hash = "f" * 64 row = submit._build_pending_row( submission_id="sub-test-x", meta=_stub_meta(), blob_url="https://huggingface.co/datasets/example/sub-test-x.zip", submission_sha256=expected_hash, ) assert row["submission_sha256"] == expected_hash def test_pending_row_populates_hf_username_when_provided(monkeypatch): """C10 OAuth path: profile.username flows into the row's hf_username. The submit handler reads ``gr.OAuthProfile`` (injected by Gradio) and passes ``profile.username`` through as a kwarg. This test exercises just the row builder's side of that handoff so a refactor that drops the kwarg gets caught. """ monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev") row = submit._build_pending_row( submission_id="sub-test-x", meta=_stub_meta(), blob_url="https://huggingface.co/datasets/example/sub-test-x.zip", submission_sha256="a" * 64, hf_username="alice", ) assert row["hf_username"] == "alice" def test_pending_row_hf_username_defaults_to_none(monkeypatch): """Omitting the kwarg keeps `hf_username` null. Covers the pre-OAuth callers (test fixtures, scripts) that don't have a profile in scope. Pre-C10 row writers and any future non-OAuth caller default cleanly. """ monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev") row = submit._build_pending_row( submission_id="sub-test-x", meta=_stub_meta(), blob_url="https://huggingface.co/datasets/example/sub-test-x.zip", submission_sha256="a" * 64, ) assert row["hf_username"] is None def test_pending_row_preserves_existing_metadata(monkeypatch): """Pre-Bundle-1+2 fields keep their values from meta + args. Regression guard: a future refactor of ``_build_pending_row`` that accidentally drops one of these keys would silently change the schema of every row the Space writes. """ monkeypatch.setattr(submit, "_resolve_data_revision", lambda: "test-rev") meta = _stub_meta() row = submit._build_pending_row( submission_id="sub-test-x", meta=meta, blob_url="https://example.test/sub-test-x.zip", submission_sha256="0" * 64, ) assert row["submission_id"] == "sub-test-x" assert row["status"] == "pending" assert row["failure_reason"] is None assert row["submitter_name"] == meta["submitter_name"] assert row["submission_name"] == meta["submission_name"] assert row["agent_url"] == meta["agent_url"] assert row["notes"] == meta["notes"] assert row["submission_blob_url"] == "https://example.test/sub-test-x.zip" assert row["cadgenbench_data_revision"] == "test-rev" # Score-shaped fields are null on a fresh pending row. for k in ( "aggregate_score", "validity_rate", "score_by_task_type", "per_task_scores", "per_fixture_scores", "per_fixture_breakdown", ): assert row[k] is None def test_validate_steps_allows_missing_output_step(tmp_path: Path, monkeypatch): """Missing fixture outputs are accepted; evaluator scores them as missing.""" (tmp_path / "101").mkdir() (tmp_path / "102").mkdir() calls: list[Path] = [] monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p)) submit._validate_steps_parseable(tmp_path, {"101", "102"}) assert calls == [] def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch): """Present candidate files are still cheap-parse checked.""" fixture = tmp_path / "101" fixture.mkdir() candidate = fixture / "output.stp" candidate.write_text("ISO-10303-21;\n") calls: list[Path] = [] monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p)) submit._validate_steps_parseable(tmp_path, {"101"}) assert calls == [candidate]