Spaces:
Sleeping
Sleeping
| """Smoke test for the per-seed benchmark worker. | |
| The benchmark runs ~7 method variants per seed, swallowing exceptions so a | |
| single broken row can't kill a 300-seed run. That defensive design is good | |
| for production but bad for development — bugs in dahs_hybrid_* or | |
| RollingHorizonOracle would just log warnings and silently halve the | |
| benchmark output. This test catches that by: | |
| 1. Running _benchmark_single_seed on one short seed. | |
| 2. Asserting the six fixed-rule baselines all produced a row. | |
| 3. Asserting the synthesised best_fixed_oracle row exists. | |
| 4. Asserting wall-clock timing is captured on every row. | |
| If models/ doesn't have selector_*.joblib (smoke-test environment), the | |
| DAHS-* rows are skipped — but the baseline + oracle rows must still appear. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from pathlib import Path | |
| import pytest | |
| from src.evaluator import _benchmark_single_seed, _row | |
| REQUIRED_BASELINES = { | |
| "fifo", "priority_edd", "critical_ratio", "atc", "wspt", "slack", | |
| "best_fixed_oracle", | |
| } | |
| def test_benchmark_single_seed_produces_baselines_and_oracle(monkeypatch): | |
| # Use a small seed and shorter shift via env if the simulator supports it. | |
| rows = _benchmark_single_seed((42,)) | |
| methods_seen = {r["method"] for r in rows} | |
| missing = REQUIRED_BASELINES - methods_seen | |
| assert not missing, f"benchmark missing baseline rows: {missing}" | |
| # Every row must carry timing + the canonical metric set. | |
| for r in rows: | |
| assert "elapsed_seconds" in r, r | |
| assert r["elapsed_seconds"] >= 0.0 | |
| for field in ("makespan", "total_tardiness", "sla_breach_rate", | |
| "avg_cycle_time", "throughput", "queue_max", | |
| "completed_jobs"): | |
| assert field in r, (r["method"], field) | |
| def test_oracle_row_has_winner_field(): | |
| rows = _benchmark_single_seed((43,)) | |
| oracle = next((r for r in rows if r["method"] == "best_fixed_oracle"), None) | |
| assert oracle is not None | |
| assert "best_fixed_winner" in oracle | |
| assert oracle["best_fixed_winner"] in { | |
| "fifo", "priority_edd", "critical_ratio", "atc", "wspt", "slack", | |
| } | |
| # Oracle tardiness must equal the minimum among the six baseline rows | |
| # on this seed (sanity check on the synthesis logic). | |
| fixed = {r["method"]: r["total_tardiness"] | |
| for r in rows | |
| if r["method"] in {"fifo", "priority_edd", "critical_ratio", | |
| "atc", "wspt", "slack"}} | |
| if fixed: | |
| assert oracle["total_tardiness"] == pytest.approx(min(fixed.values())) | |
| def test_row_helper_shape(): | |
| """_row() output shape is the canonical contract every other writer uses.""" | |
| class _M: | |
| makespan = 100.0 | |
| total_tardiness = 5.0 | |
| sla_breach_rate = 0.1 | |
| avg_cycle_time = 30.0 | |
| zone_utilization = {0: 0.5, 1: 0.6} | |
| throughput = 80.0 | |
| queue_max = 12 | |
| completed_jobs = 100 | |
| r = _row(seed=7, method="t", m=_M(), elapsed=1.234) | |
| assert r["seed"] == 7 | |
| assert r["method"] == "t" | |
| assert r["elapsed_seconds"] == 1.234 | |
| assert 0.4 < r["zone_utilization_avg"] < 0.7 | |