Disruption-System / tests /test_heuristics_parity.py
Vittal-M's picture
Upload 66 files
906e104 verified
"""Parity tests for the six dispatch heuristics.
For each baseline, two independently constructed simulators on the same seed
must produce identical SimulationMetrics. This catches hidden global state
that would silently bias the DAHS-vs-baselines comparison.
Short shifts (120-240 min) keep pytest under a few seconds; the full
benchmark uses 600-min × 300 seeds and is not exercised here.
"""
from __future__ import annotations
import math
import pytest
from src.heuristics import (
fifo_dispatch,
priority_edd_dispatch,
critical_ratio_dispatch,
atc_dispatch,
wspt_dispatch,
slack_dispatch,
)
from src.simulator import WarehouseSimulator
HEURISTICS = [
("fifo", fifo_dispatch),
("priority_edd", priority_edd_dispatch),
("critical_ratio", critical_ratio_dispatch),
("atc", atc_dispatch),
("wspt", wspt_dispatch),
("slack", slack_dispatch),
]
def _run(heur_fn, seed: int, duration: float):
sim = WarehouseSimulator(seed=seed, heuristic_fn=heur_fn)
return sim.run(duration=duration)
@pytest.mark.parametrize("name,heur_fn", HEURISTICS)
def test_heuristic_is_deterministic(name, heur_fn):
"""Two fresh simulators on the same seed produce identical metrics."""
seed = 4242
duration = 120.0
m1 = _run(heur_fn, seed, duration)
m2 = _run(heur_fn, seed, duration)
assert m1.completed_jobs == m2.completed_jobs, name
assert math.isclose(m1.makespan, m2.makespan, rel_tol=1e-9), name
assert math.isclose(m1.total_tardiness, m2.total_tardiness, rel_tol=1e-9), name
assert math.isclose(m1.sla_breach_rate, m2.sla_breach_rate, rel_tol=1e-9), name
assert math.isclose(m1.avg_cycle_time, m2.avg_cycle_time, rel_tol=1e-9), name
def test_different_seeds_diverge():
"""Different seeds should not produce identical trajectories."""
a = _run(fifo_dispatch, seed=1, duration=120.0)
b = _run(fifo_dispatch, seed=2, duration=120.0)
assert (a.completed_jobs != b.completed_jobs
or not math.isclose(a.total_tardiness, b.total_tardiness, abs_tol=1e-3))
def test_heuristics_produce_distinct_results():
"""At least two heuristics must disagree on SOME metric on a non-trivial
seed. If every heuristic returns identical metrics, dispatch ordering has
no effect — meaning the simulator is not actually using heuristic_fn.
"""
seed = 7
duration = 240.0
metrics_per = {name: _run(fn, seed, duration) for name, fn in HEURISTICS}
fingerprints = {
name: (
round(m.total_tardiness, 2),
round(m.avg_cycle_time, 2),
round(m.makespan, 2),
m.completed_jobs,
)
for name, m in metrics_per.items()
}
distinct = set(fingerprints.values())
assert len(distinct) >= 2, f"All heuristics produced identical metrics: {fingerprints}"