Spaces:

Vittal-M
/

Disruption-System

Sleeping

App Files Files Community

Disruption-System / tests /test_benchmark_smoke.py

Vittal-M

Upload 66 files

906e104 verified about 1 month ago

raw

history blame contribute delete

3.18 kB

	"""Smoke test for the per-seed benchmark worker.

	The benchmark runs ~7 method variants per seed, swallowing exceptions so a
	single broken row can't kill a 300-seed run. That defensive design is good
	for production but bad for development — bugs in dahs_hybrid_* or
	RollingHorizonOracle would just log warnings and silently halve the
	benchmark output. This test catches that by:

	1. Running _benchmark_single_seed on one short seed.
	2. Asserting the six fixed-rule baselines all produced a row.
	3. Asserting the synthesised best_fixed_oracle row exists.
	4. Asserting wall-clock timing is captured on every row.

	If models/ doesn't have selector_*.joblib (smoke-test environment), the
	DAHS-* rows are skipped — but the baseline + oracle rows must still appear.
	"""
	from __future__ import annotations

	import os
	from pathlib import Path

	import pytest

	from src.evaluator import _benchmark_single_seed, _row


	REQUIRED_BASELINES = {
	"fifo", "priority_edd", "critical_ratio", "atc", "wspt", "slack",
	"best_fixed_oracle",
	}


	@pytest.mark.timeout(300)
	def test_benchmark_single_seed_produces_baselines_and_oracle(monkeypatch):
	# Use a small seed and shorter shift via env if the simulator supports it.
	rows = _benchmark_single_seed((42,))
	methods_seen = {r["method"] for r in rows}

	missing = REQUIRED_BASELINES - methods_seen
	assert not missing, f"benchmark missing baseline rows: {missing}"

	# Every row must carry timing + the canonical metric set.
	for r in rows:
	assert "elapsed_seconds" in r, r
	assert r["elapsed_seconds"] >= 0.0
	for field in ("makespan", "total_tardiness", "sla_breach_rate",
	"avg_cycle_time", "throughput", "queue_max",
	"completed_jobs"):
	assert field in r, (r["method"], field)


	def test_oracle_row_has_winner_field():
	rows = _benchmark_single_seed((43,))
	oracle = next((r for r in rows if r["method"] == "best_fixed_oracle"), None)
	assert oracle is not None
	assert "best_fixed_winner" in oracle
	assert oracle["best_fixed_winner"] in {
	"fifo", "priority_edd", "critical_ratio", "atc", "wspt", "slack",
	}
	# Oracle tardiness must equal the minimum among the six baseline rows
	# on this seed (sanity check on the synthesis logic).
	fixed = {r["method"]: r["total_tardiness"]
	for r in rows
	if r["method"] in {"fifo", "priority_edd", "critical_ratio",
	"atc", "wspt", "slack"}}
	if fixed:
	assert oracle["total_tardiness"] == pytest.approx(min(fixed.values()))


	def test_row_helper_shape():
	"""_row() output shape is the canonical contract every other writer uses."""
	class _M:
	makespan = 100.0
	total_tardiness = 5.0
	sla_breach_rate = 0.1
	avg_cycle_time = 30.0
	zone_utilization = {0: 0.5, 1: 0.6}
	throughput = 80.0
	queue_max = 12
	completed_jobs = 100

	r = _row(seed=7, method="t", m=_M(), elapsed=1.234)
	assert r["seed"] == 7
	assert r["method"] == "t"
	assert r["elapsed_seconds"] == 1.234
	assert 0.4 < r["zone_utilization_avg"] < 0.7