| """ |
| Tests for Stage 16 — rolling backtest. |
| |
| The salesforce sample data (50 days) is shorter than the |
| salesforce config's baseline_window + lag + recent_window (51), so |
| the backtest needs its own synthetic series that's long enough for |
| the engine to actually have a window to evaluate. We build one with |
| core.MetricConfig + plain Observations rather than going through |
| a vertical config, so the test stays focused on the backtest logic. |
| """ |
| from typing import List |
|
|
| import pytest |
|
|
| from core import ( |
| HIGHER_IS_WORSE, |
| EntityTypeConfig, |
| MetricConfig, |
| Observation, |
| ) |
| from delivery.backtest import ( |
| BacktestPoint, |
| BacktestResult, |
| render_html, |
| run_backtest, |
| ) |
|
|
| |
|
|
def _short_window_config():
    """Return an ``EntityTypeConfig`` with deliberately small windows.

    Baseline 7 / lag 2 / recent 5 keeps the required history short, so a
    brief synthetic series can still produce several backtest steps. The
    config carries one higher-is-worse metric named ``"m"``.
    """
    metric = MetricConfig(
        "m",
        HIGHER_IS_WORSE,
        weight=1.0,
        feeds_anomaly=True,
        feeds_stability=True,
    )
    return EntityTypeConfig(
        entity_type="thing",
        metrics=[metric],
        baseline_window=7,
        baseline_lag=2,
        recent_window=5,
    )
|
|
|
|
def _series(entity_id: str, values: List[float]) -> List[Observation]:
    """Build one ``Observation`` per value, on consecutive days.

    The first value is dated 2026-01-01 and each following value is one day
    later. Every observation carries a single metric, ``"m"``, coerced to
    ``float``.

    Args:
        entity_id: Entity id stamped on every observation.
        values: Metric values, in day order.

    Returns:
        One observation per input value (an empty list for no values).
    """
    # Imported once per call; the import previously ran on every iteration.
    from datetime import date, timedelta

    start = date(2026, 1, 1)
    return [
        Observation(
            entity_id,
            (start + timedelta(days=offset)).isoformat(),
            {"m": float(value)},
        )
        for offset, value in enumerate(values)
    ]
|
|
|
|
def _healthy(entity_id: str, n_days: int) -> List[Observation]:
    """Return ``n_days`` daily observations that all hold the value 100.0."""
    flat_values = [100.0] * n_days
    return _series(entity_id, flat_values)
|
|
|
|
def _drifting_after(entity_id: str, n_days: int, onset: int,
                    delta: float = 8.0) -> List[Observation]:
    """Build a series that sits at 100 and then ramps upward.

    Day indices below ``onset`` hold 100.0. From ``onset`` onward the value
    grows by ``delta`` per day, so the onset day itself is ``100 + delta``.
    The configs in this file declare the metric higher-is-worse, so the
    ramp reads as degradation.
    """
    ramp = [
        100.0 + (day - onset + 1) * delta if day >= onset else 100.0
        for day in range(n_days)
    ]
    return _series(entity_id, ramp)
|
|
|
|
| |
|
|
def test_empty_observations_returns_empty_result():
    """No observations in: a ``BacktestResult`` with no points, zero steps."""
    cfg = _short_window_config()
    outcome = run_backtest([], cfg, step_days=7)
    assert isinstance(outcome, BacktestResult)
    assert outcome.points == []
    assert outcome.n_steps == 0
|
|
|
|
def test_too_short_for_one_window():
    """A series shorter than baseline + lag + recent yields zero steps.

    Five days of data cannot cover the 7 + 2 + 5 days the short-window
    config asks for, so no drift score can be computed causally.
    """
    five_days = _healthy("e", 5)
    result = run_backtest(five_days, _short_window_config(), step_days=7)
    assert result.n_steps == 0
|
|
|
|
| |
|
|
def test_steps_advance_by_step_days():
    """Consecutive cursors are exactly ``step_days`` (7) days apart."""
    from datetime import date

    result = run_backtest(_healthy("e", 30), _short_window_config(),
                          step_days=7)
    # With fewer than two steps the spacing check below would be vacuous.
    assert result.n_steps >= 2

    cursor_dates = [date.fromisoformat(p.cursor) for p in result.points]
    for earlier, later in zip(cursor_dates, cursor_dates[1:]):
        assert (later - earlier).days == 7
|
|
|
|
def test_engine_does_not_see_the_future_at_any_cursor():
    """Causality check: the engine must not look past the cursor.

    Shown indirectly: the drift in this series only begins at day index 28
    (2026-01-29), so the 2026-01-24 cursor lies before it and must report
    zero issues. The test also requires that this cursor actually exists.
    """
    target_cursor = "2026-01-24"
    series = _drifting_after("e", 30, onset=28, delta=20.0)
    result = run_backtest(series, _short_window_config(), step_days=10)

    assert target_cursor in [p.cursor for p in result.points]

    early = next(p for p in result.points if p.cursor == target_cursor)
    assert early.n_issues == 0
|
|
|
|
def test_drift_appears_only_after_onset():
    """Complementary causality check: drift surfaces as the cursor advances.

    Entity ``e`` starts drifting at day index 15 while entity ``h`` stays
    flat. The first cursor must not flag ``e``, at least one cursor must
    flag it, and ``h`` must never be flagged.
    """
    cfg = _short_window_config()
    obs = _drifting_after("e", 30, onset=15, delta=20.0) + _healthy("h", 30)
    result = run_backtest(obs, cfg, step_days=2)

    # Earliest window: the drifting entity must not be flagged yet.
    first = result.points[0]
    assert "e" not in {i["entity_id"] for i in first.issues}

    flagged_at = [p for p in result.points
                  if any(i["entity_id"] == "e" for i in p.issues)]
    assert flagged_at, "drift should surface at some cursor past onset"

    # The flat control entity stays clean at every cursor.
    for p in result.points:
        assert all(i["entity_id"] != "h" for i in p.issues)
|
|
|
|
def test_rollups_match_summed_points():
    """Result-level totals equal the sums over the individual points.

    Severity buckets with a zero count are dropped from both sides before
    comparing, so a zero entry and a missing key are treated as equal.
    """
    result = run_backtest(
        _drifting_after("e", 30, onset=10, delta=15.0),
        _short_window_config(),
        step_days=3,
    )
    points = result.points

    assert result.n_issues_total == sum(p.n_issues for p in points)

    expected = {}
    for level in ("critical", "high", "medium", "low"):
        count = sum(p.severity_counts.get(level, 0) for p in points)
        if count:
            expected[level] = count
    reported = {
        level: count
        for level, count in result.per_severity_total.items()
        if count
    }
    assert reported == expected
|
|
|
|
def test_unique_entities_ever_flagged_dedupes():
    """An entity flagged in several windows counts once in the unique total.

    The test first requires at least two windows with issues, so the
    dedupe claim is not satisfied trivially by a single flagged window.
    """
    result = run_backtest(
        _drifting_after("e", 30, onset=10, delta=20.0),
        _short_window_config(),
        step_days=2,
    )
    n_flagged_windows = sum(1 for p in result.points if p.n_issues > 0)
    assert n_flagged_windows >= 2
    assert result.n_unique_entities_ever_flagged == 1
|
|
|
|
| |
|
|
def test_render_html_returns_self_contained_doc():
    """The report is a complete HTML document that names the entity."""
    result = run_backtest(
        _drifting_after("e", 30, onset=10, delta=20.0),
        _short_window_config(),
        step_days=3,
    )
    page = render_html(result)
    assert page.startswith("<!doctype html>")
    assert page.rstrip().endswith("</html>")
    # Report title, then the drifting entity rendered inside a <code> tag.
    for fragment in ("OrgState backtest", "<code>e</code>"):
        assert fragment in page
|
|
|
|
def test_render_html_empty_result_says_so():
    """A backtest over a too-short series renders a 'No steps' notice."""
    too_short = _healthy("e", 5)
    result = run_backtest(too_short, _short_window_config(), step_days=7)
    assert "No steps produced" in render_html(result)
|
|
|
|
def test_render_html_severity_bars_when_issues_present():
    """The report of a drifting series contains a severity bar element."""
    result = run_backtest(
        _drifting_after("e", 30, onset=10, delta=20.0),
        _short_window_config(),
        step_days=3,
    )
    page = render_html(result)
    # Match on the class prefix only; the severity suffix is not pinned.
    assert 'class="bar bar-' in page
|
|
|
|
| |
|
|
| from delivery.backtest import ( |
| RecurringEntity, |
| find_recurring_entities, |
| ) |
|
|
|
|
def _fake_result(point_specs):
    """Assemble a ``BacktestResult`` by hand, without running the engine.

    ``point_specs`` is a sequence of ``(cursor, issues)`` pairs, where
    ``issues`` is a list of ``(entity_id, severity, score)`` tuples. Each
    pair becomes one ``BacktestPoint`` with ``n_states`` fixed at 10, a
    per-severity tally, and one issue dict per tuple titled ``"d <id>"``.
    """
    def build_point(cursor, issues):
        # Start from the four known levels; any other severity string is
        # appended to the tally when first seen.
        tally = dict.fromkeys(("critical", "high", "medium", "low"), 0)
        for _, severity, _ in issues:
            tally[severity] = tally.get(severity, 0) + 1
        return BacktestPoint(
            cursor=cursor,
            n_states=10,
            n_issues=len(issues),
            severity_counts=tally,
            issues=[
                {"entity_id": eid, "severity": severity,
                 "score": score, "title": f"d {eid}"}
                for eid, severity, score in issues
            ],
        )

    points = [build_point(cursor, issues) for cursor, issues in point_specs]
    return BacktestResult(
        tenant_id="t",
        entity_type="thing",
        step_days=7,
        n_steps=len(points),
        points=points,
    )
|
|
|
|
def test_find_recurring_below_threshold_returns_empty():
    """One-off events are not reported as recurring.

    Three different entities are each flagged once, so none reaches
    ``min_events=3``.
    """
    one_offs = (("2026-01-01", "e1"), ("2026-01-08", "e2"),
                ("2026-01-15", "e3"))
    result = _fake_result(
        [(cursor, [(eid, "medium", 0.4)]) for cursor, eid in one_offs]
    )
    assert find_recurring_entities(result, min_events=3) == []
|
|
|
|
def test_find_recurring_at_threshold():
    """An entity flagged exactly ``min_events`` times is reported."""
    cursors = ["2026-01-01", "2026-01-08", "2026-01-15"]
    result = _fake_result([(c, [("e1", "medium", 0.4)]) for c in cursors])

    # Unpacking doubles as a check that exactly one entity comes back.
    (rec,) = find_recurring_entities(result, min_events=3)

    assert isinstance(rec, RecurringEntity)
    assert rec.entity_id == "e1"
    assert rec.n_events == 3
    assert rec.first_cursor == "2026-01-01"
    assert rec.last_cursor == "2026-01-15"
    assert rec.cursors == ["2026-01-01", "2026-01-08", "2026-01-15"]
|
|
|
|
def test_find_recurring_top_severity_is_worst_ever():
    """``top_severity`` is the worst severity seen across all windows.

    The entity goes low, critical, medium: the expected answer is
    ``"critical"``, which is neither the latest nor the most frequent.
    """
    history = [
        ("2026-01-01", "low", 0.3),
        ("2026-01-08", "critical", 0.9),
        ("2026-01-15", "medium", 0.4),
    ]
    result = _fake_result(
        [(cursor, [("e1", sev, score)]) for cursor, sev, score in history]
    )
    (rec,) = find_recurring_entities(result, min_events=3)
    assert rec.top_severity == "critical"
|
|
|
|
def test_find_recurring_ordering_most_recurring_first():
    """Recurring entities come back ordered by event count, highest first.

    In this data ``b`` has 5 events, ``a`` has 4 and ``c`` has 3, so the
    expected order is b, a, c. The original note on this test describes the
    sort key as (-n_events, recency-desc, entity_id); the counts here are
    all distinct, so the tie-breakers are not exercised.
    """
    def med(eid):
        # Every issue in this test is medium severity with score 0.4.
        return (eid, "medium", 0.4)

    result = _fake_result([
        ("2026-01-01", [med("a"), med("b")]),
        ("2026-01-08", [med("a"), med("b")]),
        ("2026-01-15", [med("a"), med("b"), med("c")]),
        ("2026-01-22", [med("a")]),
        ("2026-01-29", [med("b"), med("c")]),
        ("2026-02-05", [med("b"), med("c")]),
    ])
    ordered_ids = [
        r.entity_id for r in find_recurring_entities(result, min_events=3)
    ]
    assert ordered_ids == ["b", "a", "c"]
|
|
|
|
def test_recurring_callout_appears_in_rendered_html():
    """A recurring entity produces a 'Recurring drift' section in the page.

    ``hot_entity`` is flagged three times (medium, medium, high). The page
    must contain the section heading, the entity id inside a ``<code>``
    tag, and an element carrying the ``sev sev-high`` class.
    """
    history = [
        ("2026-01-01", "medium", 0.4),
        ("2026-01-08", "medium", 0.4),
        ("2026-01-15", "high", 0.6),
    ]
    result = _fake_result(
        [(cursor, [("hot_entity", sev, score)])
         for cursor, sev, score in history]
    )
    page = render_html(result, min_recurring_events=3)
    assert "Recurring drift" in page
    assert "<code>hot_entity</code>" in page
    assert 'class="sev sev-high"' in page
|
|
|
|
def test_render_html_no_recurring_section_when_none():
    """With nothing recurring, the 'Recurring drift' callout is omitted.

    Two different entities are flagged once each, below the threshold of
    three, so the page must not show an empty recurrence section.
    """
    result = _fake_result([
        (cursor, [(eid, "medium", 0.4)])
        for cursor, eid in (("2026-01-01", "e1"), ("2026-01-08", "e2"))
    ])
    page = render_html(result, min_recurring_events=3)
    assert "Recurring drift" not in page
|
|
|
|
def test_min_events_flag_is_configurable():
    """The classifier's ``min_events`` threshold can be tuned by callers.

    An entity with two events is not recurring at ``min_events=3`` but is
    reported at ``min_events=2``.
    """
    result = _fake_result([
        (cursor, [("e1", "medium", 0.4)])
        for cursor in ("2026-01-01", "2026-01-08")
    ])
    assert find_recurring_entities(result, min_events=3) == []
    (rec,) = find_recurring_entities(result, min_events=2)
    assert rec.entity_id == "e1"
|
|
|
|
| |
|
|
| from delivery.backtest import ( |
| _slug, |
| render_entity_detail_html, |
| ) |
|
|
|
|
def test_entity_detail_renders_summary_and_sparkline():
    """The drill-down page shows a summary plus a sparkline.

    No full-HTML snapshot is taken; only the key elements are checked. The
    fixture has four windows, three of which flag ``hot``.
    """
    result = _fake_result([
        ("2026-01-01", [("hot", "medium", 0.42)]),
        ("2026-01-08", []),
        ("2026-01-15", [("hot", "high", 0.6)]),
        ("2026-01-22", [("hot", "medium", 0.38)]),
    ])
    page = render_entity_detail_html(result, "hot")

    assert page.startswith("<!doctype html>")
    assert "<h1>hot</h1>" in page
    # Three of the four windows flagged the entity.
    assert "<strong>Events:</strong> 3" in page
    # "high" is the worst severity in the fixture.
    assert "sev-high" in page
    # One sparkline cell per window, one bar per flagged window.
    assert page.count('class="spark-cell"') == 4
    assert page.count("spark-bar spark-") == 3
    # The unflagged window must not appear after the events heading.
    _, _, after_heading = page.rpartition('<h2>Flagged events</h2>')
    assert "2026-01-08" not in after_heading
|
|
|
|
def test_entity_detail_for_non_flagged_entity_says_so():
    """An entity absent from every issue list still gets a valid page.

    The page must be an HTML document carrying a 'not flagged' message
    rather than the call raising.
    """
    only_other = [("2026-01-01", [("other", "medium", 0.4)])]
    page = render_entity_detail_html(_fake_result(only_other), "ghost")
    assert "<!doctype html>" in page
    assert "not flagged in any backtest window" in page
|
|
|
|
def test_entity_detail_back_link_to_backtest_page():
    """The drill-down page links back with a relative ``../backtest.html``.

    Per the original note, drill-down pages live at ``entities/X.html``,
    hence the parent-relative link.
    """
    single_event = [("2026-01-01", [("x", "medium", 0.4)])]
    page = render_entity_detail_html(_fake_result(single_event), "x")
    assert 'href="../backtest.html"' in page
|
|
|
|
def test_main_html_links_entity_ids_when_link_entities_set():
    """``link_entities`` toggles drill-down links in the main report.

    With ``True`` the entity id links to ``entities/e1.html``; with
    ``False`` no ``entities/`` link appears at all.
    """
    result = _fake_result([
        (cursor, [("e1", "medium", 0.4)])
        for cursor in ("2026-01-01", "2026-01-08", "2026-01-15")
    ])
    with_links = render_html(result, link_entities=True)
    without_links = render_html(result, link_entities=False)

    assert 'href="entities/e1.html"' in with_links
    assert 'href="entities/' not in without_links
|
|
|
|
def test_link_entities_can_be_a_subset():
    """Passing a set to ``link_entities`` links only the listed entities.

    ``e_rec`` is in the set and gets a link. ``e_oneoff`` is not, so it
    must appear as a plain ``<code>`` id with no link. Per the original
    note, this avoids dead links to drill-down pages that were not
    rendered.
    """
    specs = [
        (cursor, [("e_rec", "medium", 0.4)])
        for cursor in ("2026-01-01", "2026-01-08", "2026-01-15")
    ]
    specs.append(("2026-01-22", [("e_oneoff", "medium", 0.4)]))

    page = render_html(_fake_result(specs), link_entities={"e_rec"})

    assert 'href="entities/e_rec.html"' in page
    assert 'href="entities/e_oneoff.html"' not in page
    assert "<code>e_oneoff</code>" in page
|
|
|
|
| |
|
|
| from delivery.backtest import ( |
| _compute_event_details, |
| _fmt_delta_pct, |
| _fmt_num, |
| ) |
|
|
|
|
def _cfg_for_details_test():
    """Return the config used by the event-details tests.

    One higher-is-worse metric ``"m"`` with baseline 5 / lag 2 / recent 5,
    so a 12-day series is exactly long enough to fill every window.
    """
    metric = MetricConfig(
        "m",
        HIGHER_IS_WORSE,
        weight=1.0,
        feeds_anomaly=True,
        feeds_stability=True,
    )
    return EntityTypeConfig(
        entity_type="thing",
        metrics=[metric],
        baseline_window=5,
        baseline_lag=2,
        recent_window=5,
    )
|
|
|
|
def test_compute_event_details_baseline_vs_recent():
    """Baseline and recent means are taken from separate windows.

    The series holds 100.0 for day indices 0-6 and 200.0 for 7-11, and the
    cursor is the last day (2026-01-12). With recent_window=5,
    baseline_lag=2 and baseline_window=5 the test expects a recent mean of
    200, a baseline mean of 100 and a delta of +100 percent.
    """
    cfg = _cfg_for_details_test()
    # Reuse the shared series builder instead of hand-rolling the dates.
    obs = _series("e", [100.0] * 7 + [200.0] * 5)

    details = _compute_event_details(obs, "e", "2026-01-12", cfg)

    assert "m" in details
    d = details["m"]
    assert d["baseline_mean"] == pytest.approx(100.0)
    assert d["recent_mean"] == pytest.approx(200.0)
    assert d["delta_pct"] == pytest.approx(100.0)
|
|
|
|
def test_compute_event_details_handles_zero_baseline():
    """A zero baseline mean gives ``delta_pct`` of ``None``.

    A percentage change would divide by zero, so the helper is expected to
    report ``None``. The series holds 0.0 for day indices 0-6 and 5.0 for
    7-11, with the cursor on the last day.
    """
    cfg = _cfg_for_details_test()
    # Reuse the shared series builder instead of hand-rolling the dates.
    obs = _series("e", [0.0] * 7 + [5.0] * 5)

    details = _compute_event_details(obs, "e", "2026-01-12", cfg)

    d = details["m"]
    assert d["baseline_mean"] == 0.0
    assert d["recent_mean"] == pytest.approx(5.0)
    assert d["delta_pct"] is None
|
|
|
|
def test_compute_event_details_other_entities_excluded():
    """Details for one entity ignore observations of other entities.

    ``target`` is a constant 50.0 and ``noise`` a constant 9999.0 on the
    same twelve days, interleaved day by day. If ``noise`` leaked into
    either mean, the expected 50.0 could not hold.
    """
    from datetime import date, timedelta

    start = date(2026, 1, 1)
    obs = [
        Observation(entity_id,
                    (start + timedelta(days=offset)).isoformat(),
                    {"m": value})
        for offset in range(12)
        for entity_id, value in (("target", 50.0), ("noise", 9999.0))
    ]

    details = _compute_event_details(
        obs, "target", "2026-01-12", _cfg_for_details_test()
    )

    target_metric = details["m"]
    assert target_metric["baseline_mean"] == pytest.approx(50.0)
    assert target_metric["recent_mean"] == pytest.approx(50.0)
|
|
|
|
def test_what_changed_appears_in_entity_detail_html():
    """A real backtest of a drifting entity renders 'What changed'.

    The result comes from ``run_backtest`` rather than a hand-built
    fixture, and the drill-down page must contain the section heading, its
    block class and both mean labels.
    """
    result = run_backtest(
        _drifting_after("e", 30, onset=10, delta=20.0),
        _short_window_config(),
        step_days=3,
    )
    page = render_entity_detail_html(result, "e")
    expected_fragments = (
        "What changed",
        'class="changed-block"',
        "Baseline mean",
        "Recent mean",
    )
    for fragment in expected_fragments:
        assert fragment in page
|
|
|
|
def test_what_changed_section_absent_when_no_events():
    """A flat, healthy entity gets no 'What changed' section."""
    result = run_backtest(_healthy("e", 30), _short_window_config(),
                          step_days=3)
    page = render_entity_detail_html(result, "e")
    assert "What changed" not in page
|
|
|
|
def test_fmt_helpers():
    """Pin the output of the number and delta-percent formatters.

    ``None`` renders as an em-dash in both. Numbers render as shown, and
    deltas carry an explicit sign and a percent suffix.
    """
    num_cases = ((None, "—"), (1234.5, "1234.5"), (0.123, "0.123"))
    for value, expected in num_cases:
        assert _fmt_num(value) == expected

    delta_cases = ((None, "—"), (5.0, "+5.0%"), (-3.2, "-3.2%"))
    for value, expected in delta_cases:
        assert _fmt_delta_pct(value) == expected
|
|
|
|
def test_slug_makes_unsafe_entity_ids_filesystem_safe():
    """Entity ids with '/', '..' or spaces are slugged into safe names.

    Customer-supplied ids must not turn into nested or traversing paths.
    In the cases below slashes and spaces become underscores, while
    letters, digits, '-', '_' and '.' pass through unchanged.
    """
    cases = {
        "acc/123": "acc_123",
        "../etc/passwd": ".._etc_passwd",
        "with spaces": "with_spaces",
        "a-b_c.d99": "a-b_c.d99",
    }
    for raw, expected in cases.items():
        assert _slug(raw) == expected
|
|