"""
Tests for Stage 16 — rolling backtest.

The salesforce sample data (50 days) is shorter than the
salesforce config's baseline_window + lag + recent_window (51), so
the backtest needs its own synthetic series that's long enough for
the engine to actually have a window to evaluate. We build one with
core.MetricConfig + plain Observations rather than going through
a vertical config, so the test stays focused on the backtest logic.
"""
from typing import List

import pytest

from core import (
    HIGHER_IS_WORSE,
    EntityTypeConfig,
    MetricConfig,
    Observation,
)
from delivery.backtest import (
    BacktestPoint,
    BacktestResult,
    render_html,
    run_backtest,
)

# --- fixtures --------------------------------------------------------------

def _short_window_config():
    """Return a small-window ``EntityTypeConfig`` for entity type "thing".

    The config carries a single metric ``m`` (HIGHER_IS_WORSE, weight 1.0,
    feeding both anomaly and stability) with baseline_window=7,
    baseline_lag=2 and recent_window=5, so that a short synthetic series
    is long enough to produce multiple backtest steps.
    """
    only_metric = MetricConfig(
        "m",
        HIGHER_IS_WORSE,
        weight=1.0,
        feeds_anomaly=True,
        feeds_stability=True,
    )
    return EntityTypeConfig(
        entity_type="thing",
        metrics=[only_metric],
        baseline_window=7,
        baseline_lag=2,
        recent_window=5,
    )


def _series(entity_id: str, values: List[float]) -> List[Observation]:
    """Build a daily series starting 2026-01-01 with the given values.

    Args:
        entity_id: Entity id stamped on every observation.
        values: One value per consecutive day; ``values[i]`` lands on
            2026-01-01 plus ``i`` days and is stored (as a float) under
            metric key ``"m"``.

    Returns:
        One ``Observation`` per input value, in input order, each dated
        with an ISO ``YYYY-MM-DD`` string.
    """
    # Imported once per call; the original re-ran this import statement on
    # every loop iteration.
    from datetime import date, timedelta

    start = date(2026, 1, 1)
    # timedelta arithmetic rolls over month boundaries, so the series is not
    # limited to the 31 days of January.
    return [
        Observation(entity_id,
                    (start + timedelta(days=offset)).isoformat(),
                    {"m": float(value)})
        for offset, value in enumerate(values)
    ]


def _healthy(entity_id: str, n_days: int) -> List[Observation]:
    """Return ``n_days`` daily observations for ``entity_id`` with ``m`` flat at 100.0."""
    flat_values = [100.0 for _ in range(n_days)]
    return _series(entity_id, flat_values)


def _drifting_after(entity_id: str, n_days: int, onset: int,
                    delta: float = 8.0) -> List[Observation]:
    """Build a series that sits at 100 until ``onset`` and then ramps upward.

    Day indices below ``onset`` hold 100.0; from ``onset`` onward the value
    grows by ``delta`` per day (the first drifting day is ``100 + delta``).
    The metric is HIGHER_IS_WORSE in the test configs, so the ramp is a
    worsening trend.
    """
    def _value_on(day_index):
        if day_index < onset:
            return 100.0
        steps_into_drift = day_index - onset + 1
        return 100.0 + steps_into_drift * delta

    return _series(entity_id, [_value_on(day) for day in range(n_days)])


# --- empty / degenerate inputs ------------------------------------------

def test_empty_observations_returns_empty_result():
    """Backtesting an empty observation list yields an empty ``BacktestResult``."""
    cfg = _short_window_config()
    outcome = run_backtest([], cfg, step_days=7)
    assert isinstance(outcome, BacktestResult)
    # no points and a zero step count
    assert outcome.points == []
    assert outcome.n_steps == 0


def test_too_short_for_one_window():
    """A series shorter than baseline + lag + recent yields zero steps.

    With the short-window config that minimum is 7 + 2 + 5 = 14 days, so
    five days of data cannot support even one causal drift score.
    """
    five_days = _healthy("e", 5)
    outcome = run_backtest(five_days, _short_window_config(), step_days=7)
    assert outcome.n_steps == 0


# --- happy path: rolling steps ------------------------------------------

def test_steps_advance_by_step_days():
    """Consecutive backtest cursors are spaced exactly ``step_days`` apart."""
    from datetime import date

    outcome = run_backtest(_healthy("e", 30), _short_window_config(),
                           step_days=7)
    # 30 days of data against a 14-day minimum window (7 + 2 + 5) should
    # leave room for more than one cursor.
    assert outcome.n_steps >= 2

    cursor_dates = [date.fromisoformat(point.cursor)
                    for point in outcome.points]
    # each neighbouring pair of cursors is exactly 7 days apart
    for earlier, later in zip(cursor_dates, cursor_dates[1:]):
        assert (later - earlier).days == 7


def test_engine_does_not_see_the_future_at_any_cursor():
    """Causality test: at cursor C, the engine only sees observations
    with day <= C. We can prove this indirectly: a series whose drift
    starts AFTER a cursor must produce zero issues at that cursor, because
    everything up to it is flat.

    Every cursor that falls before the onset date is checked, not just
    one, so an engine that leaks future data at any early step fails.
    """
    cfg = _short_window_config()
    # 30 days; drift starts on day index 28 (very near the end)
    onset_day = "2026-01-29"             # day index 28 (0-based)
    obs = _drifting_after("e", 30, onset=28, delta=20.0)
    # Step large so cursors are expected at day indices 13 and 23 — both
    # BEFORE onset.
    result = run_backtest(obs, cfg, step_days=10)
    # ISO YYYY-MM-DD strings order lexicographically the same as dates.
    pre_onset = [p for p in result.points if p.cursor < onset_day]
    # guard against a vacuous pass: the day-23 cursor must exist
    assert "2026-01-24" in {p.cursor for p in pre_onset}   # day index 23
    for p in pre_onset:
        assert p.n_issues == 0, (
            f"cursor {p.cursor} reported issues before drift onset"
        )


def test_drift_appears_only_after_onset():
    """The complementary causality test: as cursor advances past the
    drift onset, the engine starts to flag the entity.

    Entity 'e' drifts from day index 15; entity 'h' stays flat throughout
    and acts as a control that must never be flagged.
    """
    cfg = _short_window_config()
    obs = _drifting_after("e", 30, onset=15, delta=20.0) + _healthy("h", 30)
    result = run_backtest(obs, cfg, step_days=2)

    def flagged_ids(point):
        """Entity ids appearing in ``point``'s issue list."""
        return {issue["entity_id"] for issue in point.issues}

    # at the first cursor (expected at day 13, before onset 15) 'e' should
    # NOT be flagged ...
    assert "e" not in flagged_ids(result.points[0])
    # ... but at SOME later cursor it should be.
    assert any("e" in flagged_ids(p) for p in result.points), \
        "drift should surface at some cursor past onset"
    # and 'h' is never flagged (no drift to detect)
    for p in result.points:
        assert "h" not in flagged_ids(p)


def test_rollups_match_summed_points():
    """Result-level rollups equal the per-point figures added together."""
    series = _drifting_after("e", 30, onset=10, delta=15.0)
    outcome = run_backtest(series, _short_window_config(), step_days=3)

    expected_total = 0
    for point in outcome.points:
        expected_total += point.n_issues
    assert outcome.n_issues_total == expected_total

    expected_by_severity = {}
    for level in ("critical", "high", "medium", "low"):
        expected_by_severity[level] = sum(
            point.severity_counts.get(level, 0) for point in outcome.points
        )
    # compare only the non-zero buckets on both sides, so a bucket that is
    # absent on one side and zero on the other does not fail the test
    reported_nonzero = {level: count
                        for level, count in outcome.per_severity_total.items()
                        if count}
    expected_nonzero = {level: count
                        for level, count in expected_by_severity.items()
                        if count}
    assert reported_nonzero == expected_nonzero


def test_unique_entities_ever_flagged_dedupes():
    """One entity flagged in several windows counts once in the unique total."""
    # a single entity drifting hard for many days
    series = _drifting_after("e", 30, onset=10, delta=20.0)
    outcome = run_backtest(series, _short_window_config(), step_days=2)

    n_flagged_windows = sum(1 for point in outcome.points
                            if point.n_issues > 0)
    assert n_flagged_windows >= 2
    assert outcome.n_unique_entities_ever_flagged == 1


# --- HTML render ---------------------------------------------------------

def test_render_html_returns_self_contained_doc():
    """``render_html`` emits a full HTML document naming the drifting entity."""
    series = _drifting_after("e", 30, onset=10, delta=20.0)
    outcome = run_backtest(series, _short_window_config(), step_days=3)
    page = render_html(outcome)

    # document is bracketed by doctype and closing html tag
    assert page.startswith("<!doctype html>")
    assert page.rstrip().endswith("</html>")
    # report title, then the drifting entity's id in a <code> element
    for fragment in ("OrgState backtest", "<code>e</code>"):
        assert fragment in page


def test_render_html_empty_result_says_so():
    """Rendering a backtest of a too-short series shows the no-steps message."""
    too_short = _healthy("e", 5)
    outcome = run_backtest(too_short, _short_window_config(), step_days=7)
    page = render_html(outcome)
    assert "No steps produced" in page


def test_render_html_severity_bars_when_issues_present():
    """A backtest of a drifting series renders at least one severity bar."""
    series = _drifting_after("e", 30, onset=10, delta=20.0)
    outcome = run_backtest(series, _short_window_config(), step_days=3)
    page = render_html(outcome)
    # look for the class prefix of a stacked-severity bar element
    bar_marker = 'class="bar bar-'
    assert bar_marker in page


# --- recurrence detection (Stage 17) -----------------------------------

from delivery.backtest import (  # noqa: E402  (after fixtures intentionally)
    RecurringEntity,
    find_recurring_entities,
)


def _fake_result(point_specs):
    """Assemble a ``BacktestResult`` directly, without running the engine.

    ``point_specs`` is a list of ``(cursor, issues)`` pairs where ``issues``
    is a list of ``(entity_id, severity, score)`` tuples. Each pair becomes
    one ``BacktestPoint`` with ``n_states=10``, a severity tally and one
    issue dict per tuple (titled ``"d <entity_id>"``). The result is stamped
    with tenant "t", entity type "thing" and ``step_days=7``.
    """
    built_points = []
    for cursor, flagged in point_specs:
        tally = dict.fromkeys(("critical", "high", "medium", "low"), 0)
        rows = []
        for entity_id, severity, score in flagged:
            tally[severity] = tally.get(severity, 0) + 1
            rows.append({
                "entity_id": entity_id,
                "severity": severity,
                "score": score,
                "title": f"d {entity_id}",
            })
        built_points.append(
            BacktestPoint(
                cursor=cursor,
                n_states=10,
                n_issues=len(flagged),
                severity_counts=tally,
                issues=rows,
            )
        )
    return BacktestResult(
        tenant_id="t",
        entity_type="thing",
        step_days=7,
        n_steps=len(built_points),
        points=built_points,
    )


def test_find_recurring_below_threshold_returns_empty():
    """Entities flagged only once each are not reported as recurring."""
    # three windows, each flagging a different entity exactly once
    one_offs = [
        ("2026-01-01", [("e1", "medium", 0.4)]),
        ("2026-01-08", [("e2", "medium", 0.4)]),
        ("2026-01-15", [("e3", "medium", 0.4)]),
    ]
    recurring = find_recurring_entities(_fake_result(one_offs), min_events=3)
    assert recurring == []


def test_find_recurring_at_threshold():
    """An entity flagged exactly ``min_events`` times is reported, with its
    event count, first/last cursor and full cursor list."""
    flagged_days = ["2026-01-01", "2026-01-08", "2026-01-15"]
    backtest = _fake_result(
        [(day, [("e1", "medium", 0.4)]) for day in flagged_days]
    )
    # unpacking doubles as a check that exactly one entity comes back
    (rec,) = find_recurring_entities(backtest, min_events=3)
    assert isinstance(rec, RecurringEntity)
    assert rec.entity_id == "e1"
    assert rec.n_events == 3
    assert rec.first_cursor == "2026-01-01"
    assert rec.last_cursor == "2026-01-15"
    assert rec.cursors == ["2026-01-01", "2026-01-08", "2026-01-15"]


def test_find_recurring_top_severity_is_worst_ever():
    """``top_severity`` reports the worst severity seen in any window,
    rather than the latest one or the most frequent one."""
    history = [
        ("2026-01-01", [("e1", "low", 0.3)]),
        ("2026-01-08", [("e1", "critical", 0.9)]),   # the worst window
        ("2026-01-15", [("e1", "medium", 0.4)]),
    ]
    (rec,) = find_recurring_entities(_fake_result(history), min_events=3)
    assert rec.top_severity == "critical"


def test_find_recurring_ordering_most_recurring_first():
    """Recurring entities come back ordered by descending event count.

    The documented sort key is (-n_events, recency-desc, entity_id); this
    fixture gives every entity a distinct count, so only the primary key
    is exercised here.
    """
    a = ("a", "medium", 0.4)
    b = ("b", "medium", 0.4)
    c = ("c", "medium", 0.4)
    backtest = _fake_result([
        ("2026-01-01", [a, b]),
        ("2026-01-08", [a, b]),
        ("2026-01-15", [a, b, c]),
        ("2026-01-22", [a]),          # a reaches 4 events
        ("2026-01-29", [b, c]),       # b: 4, c: 2
        ("2026-02-05", [b, c]),       # b: 5, c: 3
    ])
    ranked = find_recurring_entities(backtest, min_events=3)
    # b (5 events) ahead of a (4) ahead of c (3)
    ranked_ids = [entry.entity_id for entry in ranked]
    assert ranked_ids == ["b", "a", "c"]


def test_recurring_callout_appears_in_rendered_html():
    """When an entity meets the recurrence threshold, the rendered report
    contains the recurrence callout, the entity id and its worst badge."""
    backtest = _fake_result([
        ("2026-01-01", [("hot_entity", "medium", 0.4)]),
        ("2026-01-08", [("hot_entity", "medium", 0.4)]),
        ("2026-01-15", [("hot_entity", "high", 0.6)]),
    ])
    page = render_html(backtest, min_recurring_events=3)
    expected_fragments = (
        "Recurring drift",
        "<code>hot_entity</code>",
        'class="sev sev-high"',      # badge for the worst severity seen
    )
    for fragment in expected_fragments:
        assert fragment in page


def test_render_html_no_recurring_section_when_none():
    """With no entity reaching the threshold, the recurrence callout is
    omitted from the page instead of being rendered empty."""
    # two windows, two different entities: nothing recurs
    backtest = _fake_result([
        ("2026-01-01", [("e1", "medium", 0.4)]),
        ("2026-01-08", [("e2", "medium", 0.4)]),
    ])
    page = render_html(backtest, min_recurring_events=3)
    assert "Recurring drift" not in page


def test_min_events_flag_is_configurable():
    """The classifier's ``min_events`` threshold decides whether a
    twice-flagged entity counts as recurring."""
    flagged_twice = _fake_result([
        ("2026-01-01", [("e1", "medium", 0.4)]),
        ("2026-01-08", [("e1", "medium", 0.4)]),
    ])
    # threshold 3: two events are not enough
    strict = find_recurring_entities(flagged_twice, min_events=3)
    assert strict == []
    # threshold 2: the same entity now surfaces, and it is the only one
    (rec,) = find_recurring_entities(flagged_twice, min_events=2)
    assert rec.entity_id == "e1"


# --- per-entity drill-down (Stage 18) ----------------------------------

from delivery.backtest import (  # noqa: E402
    _slug,
    render_entity_detail_html,
)


def test_entity_detail_renders_summary_and_sparkline():
    """The drill-down page carries the summary, a sparkline and the event
    table. Only key fragments are checked, not a full HTML snapshot."""
    windows = [
        ("2026-01-01", [("hot", "medium", 0.42)]),
        ("2026-01-08", []),                          # nothing flagged
        ("2026-01-15", [("hot", "high", 0.6)]),      # worst severity
        ("2026-01-22", [("hot", "medium", 0.38)]),
    ]
    page = render_entity_detail_html(_fake_result(windows), "hot")

    assert page.startswith("<!doctype html>")
    assert "<h1>hot</h1>" in page
    # summary: three flagged events
    assert "<strong>Events:</strong> 3" in page
    # the worst severity (high) is shown even though the latest is medium
    assert "sev-high" in page
    # sparkline: one cell per cursor, one bar per flagged cursor
    assert page.count('class="spark-cell"') == 4
    assert page.count("spark-bar spark-") == 3
    # the quiet cursor must not appear after the event-table heading
    _, _, after_heading = page.rpartition('<h2>Flagged events</h2>')
    assert "2026-01-08" not in after_heading


def test_entity_detail_for_non_flagged_entity_says_so():
    """Requesting a drill-down for an entity absent from every issue list
    returns a page with an explanatory message, not an exception."""
    backtest = _fake_result([("2026-01-01", [("other", "medium", 0.4)])])
    page = render_entity_detail_html(backtest, "ghost")
    for fragment in ("<!doctype html>", "not flagged in any backtest window"):
        assert fragment in page


def test_entity_detail_back_link_to_backtest_page():
    """The drill-down page links back to the main report via the relative
    path ``../backtest.html`` (drill-downs live under ``entities/``)."""
    backtest = _fake_result([("2026-01-01", [("x", "medium", 0.4)])])
    page = render_entity_detail_html(backtest, "x")
    back_link = 'href="../backtest.html"'
    assert back_link in page


def test_main_html_links_entity_ids_when_link_entities_set():
    """``link_entities=True`` turns entity ids in the main report into
    links to their drill-down pages; ``False`` leaves them unlinked."""
    backtest = _fake_result(
        [(day, [("e1", "medium", 0.4)])
         for day in ("2026-01-01", "2026-01-08", "2026-01-15")]
    )
    with_links = render_html(backtest, link_entities=True)
    without_links = render_html(backtest, link_entities=False)
    # linked mode points at the entity's page under entities/
    assert 'href="entities/e1.html"' in with_links
    # unlinked mode has no entities/ hrefs at all
    assert 'href="entities/' not in without_links


def test_link_entities_can_be_a_subset():
    """Passing a set as ``link_entities`` links only the listed entities,
    so entities without a rendered drill-down page get no dead link."""
    # e_rec is flagged three times, e_oneoff only once
    recurring_days = ("2026-01-01", "2026-01-08", "2026-01-15")
    specs = [(day, [("e_rec", "medium", 0.4)]) for day in recurring_days]
    specs.append(("2026-01-22", [("e_oneoff", "medium", 0.4)]))

    page = render_html(_fake_result(specs), link_entities={"e_rec"})
    assert 'href="entities/e_rec.html"' in page
    # the one-off entity is shown as plain code with no link
    assert 'href="entities/e_oneoff.html"' not in page
    assert "<code>e_oneoff</code>" in page


# --- "what changed" details (Stage 19) ------------------------------------

from delivery.backtest import (  # noqa: E402
    _compute_event_details,
    _fmt_delta_pct,
    _fmt_num,
)


def _cfg_for_details_test():
    """Return the ``EntityTypeConfig`` used by the event-details tests.

    Entity type "thing" with one metric ``m`` (HIGHER_IS_WORSE, weight 1.0,
    feeding anomaly and stability) and windows baseline_window=5,
    baseline_lag=2, recent_window=5, i.e. 12 days in total.
    """
    metric_m = MetricConfig(
        "m",
        HIGHER_IS_WORSE,
        weight=1.0,
        feeds_anomaly=True,
        feeds_stability=True,
    )
    return EntityTypeConfig(
        entity_type="thing",
        metrics=[metric_m],
        baseline_window=5,
        baseline_lag=2,
        recent_window=5,
    )


def test_compute_event_details_baseline_vs_recent():
    """Baseline and recent means come from their own windows.

    The 12-day series is built so the halves differ unmistakably: day
    indices 0..4 (baseline) hold 100, 5..6 are the lag gap, and 7..11
    (recent) hold 200, so the expected change is +100%.
    """
    from datetime import date, timedelta

    cfg = _cfg_for_details_test()
    first_day = date(2026, 1, 1)
    obs = [
        Observation("e",
                    (first_day + timedelta(days=offset)).isoformat(),
                    {"m": 100.0 if offset < 7 else 200.0})
        for offset in range(12)
    ]
    details = _compute_event_details(obs, "e", "2026-01-12", cfg)
    assert "m" in details
    stats = details["m"]
    expectations = (
        ("baseline_mean", 100.0),
        ("recent_mean", 200.0),
        ("delta_pct", 100.0),
    )
    for key, expected in expectations:
        assert stats[key] == pytest.approx(expected)


def test_compute_event_details_handles_zero_baseline():
    """A zero baseline mean gives ``delta_pct`` of None, since a percentage
    change would divide by zero. The renderer formats None as an em-dash."""
    from datetime import date, timedelta

    cfg = _cfg_for_details_test()
    first_day = date(2026, 1, 1)
    # seven days of 0.0 followed by five days of 5.0
    obs = [
        Observation("e",
                    (first_day + timedelta(days=offset)).isoformat(),
                    {"m": 5.0 if offset >= 7 else 0.0})
        for offset in range(12)
    ]
    stats = _compute_event_details(obs, "e", "2026-01-12", cfg)["m"]
    assert stats["baseline_mean"] == 0.0
    assert stats["recent_mean"] == pytest.approx(5.0)
    assert stats["delta_pct"] is None


def test_compute_event_details_other_entities_excluded():
    """Details for one entity are computed from that entity's rows only."""
    from datetime import date, timedelta

    cfg = _cfg_for_details_test()
    first_day = date(2026, 1, 1)
    per_entity_value = (("target", 50.0), ("noise", 9999.0))
    # interleave both entities day by day
    obs = [
        Observation(entity_id,
                    (first_day + timedelta(days=offset)).isoformat(),
                    {"m": value})
        for offset in range(12)
        for entity_id, value in per_entity_value
    ]
    details = _compute_event_details(obs, "target", "2026-01-12", cfg)
    # both means must be target's 50.0; any of noise's 9999 would shift them
    target_stats = details["m"]
    assert target_stats["baseline_mean"] == pytest.approx(50.0)
    assert target_stats["recent_mean"] == pytest.approx(50.0)


def test_what_changed_appears_in_entity_detail_html():
    """After a real backtest of a drifting entity, its drill-down page
    includes the 'What changed' section with both mean labels."""
    series = _drifting_after("e", 30, onset=10, delta=20.0)
    outcome = run_backtest(series, _short_window_config(), step_days=3)
    page = render_entity_detail_html(outcome, "e")
    required = (
        "What changed",
        'class="changed-block"',
        "Baseline mean",
        "Recent mean",
    )
    for fragment in required:
        assert fragment in page


def test_what_changed_section_absent_when_no_events():
    """A flat series produces no 'What changed' section on the drill-down."""
    flat = _healthy("e", 30)
    outcome = run_backtest(flat, _short_window_config(), step_days=3)
    page = render_entity_detail_html(outcome, "e")
    assert "What changed" not in page


def test_fmt_helpers():
    """Pin the output of the number and delta-percent formatters,
    including the em-dash placeholder for None."""
    number_cases = (
        (None, "—"),
        (1234.5, "1234.5"),
        (0.123, "0.123"),
    )
    for raw, shown in number_cases:
        assert _fmt_num(raw) == shown

    delta_cases = (
        (None, "—"),
        (5.0, "+5.0%"),
        (-3.2, "-3.2%"),
    )
    for raw, shown in delta_cases:
        assert _fmt_delta_pct(raw) == shown


def test_slug_makes_unsafe_entity_ids_filesystem_safe():
    """Entity ids containing '/', '..' or spaces must not turn into
    writable paths or path traversal once slugged."""
    expected_slugs = {
        "acc/123": "acc_123",
        "../etc/passwd": ".._etc_passwd",
        "with spaces": "with_spaces",
        # alphanumerics, dot, underscore and dash pass through unchanged
        "a-b_c.d99": "a-b_c.d99",
    }
    for raw_id, slug in expected_slugs.items():
        assert _slug(raw_id) == slug