"""Smoke tests for the analytics helpers and the data-filter helpers.

Each test takes a ``bundle`` fixture (a ``DataBundle``); the fixture itself
is not defined in this module -- presumably it comes from a ``conftest.py``
(TODO confirm).  The tests check the *shape* of the results (required keys,
required columns, non-emptiness), not specific numeric values.
"""

from __future__ import annotations

from src.analytics import (
    MIN_CONFIG_N,
    config_leaderboard,
    make_decision_brief,
    overview_metrics,
    policy_at_threshold,
    policy_curve,
    retrieval_outcomes,
    risk_slices,
    top_examples,
)
from src.data import DataBundle, filter_eval, filter_retrieval_events


def test_overview_metrics_shape(bundle: DataBundle) -> None:
    """``overview_metrics`` exposes the headline quality and retrieval keys."""
    metrics = overview_metrics(
        bundle.eval_runs,
        bundle.documents,
        bundle.chunks,
        bundle.retrieval_events,
    )
    required_keys = {
        "evaluations",
        "correct_rate",
        "hallucination_rate",
        "recall_at_10",
        "mrr_at_10",
    }
    assert required_keys.issubset(metrics)


def test_risk_slices_have_risk_score(bundle: DataBundle) -> None:
    """``risk_slices`` returns a non-empty table with a ``risk_score`` column."""
    out = risk_slices(bundle.eval_runs)
    # Separate asserts so a failure names the exact condition that broke.
    assert not out.empty
    assert "risk_score" in out.columns


def test_retrieval_outcomes_have_failure_modes(bundle: DataBundle) -> None:
    """``retrieval_outcomes`` returns a non-empty table with ``failure_mode``."""
    out = retrieval_outcomes(bundle.eval_runs)
    assert not out.empty
    assert "failure_mode" in out.columns


def test_config_leaderboard_has_config_and_score(bundle: DataBundle) -> None:
    """``config_leaderboard`` is non-empty and carries the ranking columns."""
    out = config_leaderboard(bundle.eval_runs, min_n=MIN_CONFIG_N)
    required_columns = {"config", "score", "correct_rate", "hallucination_rate"}
    assert not out.empty
    assert required_columns.issubset(out.columns)


def test_policy_curve_has_required_columns(bundle: DataBundle) -> None:
    """``policy_curve`` output includes the threshold and policy metric columns."""
    out = policy_curve(bundle.eval_runs)
    required_columns = {
        "threshold",
        "auto_approve_rate",
        "review_queue_size",
        "risk_captured_in_review",
    }
    assert required_columns.issubset(out.columns)


def test_policy_at_threshold_returns_decision_metrics(bundle: DataBundle) -> None:
    """``policy_at_threshold`` at 0.55 returns the decision metric keys."""
    out = policy_at_threshold(bundle.eval_runs, 0.55, reference_df=bundle.eval_runs)
    required_keys = {
        "auto_approve_rate",
        "review_queue_size",
        "auto_correct_rate",
        "risk_captured_in_review",
    }
    assert required_keys.issubset(out)


def test_decision_brief_has_action(bundle: DataBundle) -> None:
    """``make_decision_brief`` yields a brief with a truthy ``recommended_action``."""
    brief = make_decision_brief(
        bundle.eval_runs,
        bundle.documents,
        bundle.chunks,
        bundle.retrieval_events,
    )
    assert brief.recommended_action


def test_high_risk_examples_are_returned(bundle: DataBundle) -> None:
    """``top_examples`` in "High risk" mode returns between 1 and ``n`` items."""
    out = top_examples(
        bundle.eval_runs,
        mode="High risk",
        n=20,
        reference_df=bundle.eval_runs,
    )
    assert 0 < len(out) <= 20


def test_domain_filter_keeps_retrieval_alignment(bundle: DataBundle) -> None:
    """Retrieval events filtered by eval rows only reference surviving examples."""
    filtered_eval = filter_eval(bundle.eval_runs, domains=["financial_reports"])
    filtered_retrieval = filter_retrieval_events(bundle.retrieval_events, filtered_eval)
    # Compare ids as strings so differing column dtypes cannot hide a match.
    retrieval_ids = set(filtered_retrieval["example_id"].astype(str))
    eval_ids = set(filtered_eval["example_id"].astype(str))
    assert retrieval_ids.issubset(eval_ids)


def test_empty_filter_returns_empty_retrieval(bundle: DataBundle) -> None:
    """A domain filter that matches nothing empties both eval and retrieval."""
    # "missing_domain" is expected not to occur in the fixture data.
    filtered_eval = filter_eval(bundle.eval_runs, domains=["missing_domain"])
    filtered_retrieval = filter_retrieval_events(bundle.retrieval_events, filtered_eval)
    assert filtered_eval.empty
    assert filtered_retrieval.empty