# rag-qa-command-cente / tests /test_analytics_contracts.py
# Tarek Masryo
# chore: update project files
# 6bef416
from __future__ import annotations
from src.analytics import (
MIN_CONFIG_N,
config_leaderboard,
make_decision_brief,
overview_metrics,
policy_at_threshold,
policy_curve,
retrieval_outcomes,
risk_slices,
top_examples,
)
from src.data import DataBundle, filter_eval, filter_retrieval_events
def test_overview_metrics_shape(bundle: DataBundle) -> None:
    """Check that ``overview_metrics`` reports every headline metric name.

    The result is only required to *contain* the names below; additional
    entries are allowed.
    """
    required_names = {
        "evaluations",
        "correct_rate",
        "hallucination_rate",
        "recall_at_10",
        "mrr_at_10",
    }
    summary = overview_metrics(
        bundle.eval_runs,
        bundle.documents,
        bundle.chunks,
        bundle.retrieval_events,
    )
    assert required_names.issubset(summary)
def test_risk_slices_have_risk_score(bundle: DataBundle) -> None:
    """Check that ``risk_slices`` returns rows and a ``risk_score`` column."""
    slices = risk_slices(bundle.eval_runs)
    # Two separate assertions so a failure names the violated condition.
    assert not slices.empty
    assert "risk_score" in slices.columns
def test_retrieval_outcomes_have_failure_modes(bundle: DataBundle) -> None:
    """Check that ``retrieval_outcomes`` returns rows and a ``failure_mode`` column."""
    outcomes = retrieval_outcomes(bundle.eval_runs)
    # Two separate assertions so a failure names the violated condition.
    assert not outcomes.empty
    assert "failure_mode" in outcomes.columns
def test_config_leaderboard_has_config_and_score(bundle: DataBundle) -> None:
    """Check that ``config_leaderboard`` returns rows with the ranking columns.

    The leaderboard is built with the module default ``MIN_CONFIG_N`` passed
    as ``min_n``.
    """
    required_columns = {"config", "score", "correct_rate", "hallucination_rate"}
    leaderboard = config_leaderboard(bundle.eval_runs, min_n=MIN_CONFIG_N)
    assert not leaderboard.empty
    assert required_columns.issubset(leaderboard.columns)
def test_policy_curve_has_required_columns(bundle: DataBundle) -> None:
    """Check that ``policy_curve`` output carries the threshold trade-off columns."""
    required_columns = {
        "threshold",
        "auto_approve_rate",
        "review_queue_size",
        "risk_captured_in_review",
    }
    curve = policy_curve(bundle.eval_runs)
    assert required_columns.issubset(curve.columns)
def test_policy_at_threshold_returns_decision_metrics(bundle: DataBundle) -> None:
    """Check that ``policy_at_threshold`` reports the decision metrics at 0.55.

    The same evaluation frame is used both as the input and as
    ``reference_df``.
    """
    required_names = {
        "auto_approve_rate",
        "review_queue_size",
        "auto_correct_rate",
        "risk_captured_in_review",
    }
    eval_runs = bundle.eval_runs
    decision = policy_at_threshold(eval_runs, 0.55, reference_df=eval_runs)
    assert required_names.issubset(decision)
def test_decision_brief_has_action(bundle: DataBundle) -> None:
    """Check that the decision brief has a truthy ``recommended_action``."""
    decision_brief = make_decision_brief(
        bundle.eval_runs,
        bundle.documents,
        bundle.chunks,
        bundle.retrieval_events,
    )
    assert decision_brief.recommended_action
def test_high_risk_examples_are_returned(bundle: DataBundle) -> None:
    """Check that "High risk" mode returns between 1 and 20 examples for ``n=20``."""
    eval_runs = bundle.eval_runs
    examples = top_examples(eval_runs, mode="High risk", n=20, reference_df=eval_runs)
    returned = len(examples)
    # At least one example must come back, and never more than requested.
    assert returned > 0
    assert returned <= 20
def test_domain_filter_keeps_retrieval_alignment(bundle: DataBundle) -> None:
    """Check that retrieval events filtered by domain stay within the filtered evals.

    After restricting evaluations to the ``financial_reports`` domain, every
    ``example_id`` left in the retrieval events must also appear in the
    filtered evaluations. Ids are compared as strings.
    """
    eval_subset = filter_eval(bundle.eval_runs, domains=["financial_reports"])
    retrieval_subset = filter_retrieval_events(bundle.retrieval_events, eval_subset)

    retrieval_ids = set(retrieval_subset["example_id"].astype(str))
    eval_ids = set(eval_subset["example_id"].astype(str))
    assert retrieval_ids <= eval_ids
def test_empty_filter_returns_empty_retrieval(bundle: DataBundle) -> None:
    """Check that filtering on ``missing_domain`` empties evals and retrieval events."""
    eval_subset = filter_eval(bundle.eval_runs, domains=["missing_domain"])
    retrieval_subset = filter_retrieval_events(bundle.retrieval_events, eval_subset)
    # Two separate assertions so a failure names the frame that was not empty.
    assert eval_subset.empty
    assert retrieval_subset.empty