"""Evaluate RAG retrieval quality with golden queries.""" import json import os import pytest GOLDEN_QUERIES = [ { "query": "what should workers do in extreme heat", "expected_themes": ["shade", "water", "rest", "break"], "category": "safety", }, { "query": "Jangwani informal settlement heat risk", "expected_themes": ["Jangwani", "informal", "tin roof"], "zone_id": "DAR-JAN", "category": "zone", }, { "query": "how does parametric insurance payout automatically", "expected_themes": ["parametric", "automatic", "trigger", "payout"], "category": "insurance", }, { "query": "emergency phone numbers Nairobi", "expected_themes": ["Red Cross", "ambulance", "999", "Nairobi"], "category": "emergency", }, { "query": "critical heat alert actions stop work immediately", "expected_themes": ["critical", "stop", "emergency", "medical"], "category": "actions", }, { "query": "Kibera corrugated tin roofs urban heat", "expected_themes": ["Kibera", "tin", "heat"], "zone_id": "NBO-KIB", "category": "zone", }, { "query": "heat stroke dehydration symptoms workers", "expected_themes": ["heat", "water", "hydration"], "category": "safety", }, { "query": "Swahili translation heat warning joto", "expected_themes": ["joto", "Swahili", "tahadhari"], "category": "language", }, { "query": "basis risk what if trigger wrong", "expected_themes": ["basis risk", "false"], "category": "insurance", }, { "query": "warning level heat alert two consecutive days", "expected_themes": ["warning", "consecutive", "reduce"], "category": "actions", }, ] @pytest.fixture def retriever(): try: from src.explanation.rag_provider import HybridRetriever return HybridRetriever() except Exception as e: pytest.skip(f"RAG index not built: {e}") def test_retrieval_returns_results(retriever): """Each golden query should return at least 1 result.""" for case in GOLDEN_QUERIES: docs = retriever.retrieve(case["query"], zone_id=case.get("zone_id"), top_k=5) assert len(docs) > 0, f"No results for: {case['query']}" def test_retrieval_theme_coverage(retriever): """Retrieved docs should contain expected themes.""" results = [] for case in GOLDEN_QUERIES: docs = retriever.retrieve(case["query"], zone_id=case.get("zone_id"), top_k=5) combined = " ".join(docs).lower() themes_found = [t for t in case["expected_themes"] if t.lower() in combined] coverage = len(themes_found) / len(case["expected_themes"]) results.append({ "query": case["query"], "category": case["category"], "themes_expected": case["expected_themes"], "themes_found": themes_found, "coverage": round(coverage, 2), "docs_returned": len(docs), }) os.makedirs("tests/eval_results", exist_ok=True) with open("tests/eval_results/rag_eval.json", "w") as f: json.dump(results, f, indent=2) # At least 70% of queries should have >50% theme coverage good = sum(1 for r in results if r["coverage"] >= 0.5) assert good >= len(results) * 0.7, f"Only {good}/{len(results)} queries have 50%+ theme coverage" def test_zone_boosting(retriever): """Zone-specific queries should rank the matching zone doc higher.""" docs_with_boost = retriever.retrieve( "heat risk informal settlement", zone_id="DAR-JAN", top_k=5 ) docs_without_boost = retriever.retrieve( "heat risk informal settlement", top_k=5 ) # With zone boost, DAR-JAN content should appear earlier def contains_jangwani(docs): for i, d in enumerate(docs): if "jangwani" in d.lower() or "dar-jan" in d.lower(): return i return len(docs) rank_with = contains_jangwani(docs_with_boost) rank_without = contains_jangwani(docs_without_boost) assert rank_with <= rank_without, "Zone boosting should improve rank of matching zone"