"""Tests for generation quality — validates Flash responses across full query spectrum. 30 queries covering: geographic, thematic, temporal, people, genre, awards, edge cases. Each test checks: links present, relevant content mentioned, no hallucination. """ import pytest from mediastorm.rag.generator import generate_response from mediastorm.rag.retriever import HybridRetriever from mediastorm.vectorize.store import VectorStore from mediastorm.vectorize.embedder import Embedder from mediastorm.vectorize.bm25_store import BM25Store from mediastorm.rag.router import QueryRouter from mediastorm.config import CHROMADB_PATH, BM25_INDEX_PATH @pytest.fixture(autouse=True) def _reset_gemini_client(): """Reset global Gemini client between tests to avoid event loop issues.""" import mediastorm.rag.generator as gen gen._client = None yield gen._client = None @pytest.fixture(scope="module") def retriever(): import os if not os.environ.get("GEMINI_API_KEY"): pytest.skip("GEMINI_API_KEY not set") if not CHROMADB_PATH.exists(): pytest.skip("ChromaDB not built") store = VectorStore(path=CHROMADB_PATH) embedder = Embedder() bm25 = BM25Store(path=BM25_INDEX_PATH) bm25.load() return HybridRetriever( vector_store=store, bm25_store=bm25, embedder=embedder, router=QueryRouter(), top_k_final=5, ) async def _ask(retriever, query: str) -> str: result = await retriever.retrieve(query) return await generate_response(query, result, []) def _has_link(response: str) -> bool: return "https://www.mediastorm.com/" in response def _has_any(response: str, terms: list[str]) -> bool: lower = response.lower() return any(t.lower() in lower for t in terms) def _says_nothing_found(response: str) -> bool: return _has_any(response, [ "no stor", "not contain", "does not", "no relevant", "no direct", "no specific", "no primary", "not primarily", "doesn't contain", "do not have", ]) # ------------------------------------------------------------------------- # GEOGRAPHIC (5) # ------------------------------------------------------------------------- class TestGeographic: @pytest.mark.asyncio async def test_congo_war(self, retriever): r = await _ask(retriever, "Stories about the war in Congo") assert _has_link(r) assert _has_any(r, ["Congo", "Condition: Critical"]) @pytest.mark.asyncio async def test_afghanistan(self, retriever): r = await _ask(retriever, "Documentaries set in Afghanistan") assert _has_link(r) assert _has_any(r, ["Afghanistan", "Darkness Visible", "Taliban"]) @pytest.mark.asyncio async def test_east_africa(self, retriever): r = await _ask(retriever, "Stories about East Africa") assert _has_link(r) assert _has_any(r, ["Kenya", "Ethiopia", "Somalia", "Africa"]) @pytest.mark.asyncio async def test_latin_america(self, retriever): r = await _ask(retriever, "Stories filmed in Latin America or Mexico") assert _has_link(r) assert _has_any(r, ["Mexico", "Peru", "Cuba", "Latin America", "Tequila"]) @pytest.mark.asyncio async def test_israel_palestine(self, retriever): r = await _ask(retriever, "Stories about the Israeli-Palestinian conflict") assert _has_link(r) assert _has_any(r, ["Israel", "Palestin", "Crisis Guide"]) # ------------------------------------------------------------------------- # THEMATIC (5) # ------------------------------------------------------------------------- class TestThematic: @pytest.mark.asyncio async def test_ptsd_veterans(self, retriever): r = await _ask(retriever, "Stories about PTSD and veterans returning from war") assert _has_link(r) assert _has_any(r, ["veteran", "PTSD", "soldier", "war", "marine"]) @pytest.mark.asyncio async def test_climate_change(self, retriever): r = await _ask(retriever, "Climate change and environmental destruction") assert _has_link(r) assert _has_any(r, ["climate", "environment", "glacier", "water", "mining"]) @pytest.mark.asyncio async def test_womens_rights(self, retriever): r = await _ask(retriever, "Child marriage and women's rights") assert _has_link(r) assert _has_any(r, ["marriage", "women", "bride", "girl", "violence"]) @pytest.mark.asyncio async def test_wildlife(self, retriever): r = await _ask(retriever, "Wildlife conservation and endangered species") assert _has_link(r) assert _has_any(r, ["wildlife", "conservation", "rhino", "elephant", "gorilla", "ivory", "fox"]) @pytest.mark.asyncio async def test_immigration(self, retriever): r = await _ask(retriever, "Immigration and refugee stories") assert _has_link(r) assert _has_any(r, ["immigra", "refugee", "migration", "crossing", "undocumented"]) # ------------------------------------------------------------------------- # TEMPORAL (4) # ------------------------------------------------------------------------- class TestTemporal: @pytest.mark.asyncio async def test_earliest_stories(self, retriever): r = await _ask(retriever, "MediaStorm's earliest stories from 2005-2006") assert _has_link(r) or _says_nothing_found(r) @pytest.mark.asyncio async def test_recent_stories(self, retriever): r = await _ask(retriever, "Recent stories from 2022 to 2025") assert _has_link(r) or _says_nothing_found(r) @pytest.mark.asyncio async def test_financial_crisis(self, retriever): r = await _ask(retriever, "Stories from the 2008 financial crisis era") assert _has_link(r) assert _has_any(r, ["crisis", "econom", "financial", "Times of Crisis"]) @pytest.mark.asyncio async def test_around_2010(self, retriever): r = await _ask(retriever, "Stories published around 2010") assert _has_link(r) or _says_nothing_found(r) # ------------------------------------------------------------------------- # PEOPLE (4) # ------------------------------------------------------------------------- class TestPeople: @pytest.mark.asyncio async def test_salgado(self, retriever): r = await _ask(retriever, "Stories about Sebastiao Salgado") assert _has_link(r) assert _has_any(r, ["Salgado"]) @pytest.mark.asyncio async def test_don_mccullin(self, retriever): r = await _ask(retriever, "Stories featuring Don McCullin") assert _has_link(r) assert _has_any(r, ["McCullin"]) @pytest.mark.asyncio async def test_ai_weiwei(self, retriever): r = await _ask(retriever, "Stories about Ai Weiwei") assert _has_link(r) assert _has_any(r, ["Weiwei", "Ai Wei"]) @pytest.mark.asyncio async def test_angelina_jolie(self, retriever): r = await _ask(retriever, "Stories about Angelina Jolie") assert _has_link(r) assert _has_any(r, ["Jolie", "Angelina"]) # ------------------------------------------------------------------------- # GENRE / FORMAT (4) # ------------------------------------------------------------------------- class TestGenre: @pytest.mark.asyncio async def test_photo_essays(self, retriever): r = await _ask(retriever, "Photo essays in the archive") assert _has_link(r) assert _has_any(r, ["photo essay", "photo"]) @pytest.mark.asyncio async def test_crisis_guides(self, retriever): r = await _ask(retriever, "Interactive multimedia projects or crisis guides") assert _has_link(r) assert _has_any(r, ["crisis guide", "interactive", "multimedia"]) @pytest.mark.asyncio async def test_family_aging(self, retriever): r = await _ask(retriever, "Documentaries about family and aging") assert _has_link(r) assert _has_any(r, ["family", "aging", "dementia", "caregiv", "alzheimer"]) @pytest.mark.asyncio async def test_animation(self, retriever): r = await _ask(retriever, "Animated or motion design pieces") assert _has_link(r) or _says_nothing_found(r) # ------------------------------------------------------------------------- # AWARDS (4) # ------------------------------------------------------------------------- class TestAwards: @pytest.mark.asyncio async def test_emmy_winners(self, retriever): r = await _ask(retriever, "Emmy award winning stories") assert _has_any(r, ["Emmy", "award"]) @pytest.mark.asyncio async def test_world_press_photo(self, retriever): r = await _ask(retriever, "World Press Photo winners") assert _has_any(r, ["World Press", "award", "photo"]) @pytest.mark.asyncio async def test_iraq_war_awards(self, retriever): r = await _ask(retriever, "Award-winning stories about the Iraq war") assert _has_any(r, ["Iraq", "war", "award", "Marlboro"]) @pytest.mark.asyncio async def test_webby_awards(self, retriever): r = await _ask(retriever, "Stories that won at Webby Awards") assert _has_any(r, ["Webby", "award"]) # ------------------------------------------------------------------------- # EDGE CASES — should return nothing relevant (4) # ------------------------------------------------------------------------- class TestEdgeCases: @pytest.mark.asyncio async def test_minecraft_redstone(self, retriever): r = await _ask(retriever, "Best Minecraft redstone contraptions and tutorials") assert _says_nothing_found(r) @pytest.mark.asyncio async def test_pasta_recipes(self, retriever): r = await _ask(retriever, "Best Italian pasta recipes from Tuscany") assert _says_nothing_found(r) @pytest.mark.asyncio async def test_taylor_swift(self, retriever): r = await _ask(retriever, "Taylor Swift concert tour dates") assert _says_nothing_found(r) @pytest.mark.asyncio async def test_crypto_trading(self, retriever): r = await _ask(retriever, "Stock market trading strategies and cryptocurrency") assert _says_nothing_found(r)