Spaces:
Sleeping
Sleeping
| """Tests for generation quality — validates Flash responses across full query spectrum. | |
| 30 queries covering: geographic, thematic, temporal, people, genre, awards, edge cases. | |
| Depending on the category, tests check for archive links, expected terms, and/or a "nothing found" reply. | |
| """ | |
| import pytest | |
| from mediastorm.rag.generator import generate_response | |
| from mediastorm.rag.retriever import HybridRetriever | |
| from mediastorm.vectorize.store import VectorStore | |
| from mediastorm.vectorize.embedder import Embedder | |
| from mediastorm.vectorize.bm25_store import BM25Store | |
| from mediastorm.rag.router import QueryRouter | |
| from mediastorm.config import CHROMADB_PATH, BM25_INDEX_PATH | |
@pytest.fixture(autouse=True)
def _reset_gemini_client():
    """Clear the generator module's cached Gemini client around every test.

    Registered as an autouse fixture so it wraps each test without being
    requested explicitly. The module-level ``_client`` is set to ``None``
    before the test runs and again afterwards, so no test reuses a client
    created by a previous one (the stated motivation is avoiding event loop
    issues between tests).
    """
    # Imported locally so the alias stays private to this fixture.
    import mediastorm.rag.generator as gen

    gen._client = None  # setup: start the test without a cached client
    yield
    gen._client = None  # teardown: do not leak this test's client onward
@pytest.fixture(scope="module")
def retriever():
    """Provide the HybridRetriever that every test in this module queries.

    Skips the requesting test when ``GEMINI_API_KEY`` is not set in the
    environment or when ``CHROMADB_PATH`` does not exist. Otherwise it
    builds the vector store, embedder and BM25 store (loading the BM25
    index), and wires them into a ``HybridRetriever`` with a fresh
    ``QueryRouter`` and ``top_k_final=5``.

    NOTE(review): module scope is used so the stores are built once per
    module rather than once per test; this assumes the retriever holds no
    per-test (e.g. event-loop-bound) state. Confirm, or drop the scope
    argument to rebuild it for every test.
    """
    import os

    if not os.environ.get("GEMINI_API_KEY"):
        pytest.skip("GEMINI_API_KEY not set")
    if not CHROMADB_PATH.exists():
        pytest.skip("ChromaDB not built")

    store = VectorStore(path=CHROMADB_PATH)
    embedder = Embedder()
    bm25 = BM25Store(path=BM25_INDEX_PATH)
    bm25.load()

    return HybridRetriever(
        vector_store=store,
        bm25_store=bm25,
        embedder=embedder,
        router=QueryRouter(),
        top_k_final=5,
    )
async def _ask(retriever, query: str) -> str:
    """Run *query* through retrieval and generation and return the answer.

    The retrieval result is handed to ``generate_response`` together with the
    query and an empty list as the third positional argument (presumably an
    empty conversation history; confirm against ``generate_response``).
    """
    retrieved = await retriever.retrieve(query)
    answer = await generate_response(query, retrieved, [])
    return answer
| def _has_link(response: str) -> bool: | |
| return "https://www.mediastorm.com/" in response | |
| def _has_any(response: str, terms: list[str]) -> bool: | |
| lower = response.lower() | |
| return any(t.lower() in lower for t in terms) | |
def _says_nothing_found(response: str) -> bool:
    """Return True if *response* contains a phrase signalling no matching stories.

    The check is a case-insensitive substring match (via ``_has_any``)
    against a fixed list of refusal-style phrases.
    """
    # Fold the typographic apostrophe (U+2019) to ASCII so a reply written as
    # "doesn’t contain" still matches the "doesn't contain" phrase below.
    normalized = response.replace("\u2019", "'")
    return _has_any(normalized, [
        "no stor", "not contain", "does not", "no relevant",
        "no direct", "no specific", "no primary", "not primarily",
        "doesn't contain", "do not have",
    ])
| # ------------------------------------------------------------------------- | |
| # GEOGRAPHIC (5) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestGeographic:
    """Place-based queries: each answer must contain an archive link and at
    least one of the expected place names or story titles."""

    async def test_congo_war(self, retriever):
        r = await _ask(retriever, "Stories about the war in Congo")
        assert _has_link(r)
        assert _has_any(r, ["Congo", "Condition: Critical"])

    async def test_afghanistan(self, retriever):
        r = await _ask(retriever, "Documentaries set in Afghanistan")
        assert _has_link(r)
        assert _has_any(r, ["Afghanistan", "Darkness Visible", "Taliban"])

    async def test_east_africa(self, retriever):
        r = await _ask(retriever, "Stories about East Africa")
        assert _has_link(r)
        assert _has_any(r, ["Kenya", "Ethiopia", "Somalia", "Africa"])

    async def test_latin_america(self, retriever):
        r = await _ask(retriever, "Stories filmed in Latin America or Mexico")
        assert _has_link(r)
        assert _has_any(r, ["Mexico", "Peru", "Cuba", "Latin America", "Tequila"])

    async def test_israel_palestine(self, retriever):
        r = await _ask(retriever, "Stories about the Israeli-Palestinian conflict")
        assert _has_link(r)
        assert _has_any(r, ["Israel", "Palestin", "Crisis Guide"])
| # ------------------------------------------------------------------------- | |
| # THEMATIC (5) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestThematic:
    """Topic-based queries: each answer must contain an archive link and at
    least one of the expected topic terms (matched as substrings)."""

    async def test_ptsd_veterans(self, retriever):
        r = await _ask(retriever, "Stories about PTSD and veterans returning from war")
        assert _has_link(r)
        assert _has_any(r, ["veteran", "PTSD", "soldier", "war", "marine"])

    async def test_climate_change(self, retriever):
        r = await _ask(retriever, "Climate change and environmental destruction")
        assert _has_link(r)
        assert _has_any(r, ["climate", "environment", "glacier", "water", "mining"])

    async def test_womens_rights(self, retriever):
        r = await _ask(retriever, "Child marriage and women's rights")
        assert _has_link(r)
        assert _has_any(r, ["marriage", "women", "bride", "girl", "violence"])

    async def test_wildlife(self, retriever):
        r = await _ask(retriever, "Wildlife conservation and endangered species")
        assert _has_link(r)
        assert _has_any(r, ["wildlife", "conservation", "rhino", "elephant", "gorilla", "ivory", "fox"])

    async def test_immigration(self, retriever):
        r = await _ask(retriever, "Immigration and refugee stories")
        assert _has_link(r)
        assert _has_any(r, ["immigra", "refugee", "migration", "crossing", "undocumented"])
| # ------------------------------------------------------------------------- | |
| # TEMPORAL (4) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestTemporal:
    """Date-range queries.

    Three of the four tests accept either an archive link or a
    "nothing found" reply; the financial-crisis test requires a link plus a
    crisis-related term.
    """

    async def test_earliest_stories(self, retriever):
        r = await _ask(retriever, "MediaStorm's earliest stories from 2005-2006")
        assert _has_link(r) or _says_nothing_found(r)

    async def test_recent_stories(self, retriever):
        r = await _ask(retriever, "Recent stories from 2022 to 2025")
        assert _has_link(r) or _says_nothing_found(r)

    async def test_financial_crisis(self, retriever):
        r = await _ask(retriever, "Stories from the 2008 financial crisis era")
        assert _has_link(r)
        assert _has_any(r, ["crisis", "econom", "financial", "Times of Crisis"])

    async def test_around_2010(self, retriever):
        r = await _ask(retriever, "Stories published around 2010")
        assert _has_link(r) or _says_nothing_found(r)
| # ------------------------------------------------------------------------- | |
| # PEOPLE (4) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestPeople:
    """Queries about a named person: each answer must contain an archive link
    and mention the person's name (or part of it)."""

    async def test_salgado(self, retriever):
        r = await _ask(retriever, "Stories about Sebastiao Salgado")
        assert _has_link(r)
        assert _has_any(r, ["Salgado"])

    async def test_don_mccullin(self, retriever):
        r = await _ask(retriever, "Stories featuring Don McCullin")
        assert _has_link(r)
        assert _has_any(r, ["McCullin"])

    async def test_ai_weiwei(self, retriever):
        r = await _ask(retriever, "Stories about Ai Weiwei")
        assert _has_link(r)
        assert _has_any(r, ["Weiwei", "Ai Wei"])

    async def test_angelina_jolie(self, retriever):
        r = await _ask(retriever, "Stories about Angelina Jolie")
        assert _has_link(r)
        assert _has_any(r, ["Jolie", "Angelina"])
| # ------------------------------------------------------------------------- | |
| # GENRE / FORMAT (4) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestGenre:
    """Genre and format queries.

    The first three tests require an archive link plus an expected term; the
    animation test accepts either a link or a "nothing found" reply.
    """

    async def test_photo_essays(self, retriever):
        r = await _ask(retriever, "Photo essays in the archive")
        assert _has_link(r)
        assert _has_any(r, ["photo essay", "photo"])

    async def test_crisis_guides(self, retriever):
        r = await _ask(retriever, "Interactive multimedia projects or crisis guides")
        assert _has_link(r)
        assert _has_any(r, ["crisis guide", "interactive", "multimedia"])

    async def test_family_aging(self, retriever):
        r = await _ask(retriever, "Documentaries about family and aging")
        assert _has_link(r)
        assert _has_any(r, ["family", "aging", "dementia", "caregiv", "alzheimer"])

    async def test_animation(self, retriever):
        r = await _ask(retriever, "Animated or motion design pieces")
        assert _has_link(r) or _says_nothing_found(r)
| # ------------------------------------------------------------------------- | |
| # AWARDS (4) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestAwards:
    """Award-related queries.

    These tests only require that the answer mentions one of the expected
    terms; unlike most other categories, no archive link is asserted.
    """

    async def test_emmy_winners(self, retriever):
        r = await _ask(retriever, "Emmy award winning stories")
        assert _has_any(r, ["Emmy", "award"])

    async def test_world_press_photo(self, retriever):
        r = await _ask(retriever, "World Press Photo winners")
        assert _has_any(r, ["World Press", "award", "photo"])

    async def test_iraq_war_awards(self, retriever):
        r = await _ask(retriever, "Award-winning stories about the Iraq war")
        assert _has_any(r, ["Iraq", "war", "award", "Marlboro"])

    async def test_webby_awards(self, retriever):
        r = await _ask(retriever, "Stories that won at Webby Awards")
        assert _has_any(r, ["Webby", "award"])
| # ------------------------------------------------------------------------- | |
| # EDGE CASES — should return nothing relevant (4) | |
| # ------------------------------------------------------------------------- | |
# Explicit marker so these coroutine tests are awaited even when
# pytest-asyncio runs in its default strict mode.
@pytest.mark.asyncio
class TestEdgeCases:
    """Off-topic queries: each answer must contain one of the "nothing found"
    phrases recognised by ``_says_nothing_found``."""

    async def test_minecraft_redstone(self, retriever):
        r = await _ask(retriever, "Best Minecraft redstone contraptions and tutorials")
        assert _says_nothing_found(r)

    async def test_pasta_recipes(self, retriever):
        r = await _ask(retriever, "Best Italian pasta recipes from Tuscany")
        assert _says_nothing_found(r)

    async def test_taylor_swift(self, retriever):
        r = await _ask(retriever, "Taylor Swift concert tour dates")
        assert _says_nothing_found(r)

    async def test_crypto_trading(self, retriever):
        r = await _ask(retriever, "Stock market trading strategies and cryptocurrency")
        assert _says_nothing_found(r)