mediastorm / tests /test_generation.py
remdms's picture
fix(test): replace quantum_computing edge case with minecraft_redstone
f84669d
"""Tests for generation quality — validates Flash responses across full query spectrum.
30 queries covering: geographic, thematic, temporal, people, genre, awards, edge cases.
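Depending on the category, a test checks that archive links are present, that relevant content is mentioned, and/or that the response says nothing relevant was found (no hallucination).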
"""
import pytest
from mediastorm.rag.generator import generate_response
from mediastorm.rag.retriever import HybridRetriever
from mediastorm.vectorize.store import VectorStore
from mediastorm.vectorize.embedder import Embedder
from mediastorm.vectorize.bm25_store import BM25Store
from mediastorm.rag.router import QueryRouter
from mediastorm.config import CHROMADB_PATH, BM25_INDEX_PATH
@pytest.fixture(autouse=True)
def _reset_gemini_client():
    """Clear the generator module's global Gemini client around every test.

    Applied automatically to each test so that no client is carried over from
    one test to the next, which avoids event loop issues.
    """
    import mediastorm.rag.generator as generator_module

    # Setup: the test starts with no cached client.
    generator_module._client = None
    yield
    # Teardown: clear the client again once the test has finished.
    generator_module._client = None
@pytest.fixture(scope="module")
def retriever():
    """Build a single HybridRetriever shared by all tests in this module.

    The requesting test is skipped when the ``GEMINI_API_KEY`` environment
    variable is unset/empty or when ``CHROMADB_PATH`` does not exist.

    Returns:
        A ``HybridRetriever`` wired to the vector store, the loaded BM25
        store, an embedder and a query router, with ``top_k_final=5``.
    """
    import os

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        pytest.skip("GEMINI_API_KEY not set")

    if not CHROMADB_PATH.exists():
        pytest.skip("ChromaDB not built")

    vector_store = VectorStore(path=CHROMADB_PATH)
    text_embedder = Embedder()

    # The BM25 store is loaded explicitly before being handed to the retriever.
    keyword_index = BM25Store(path=BM25_INDEX_PATH)
    keyword_index.load()

    query_router = QueryRouter()
    return HybridRetriever(
        vector_store=vector_store,
        bm25_store=keyword_index,
        embedder=text_embedder,
        router=query_router,
        top_k_final=5,
    )
async def _ask(retriever, query: str) -> str:
    """Run *query* through retrieval and generation and return the response.

    Args:
        retriever: Object exposing an awaitable ``retrieve(query)`` method.
        query: The user question to ask.

    Returns:
        Whatever ``generate_response`` produces for the query and the
        retrieval result.
    """
    retrieved = await retriever.retrieve(query)
    # The third argument is always an empty list here
    # (presumably the prior conversation history — confirm against generator).
    answer = await generate_response(query, retrieved, [])
    return answer
def _has_link(response: str) -> bool:
return "https://www.mediastorm.com/" in response
def _has_any(response: str, terms: list[str]) -> bool:
lower = response.lower()
return any(t.lower() in lower for t in terms)
def _says_nothing_found(response: str) -> bool:
    """Return True when *response* contains a "nothing relevant" phrase.

    The check is a case-insensitive substring match (via ``_has_any``) against
    a fixed list of phrase fragments.
    """
    nothing_found_markers = [
        "no stor",
        "not contain",
        "does not",
        "no relevant",
        "no direct",
        "no specific",
        "no primary",
        "not primarily",
        "doesn't contain",
        "do not have",
    ]
    return _has_any(response, nothing_found_markers)
# -------------------------------------------------------------------------
# GEOGRAPHIC (5)
# -------------------------------------------------------------------------
class TestGeographic:
    """Place-based queries: each expects a site link and a related term."""

    @pytest.mark.asyncio
    async def test_congo_war(self, retriever):
        """Congo war query yields a link and Congo-related content."""
        answer = await _ask(retriever, "Stories about the war in Congo")
        expected_terms = ["Congo", "Condition: Critical"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_afghanistan(self, retriever):
        """Afghanistan query yields a link and Afghanistan-related content."""
        answer = await _ask(retriever, "Documentaries set in Afghanistan")
        expected_terms = ["Afghanistan", "Darkness Visible", "Taliban"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_east_africa(self, retriever):
        """East Africa query yields a link and an African place name."""
        answer = await _ask(retriever, "Stories about East Africa")
        expected_terms = ["Kenya", "Ethiopia", "Somalia", "Africa"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_latin_america(self, retriever):
        """Latin America query yields a link and a regional term."""
        answer = await _ask(retriever, "Stories filmed in Latin America or Mexico")
        expected_terms = ["Mexico", "Peru", "Cuba", "Latin America", "Tequila"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_israel_palestine(self, retriever):
        """Israeli-Palestinian query yields a link and a conflict-related term."""
        answer = await _ask(retriever, "Stories about the Israeli-Palestinian conflict")
        expected_terms = ["Israel", "Palestin", "Crisis Guide"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)
# -------------------------------------------------------------------------
# THEMATIC (5)
# -------------------------------------------------------------------------
class TestThematic:
    """Topic-based queries: each expects a site link and a topical term."""

    @pytest.mark.asyncio
    async def test_ptsd_veterans(self, retriever):
        """PTSD/veterans query yields a link and a war- or veteran-related term."""
        answer = await _ask(retriever, "Stories about PTSD and veterans returning from war")
        expected_terms = ["veteran", "PTSD", "soldier", "war", "marine"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_climate_change(self, retriever):
        """Climate query yields a link and an environment-related term."""
        answer = await _ask(retriever, "Climate change and environmental destruction")
        expected_terms = ["climate", "environment", "glacier", "water", "mining"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_womens_rights(self, retriever):
        """Women's rights query yields a link and a related term."""
        answer = await _ask(retriever, "Child marriage and women's rights")
        expected_terms = ["marriage", "women", "bride", "girl", "violence"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_wildlife(self, retriever):
        """Wildlife query yields a link and a conservation- or animal-related term."""
        answer = await _ask(retriever, "Wildlife conservation and endangered species")
        expected_terms = [
            "wildlife",
            "conservation",
            "rhino",
            "elephant",
            "gorilla",
            "ivory",
            "fox",
        ]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_immigration(self, retriever):
        """Immigration query yields a link and a migration-related term."""
        answer = await _ask(retriever, "Immigration and refugee stories")
        expected_terms = ["immigra", "refugee", "migration", "crossing", "undocumented"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)
# -------------------------------------------------------------------------
# TEMPORAL (4)
# -------------------------------------------------------------------------
class TestTemporal:
    """Date-range queries.

    Most of these accept either a site link or a "nothing found" reply; only
    the financial-crisis query requires a link and a related term.
    """

    @pytest.mark.asyncio
    async def test_earliest_stories(self, retriever):
        """2005-2006 query yields a link or a nothing-found reply."""
        answer = await _ask(retriever, "MediaStorm's earliest stories from 2005-2006")
        assert _has_link(answer) or _says_nothing_found(answer)

    @pytest.mark.asyncio
    async def test_recent_stories(self, retriever):
        """2022-2025 query yields a link or a nothing-found reply."""
        answer = await _ask(retriever, "Recent stories from 2022 to 2025")
        assert _has_link(answer) or _says_nothing_found(answer)

    @pytest.mark.asyncio
    async def test_financial_crisis(self, retriever):
        """2008 crisis query yields a link and a crisis-related term."""
        answer = await _ask(retriever, "Stories from the 2008 financial crisis era")
        expected_terms = ["crisis", "econom", "financial", "Times of Crisis"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_around_2010(self, retriever):
        """Circa-2010 query yields a link or a nothing-found reply."""
        answer = await _ask(retriever, "Stories published around 2010")
        assert _has_link(answer) or _says_nothing_found(answer)
# -------------------------------------------------------------------------
# PEOPLE (4)
# -------------------------------------------------------------------------
class TestPeople:
    """Named-person queries: each expects a site link and the person's name."""

    @pytest.mark.asyncio
    async def test_salgado(self, retriever):
        """Salgado query yields a link and mentions Salgado."""
        answer = await _ask(retriever, "Stories about Sebastiao Salgado")
        expected_terms = ["Salgado"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_don_mccullin(self, retriever):
        """McCullin query yields a link and mentions McCullin."""
        answer = await _ask(retriever, "Stories featuring Don McCullin")
        expected_terms = ["McCullin"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_ai_weiwei(self, retriever):
        """Ai Weiwei query yields a link and mentions his name."""
        answer = await _ask(retriever, "Stories about Ai Weiwei")
        expected_terms = ["Weiwei", "Ai Wei"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_angelina_jolie(self, retriever):
        """Angelina Jolie query yields a link and mentions her name."""
        answer = await _ask(retriever, "Stories about Angelina Jolie")
        expected_terms = ["Jolie", "Angelina"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)
# -------------------------------------------------------------------------
# GENRE / FORMAT (4)
# -------------------------------------------------------------------------
class TestGenre:
    """Genre and format queries.

    All but the animation query require a site link and a related term; the
    animation query also accepts a "nothing found" reply.
    """

    @pytest.mark.asyncio
    async def test_photo_essays(self, retriever):
        """Photo essay query yields a link and a photo-related term."""
        answer = await _ask(retriever, "Photo essays in the archive")
        expected_terms = ["photo essay", "photo"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_crisis_guides(self, retriever):
        """Interactive/crisis guide query yields a link and a format term."""
        answer = await _ask(retriever, "Interactive multimedia projects or crisis guides")
        expected_terms = ["crisis guide", "interactive", "multimedia"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_family_aging(self, retriever):
        """Family/aging query yields a link and a related term."""
        answer = await _ask(retriever, "Documentaries about family and aging")
        expected_terms = ["family", "aging", "dementia", "caregiv", "alzheimer"]
        assert _has_link(answer)
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_animation(self, retriever):
        """Animation query yields a link or a nothing-found reply."""
        answer = await _ask(retriever, "Animated or motion design pieces")
        assert _has_link(answer) or _says_nothing_found(answer)
# -------------------------------------------------------------------------
# AWARDS (4)
# -------------------------------------------------------------------------
class TestAwards:
    """Award-related queries: only a related term is required, not a link."""

    @pytest.mark.asyncio
    async def test_emmy_winners(self, retriever):
        """Emmy query response mentions Emmy or awards."""
        answer = await _ask(retriever, "Emmy award winning stories")
        expected_terms = ["Emmy", "award"]
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_world_press_photo(self, retriever):
        """World Press Photo query response mentions a related term."""
        answer = await _ask(retriever, "World Press Photo winners")
        expected_terms = ["World Press", "award", "photo"]
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_iraq_war_awards(self, retriever):
        """Iraq war awards query response mentions a related term."""
        answer = await _ask(retriever, "Award-winning stories about the Iraq war")
        expected_terms = ["Iraq", "war", "award", "Marlboro"]
        assert _has_any(answer, expected_terms)

    @pytest.mark.asyncio
    async def test_webby_awards(self, retriever):
        """Webby query response mentions Webby or awards."""
        answer = await _ask(retriever, "Stories that won at Webby Awards")
        expected_terms = ["Webby", "award"]
        assert _has_any(answer, expected_terms)
# -------------------------------------------------------------------------
# EDGE CASES — should return nothing relevant (4)
# -------------------------------------------------------------------------
class TestEdgeCases:
    """Off-topic queries: each response must contain a "nothing found" phrase."""

    @pytest.mark.asyncio
    async def test_minecraft_redstone(self, retriever):
        """Minecraft query should be answered with a nothing-found reply."""
        answer = await _ask(retriever, "Best Minecraft redstone contraptions and tutorials")
        assert _says_nothing_found(answer)

    @pytest.mark.asyncio
    async def test_pasta_recipes(self, retriever):
        """Pasta recipe query should be answered with a nothing-found reply."""
        answer = await _ask(retriever, "Best Italian pasta recipes from Tuscany")
        assert _says_nothing_found(answer)

    @pytest.mark.asyncio
    async def test_taylor_swift(self, retriever):
        """Concert tour query should be answered with a nothing-found reply."""
        answer = await _ask(retriever, "Taylor Swift concert tour dates")
        assert _says_nothing_found(answer)

    @pytest.mark.asyncio
    async def test_crypto_trading(self, retriever):
        """Trading/crypto query should be answered with a nothing-found reply."""
        answer = await _ask(retriever, "Stock market trading strategies and cryptocurrency")
        assert _says_nothing_found(answer)