Spaces:

remdms
/

mediastorm

Sleeping

App Files Files Community

mediastorm / tests /test_generation.py

remdms

fix(test): replace quantum_computing edge case with minecraft_redstone

f84669d 2 months ago

raw

history blame contribute delete

10.3 kB

	"""Tests for generation quality — validates Flash responses across full query spectrum.

	30 queries covering: geographic, thematic, temporal, people, genre, awards, edge cases.
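	Most tests check: link present, relevant content mentioned. Some temporal/genre tests
	also accept a 'nothing found' reply, award tests check terms only, and edge-case tests
	require a 'nothing found' reply (no hallucination).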
	"""
	import pytest

	from mediastorm.rag.generator import generate_response
	from mediastorm.rag.retriever import HybridRetriever
	from mediastorm.vectorize.store import VectorStore
	from mediastorm.vectorize.embedder import Embedder
	from mediastorm.vectorize.bm25_store import BM25Store
	from mediastorm.rag.router import QueryRouter
	from mediastorm.config import CHROMADB_PATH, BM25_INDEX_PATH


	@pytest.fixture(autouse=True)
	def _reset_gemini_client():
	    """Set the generator module's global ``_client`` to ``None`` around each test.

	    Applied automatically to every test (``autouse=True``). The reset happens
	    once before the test body and once after it, to avoid event loop issues
	    between tests.
	    """
	    import mediastorm.rag.generator as generator_module

	    # Setup: the test starts with no client.
	    generator_module._client = None
	    yield
	    # Teardown: drop whatever client the test may have left behind.
	    generator_module._client = None


	@pytest.fixture(scope="module")
	def retriever():
	    """Build the ``HybridRetriever`` shared by all tests in this module.

	    Skips when the ``GEMINI_API_KEY`` environment variable is unset or empty,
	    or when ``CHROMADB_PATH`` does not exist.

	    Returns:
	        A ``HybridRetriever`` wired to the vector store, the loaded BM25
	        store, an embedder and a query router, with ``top_k_final=5``.
	    """
	    import os

	    if not os.getenv("GEMINI_API_KEY"):
	        pytest.skip("GEMINI_API_KEY not set")
	    if not CHROMADB_PATH.exists():
	        pytest.skip("ChromaDB not built")

	    vector_store = VectorStore(path=CHROMADB_PATH)
	    query_embedder = Embedder()

	    # NOTE(review): unlike CHROMADB_PATH, BM25_INDEX_PATH is not checked for
	    # existence before load() — confirm load() copes with a missing index.
	    keyword_store = BM25Store(path=BM25_INDEX_PATH)
	    keyword_store.load()

	    hybrid = HybridRetriever(
	        vector_store=vector_store,
	        bm25_store=keyword_store,
	        embedder=query_embedder,
	        router=QueryRouter(),
	        top_k_final=5,
	    )
	    return hybrid


	async def _ask(retriever, query: str) -> str:
	    """Run retrieval for *query* and return the generated response.

	    Args:
	        retriever: Object exposing an awaitable ``retrieve(query)``.
	        query: The user question to send through retrieval and generation.

	    Returns:
	        Whatever ``generate_response`` produces for the query and the
	        retrieval result; an empty list is always passed as its third argument.
	    """
	    retrieved = await retriever.retrieve(query)
	    answer = await generate_response(query, retrieved, [])
	    return answer


	def _has_link(response: str) -> bool:
	return "https://www.mediastorm.com/" in response


	def _has_any(response: str, terms: list[str]) -> bool:
	lower = response.lower()
	return any(t.lower() in lower for t in terms)


	def _says_nothing_found(response: str) -> bool:
	    """Return True when *response* contains a known "nothing found" phrase.

	    The phrases are matched through ``_has_any``, i.e. as case-insensitive
	    substrings.
	    """
	    no_match_phrases = [
	        "no stor",
	        "not contain",
	        "does not",
	        "no relevant",
	        "no direct",
	        "no specific",
	        "no primary",
	        "not primarily",
	        "doesn't contain",
	        "do not have",
	    ]
	    return _has_any(response, no_match_phrases)


	# -------------------------------------------------------------------------
	# GEOGRAPHIC (5)
	# -------------------------------------------------------------------------
	class TestGeographic:
	    """Location-based queries: each answer must carry a site link and a related term."""

	    @pytest.mark.asyncio
	    async def test_congo_war(self, retriever):
	        query = "Stories about the war in Congo"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Congo", "Condition: Critical"])

	    @pytest.mark.asyncio
	    async def test_afghanistan(self, retriever):
	        query = "Documentaries set in Afghanistan"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Afghanistan", "Darkness Visible", "Taliban"])

	    @pytest.mark.asyncio
	    async def test_east_africa(self, retriever):
	        query = "Stories about East Africa"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Kenya", "Ethiopia", "Somalia", "Africa"])

	    @pytest.mark.asyncio
	    async def test_latin_america(self, retriever):
	        query = "Stories filmed in Latin America or Mexico"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(
	            answer,
	            ["Mexico", "Peru", "Cuba", "Latin America", "Tequila"],
	        )

	    @pytest.mark.asyncio
	    async def test_israel_palestine(self, retriever):
	        query = "Stories about the Israeli-Palestinian conflict"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Israel", "Palestin", "Crisis Guide"])


	# -------------------------------------------------------------------------
	# THEMATIC (5)
	# -------------------------------------------------------------------------
	class TestThematic:
	    """Topic-based queries: each answer must carry a site link and a related term."""

	    @pytest.mark.asyncio
	    async def test_ptsd_veterans(self, retriever):
	        query = "Stories about PTSD and veterans returning from war"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["veteran", "PTSD", "soldier", "war", "marine"])

	    @pytest.mark.asyncio
	    async def test_climate_change(self, retriever):
	        query = "Climate change and environmental destruction"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(
	            answer,
	            ["climate", "environment", "glacier", "water", "mining"],
	        )

	    @pytest.mark.asyncio
	    async def test_womens_rights(self, retriever):
	        query = "Child marriage and women's rights"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["marriage", "women", "bride", "girl", "violence"])

	    @pytest.mark.asyncio
	    async def test_wildlife(self, retriever):
	        query = "Wildlife conservation and endangered species"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(
	            answer,
	            [
	                "wildlife",
	                "conservation",
	                "rhino",
	                "elephant",
	                "gorilla",
	                "ivory",
	                "fox",
	            ],
	        )

	    @pytest.mark.asyncio
	    async def test_immigration(self, retriever):
	        query = "Immigration and refugee stories"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(
	            answer,
	            ["immigra", "refugee", "migration", "crossing", "undocumented"],
	        )


	# -------------------------------------------------------------------------
	# TEMPORAL (4)
	# -------------------------------------------------------------------------
	class TestTemporal:
	    """Date-range queries.

	    Three of the four tests accept either a site link or a "nothing found"
	    reply; the financial-crisis test requires a link plus a related term.
	    """

	    @pytest.mark.asyncio
	    async def test_earliest_stories(self, retriever):
	        query = "MediaStorm's earliest stories from 2005-2006"
	        answer = await _ask(retriever, query)
	        # Either outcome is acceptable: linked stories or an explicit "none".
	        assert _has_link(answer) or _says_nothing_found(answer)

	    @pytest.mark.asyncio
	    async def test_recent_stories(self, retriever):
	        query = "Recent stories from 2022 to 2025"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer) or _says_nothing_found(answer)

	    @pytest.mark.asyncio
	    async def test_financial_crisis(self, retriever):
	        query = "Stories from the 2008 financial crisis era"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["crisis", "econom", "financial", "Times of Crisis"])

	    @pytest.mark.asyncio
	    async def test_around_2010(self, retriever):
	        query = "Stories published around 2010"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer) or _says_nothing_found(answer)


	# -------------------------------------------------------------------------
	# PEOPLE (4)
	# -------------------------------------------------------------------------
	class TestPeople:
	    """Queries about named people: each answer must link to the site and name the person."""

	    @pytest.mark.asyncio
	    async def test_salgado(self, retriever):
	        query = "Stories about Sebastiao Salgado"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Salgado"])

	    @pytest.mark.asyncio
	    async def test_don_mccullin(self, retriever):
	        query = "Stories featuring Don McCullin"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["McCullin"])

	    @pytest.mark.asyncio
	    async def test_ai_weiwei(self, retriever):
	        query = "Stories about Ai Weiwei"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Weiwei", "Ai Wei"])

	    @pytest.mark.asyncio
	    async def test_angelina_jolie(self, retriever):
	        query = "Stories about Angelina Jolie"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["Jolie", "Angelina"])


	# -------------------------------------------------------------------------
	# GENRE / FORMAT (4)
	# -------------------------------------------------------------------------
	class TestGenre:
	    """Genre and format queries.

	    The first three tests require a site link plus a related term; the
	    animation test accepts either a link or a "nothing found" reply.
	    """

	    @pytest.mark.asyncio
	    async def test_photo_essays(self, retriever):
	        query = "Photo essays in the archive"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        # Substring matching means "photo" already covers "photo essay".
	        assert _has_any(answer, ["photo essay", "photo"])

	    @pytest.mark.asyncio
	    async def test_crisis_guides(self, retriever):
	        query = "Interactive multimedia projects or crisis guides"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(answer, ["crisis guide", "interactive", "multimedia"])

	    @pytest.mark.asyncio
	    async def test_family_aging(self, retriever):
	        query = "Documentaries about family and aging"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer)
	        assert _has_any(
	            answer,
	            ["family", "aging", "dementia", "caregiv", "alzheimer"],
	        )

	    @pytest.mark.asyncio
	    async def test_animation(self, retriever):
	        query = "Animated or motion design pieces"
	        answer = await _ask(retriever, query)
	        assert _has_link(answer) or _says_nothing_found(answer)


	# -------------------------------------------------------------------------
	# AWARDS (4)
	# -------------------------------------------------------------------------
	class TestAwards:
	    """Award-related queries: only the presence of a related term is checked (no link check)."""

	    @pytest.mark.asyncio
	    async def test_emmy_winners(self, retriever):
	        query = "Emmy award winning stories"
	        answer = await _ask(retriever, query)
	        assert _has_any(answer, ["Emmy", "award"])

	    @pytest.mark.asyncio
	    async def test_world_press_photo(self, retriever):
	        query = "World Press Photo winners"
	        answer = await _ask(retriever, query)
	        assert _has_any(answer, ["World Press", "award", "photo"])

	    @pytest.mark.asyncio
	    async def test_iraq_war_awards(self, retriever):
	        query = "Award-winning stories about the Iraq war"
	        answer = await _ask(retriever, query)
	        # NOTE(review): "war" is a substring of "award", so under substring
	        # matching the "award" term adds nothing here.
	        assert _has_any(answer, ["Iraq", "war", "award", "Marlboro"])

	    @pytest.mark.asyncio
	    async def test_webby_awards(self, retriever):
	        query = "Stories that won at Webby Awards"
	        answer = await _ask(retriever, query)
	        assert _has_any(answer, ["Webby", "award"])


	# -------------------------------------------------------------------------
	# EDGE CASES — should return nothing relevant (4)
	# -------------------------------------------------------------------------
	class TestEdgeCases:
	    """Off-topic queries: each answer must contain a "nothing found" phrase."""

	    @pytest.mark.asyncio
	    async def test_minecraft_redstone(self, retriever):
	        query = "Best Minecraft redstone contraptions and tutorials"
	        answer = await _ask(retriever, query)
	        assert _says_nothing_found(answer)

	    @pytest.mark.asyncio
	    async def test_pasta_recipes(self, retriever):
	        query = "Best Italian pasta recipes from Tuscany"
	        answer = await _ask(retriever, query)
	        assert _says_nothing_found(answer)

	    @pytest.mark.asyncio
	    async def test_taylor_swift(self, retriever):
	        query = "Taylor Swift concert tour dates"
	        answer = await _ask(retriever, query)
	        assert _says_nothing_found(answer)

	    @pytest.mark.asyncio
	    async def test_crypto_trading(self, retriever):
	        query = "Stock market trading strategies and cryptocurrency"
	        answer = await _ask(retriever, query)
	        assert _says_nothing_found(answer)