# NOTE(review): removed a "Spaces / Sleeping" hosting-page artifact that was
# pasted above the module docstring and made this file invalid Python.
| """ | |
| Unit tests for quiz and podcast artifact generators. | |
| All external dependencies (OpenAI, ChromaDB, TTS) are mocked so these tests | |
| run without network access or API keys. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import pathlib | |
| import sys | |
| from unittest.mock import MagicMock, patch | |
| import pytest | |
| ROOT = pathlib.Path(__file__).resolve().parents[1] | |
| sys.path.insert(0, str(ROOT)) | |
| from src.artifacts.quiz_generator import QuizGenerator | |
| from src.artifacts.podcast_generator import PodcastGenerator | |
# ── Shared fixtures ───────────────────────────────────────────────────────────

# Canned LLM payload for quiz generation: a single easy multiple-choice question.
MOCK_QUIZ_LLM_RESPONSE = {
    "questions": [
        {
            "id": 1,
            "question": "What is machine learning?",
            "options": [
                "A) A type of computer hardware",
                "B) A method for training models on data",
                "C) A programming language",
                "D) A database technology",
            ],
            "correct_answer": "B",
            "explanation": "Machine learning trains models on data to make predictions.",
            "difficulty": "easy",
            "topic": "Machine Learning Basics",
        }
    ]
}

# Canned LLM payload for podcast generation: a four-turn, two-host dialogue.
MOCK_PODCAST_LLM_RESPONSE = {
    "segments": [
        {"speaker": "Alex", "text": "Welcome to our podcast about machine learning!"},
        {"speaker": "Jordan", "text": "Thanks, Alex! Machine learning is fascinating."},
        {"speaker": "Alex", "text": "What is the core idea behind it?"},
        {"speaker": "Jordan", "text": "The core idea is training models on data."},
    ]
}

# Fake ChromaDB query hits as (chunk_id, score, payload) triples — the shape
# the mocked vector store's .query() returns in these tests.
MOCK_CHROMA_RESULTS = [
    ("chunk-1", 0.85, {"document": "Machine learning is a subset of AI.", "metadata": {}}),
    ("chunk-2", 0.80, {"document": "Models are trained on labelled datasets.", "metadata": {}}),
]
def _make_openai_chat_response(content_dict: dict) -> MagicMock:
    """Build a mock that mimics openai.chat.completions.create() return value.

    The generators read ``response.choices[0].message.content`` and parse it
    as JSON, so that exact attribute path is pre-populated with the
    serialized dict. (Indexing ``choices[0]`` on a MagicMock auto-creates the
    nested mock, so no explicit list needs to be built.)
    """
    mock_response = MagicMock()
    mock_response.choices[0].message.content = json.dumps(content_dict)
    return mock_response
def _chroma_dir(tmp_path: pathlib.Path, user: str = "1", nb: str = "1") -> pathlib.Path:
    """Create and return the expected chroma directory under *tmp_path*.

    Mirrors the on-disk layout the generators look for:
    ``<base>/data/users/<user>/notebooks/<nb>/chroma``. Idempotent — existing
    directories are left in place.
    """
    d = tmp_path / "data" / "users" / user / "notebooks" / nb / "chroma"
    d.mkdir(parents=True, exist_ok=True)
    return d
# ── QuizGenerator tests ───────────────────────────────────────────────────────
class TestQuizGenerator:
    def test_generate_quiz_returns_questions(self, tmp_path):
        """Returns correct questions dict when context and LLM are available."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = MOCK_CHROMA_RESULTS
        mock_llm_resp = _make_openai_chat_response(MOCK_QUIZ_LLM_RESPONSE)
        env = {"STORAGE_BASE_DIR": str(tmp_path / "data"), "OPENAI_API_KEY": "test-key"}
        with patch.dict(os.environ, env):
            with patch("src.artifacts.quiz_generator.ChromaAdapter", return_value=mock_store):
                with patch("src.artifacts.quiz_generator.OpenAI") as mock_openai_cls:
                    mock_client = MagicMock()
                    mock_client.chat.completions.create.return_value = mock_llm_resp
                    mock_openai_cls.return_value = mock_client
                    gen = QuizGenerator()
                    result = gen.generate_quiz(
                        user_id="1",
                        notebook_id="1",
                        num_questions=1,
                        difficulty="easy",
                    )
        assert "questions" in result
        assert len(result["questions"]) == 1
        assert result["questions"][0]["correct_answer"] == "B"
        assert result["metadata"]["difficulty"] == "easy"
        assert result["metadata"]["num_questions"] == 1

    def test_generate_quiz_no_chroma_dir_returns_error(self, tmp_path):
        """Returns error dict when the chroma directory does not exist."""
        env = {"STORAGE_BASE_DIR": str(tmp_path / "nonexistent"), "OPENAI_API_KEY": "test-key"}
        with patch.dict(os.environ, env):
            with patch("src.artifacts.quiz_generator.OpenAI"):
                gen = QuizGenerator()
                result = gen.generate_quiz(user_id="1", notebook_id="1")
        assert "error" in result
        assert result["questions"] == []

    def test_generate_quiz_empty_vectorstore_returns_error(self, tmp_path):
        """Returns error dict when vectorstore returns no chunks."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = []
        env = {"STORAGE_BASE_DIR": str(tmp_path / "data"), "OPENAI_API_KEY": "test-key"}
        with patch.dict(os.environ, env):
            with patch("src.artifacts.quiz_generator.ChromaAdapter", return_value=mock_store):
                with patch("src.artifacts.quiz_generator.OpenAI"):
                    gen = QuizGenerator()
                    result = gen.generate_quiz(user_id="1", notebook_id="1")
        assert "error" in result

    def test_generate_quiz_defaults_applied(self, tmp_path):
        """Default num_questions and difficulty are read from env vars."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = MOCK_CHROMA_RESULTS
        # Three copies of the same question — only the count matters here.
        mock_llm_resp = _make_openai_chat_response(
            {"questions": [MOCK_QUIZ_LLM_RESPONSE["questions"][0]] * 3}
        )
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "data"),
            "OPENAI_API_KEY": "test-key",
            "DEFAULT_QUIZ_QUESTIONS": "3",
            "DEFAULT_QUIZ_DIFFICULTY": "hard",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.quiz_generator.ChromaAdapter", return_value=mock_store):
                with patch("src.artifacts.quiz_generator.OpenAI") as mock_openai_cls:
                    mock_client = MagicMock()
                    mock_client.chat.completions.create.return_value = mock_llm_resp
                    mock_openai_cls.return_value = mock_client
                    gen = QuizGenerator()
                    result = gen.generate_quiz(user_id="1", notebook_id="1")
        assert result["metadata"]["num_questions"] == 3
        assert result["metadata"]["difficulty"] == "hard"

    def test_save_quiz_creates_markdown_file(self, tmp_path):
        """save_quiz writes a markdown file with questions and answer key."""
        quiz_data = {
            "questions": MOCK_QUIZ_LLM_RESPONSE["questions"],
            "metadata": {"num_questions": 1, "difficulty": "easy"},
        }
        # NOTE(review): STORAGE_BASE_DIR is not patched here, so save_quiz
        # presumably falls back to its default base path — confirm this does
        # not write inside the repository during test runs.
        with patch("src.artifacts.quiz_generator.OpenAI"):
            gen = QuizGenerator()
            markdown = gen.format_quiz_markdown(quiz_data, title="Quiz")
            saved_path = gen.save_quiz(markdown, "1", "1")
        p = pathlib.Path(saved_path)
        assert p.exists()
        assert p.suffix == ".md"
        saved = p.read_text(encoding="utf-8")
        assert "## Questions" in saved
        assert "## Answer Key" in saved
        assert "1. **B**" in saved

    def test_generate_quiz_normalizes_multiline_options(self, tmp_path):
        """Multiline option strings are normalized into labeled bullet options."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = MOCK_CHROMA_RESULTS
        # Options arrive as a single newline-joined string and the answer
        # carries its full label text — the generator must normalize both.
        raw_payload = {
            "questions": [
                {
                    "id": 1,
                    "question": "What is the goal?",
                    "options": "A) One\nB) Two\nC) Three\nD) Four",
                    "correct_answer": "B) Two",
                    "explanation": "Two is correct.",
                    "topic": "Goals",
                }
            ]
        }
        mock_llm_resp = _make_openai_chat_response(raw_payload)
        env = {"STORAGE_BASE_DIR": str(tmp_path / "data"), "OPENAI_API_KEY": "test-key"}
        with patch.dict(os.environ, env):
            with patch("src.artifacts.quiz_generator.ChromaAdapter", return_value=mock_store):
                with patch("src.artifacts.quiz_generator.OpenAI") as mock_openai_cls:
                    mock_client = MagicMock()
                    mock_client.chat.completions.create.return_value = mock_llm_resp
                    mock_openai_cls.return_value = mock_client
                    gen = QuizGenerator()
                    result = gen.generate_quiz(user_id="1", notebook_id="1", num_questions=1)
                    markdown = gen.format_quiz_markdown(result, title="Quiz")
        assert "error" not in result
        assert result["questions"][0]["options"] == ["A) One", "B) Two", "C) Three", "D) Four"]
        assert "- A) One" in markdown
        assert "- D) Four" in markdown
# ── PodcastGenerator tests ────────────────────────────────────────────────────
class TestPodcastGenerator:
    def _make_generator(self, tmp_path: pathlib.Path, extra_env: dict | None = None):
        """Convenience: build a PodcastGenerator with EdgeTTS mocked out.

        NOTE(review): the env/EdgeTTS patches are only active while the
        generator is constructed — they are reverted as soon as this method
        returns. Callers that need the patches during the test body must
        re-enter them (as the tests below do).
        """
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "data"),
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
            **(extra_env or {}),
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                return PodcastGenerator(), env

    def test_generate_podcast_returns_transcript(self, tmp_path):
        """Returns transcript list and audio_path when all mocks succeed."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = MOCK_CHROMA_RESULTS
        mock_llm_resp = _make_openai_chat_response(MOCK_PODCAST_LLM_RESPONSE)
        fake_audio = str(tmp_path / "podcast.mp3")
        pathlib.Path(fake_audio).write_bytes(b"fake-audio")
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "data"),
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                with patch(
                    "src.artifacts.podcast_generator.ChromaAdapter", return_value=mock_store
                ):
                    with patch("src.artifacts.podcast_generator.OpenAI") as mock_openai_cls:
                        mock_client = MagicMock()
                        mock_client.chat.completions.create.return_value = mock_llm_resp
                        mock_openai_cls.return_value = mock_client
                        gen = PodcastGenerator()
                        with patch.object(gen, "_synthesize_segments", return_value=[fake_audio]):
                            with patch.object(gen, "_combine_audio", return_value=fake_audio):
                                result = gen.generate_podcast(
                                    user_id="1",
                                    notebook_id="1",
                                    duration_target="5min",
                                )
        assert "transcript" in result
        assert len(result["transcript"]) == 4
        assert result["audio_path"] == fake_audio
        assert result["metadata"]["duration_target"] == "5min"

    def test_generate_podcast_no_chroma_dir_returns_error(self, tmp_path):
        """Returns error dict when chroma directory does not exist."""
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "nonexistent"),
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                with patch("src.artifacts.podcast_generator.OpenAI"):
                    gen = PodcastGenerator()
                    result = gen.generate_podcast(user_id="1", notebook_id="1")
        assert "error" in result
        assert result["transcript"] == []

    def test_generate_podcast_empty_vectorstore_returns_error(self, tmp_path):
        """Returns error dict when vectorstore has no chunks."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = []
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "data"),
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                with patch(
                    "src.artifacts.podcast_generator.ChromaAdapter", return_value=mock_store
                ):
                    with patch("src.artifacts.podcast_generator.OpenAI"):
                        gen = PodcastGenerator()
                        result = gen.generate_podcast(user_id="1", notebook_id="1")
        assert "error" in result

    def test_save_transcript_creates_markdown_file(self, tmp_path):
        """save_transcript writes markdown transcript at the expected path."""
        podcast_data = {
            "transcript": MOCK_PODCAST_LLM_RESPONSE["segments"],
            "audio_path": str(tmp_path / "podcast.mp3"),
            "metadata": {"duration_target": "5min"},
        }
        env = {
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                with patch("src.artifacts.podcast_generator.OpenAI"):
                    gen = PodcastGenerator()
                    saved_path = gen.save_transcript(podcast_data, "1", "1")
        p = pathlib.Path(saved_path)
        assert p.exists()
        assert p.suffix == ".md"
        saved = p.read_text(encoding="utf-8")
        assert "# Podcast Transcript" in saved
        assert "## Conversation" in saved
        assert "**Alex:**" in saved

    def test_generate_podcast_topic_focus(self, tmp_path):
        """topic_focus is passed through to metadata."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = MOCK_CHROMA_RESULTS
        mock_llm_resp = _make_openai_chat_response(MOCK_PODCAST_LLM_RESPONSE)
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "data"),
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                with patch(
                    "src.artifacts.podcast_generator.ChromaAdapter", return_value=mock_store
                ):
                    with patch("src.artifacts.podcast_generator.OpenAI") as mock_openai_cls:
                        mock_client = MagicMock()
                        mock_client.chat.completions.create.return_value = mock_llm_resp
                        mock_openai_cls.return_value = mock_client
                        gen = PodcastGenerator()
                        with patch.object(gen, "_synthesize_segments", return_value=[]):
                            with patch.object(gen, "_combine_audio", return_value=""):
                                result = gen.generate_podcast(
                                    user_id="1",
                                    notebook_id="1",
                                    topic_focus="neural networks",
                                )
        assert result["metadata"]["topic_focus"] == "neural networks"

    def test_generate_podcast_when_tts_fails_returns_error_with_transcript(self, tmp_path):
        """If TTS produces no audio segments, generator returns an explicit error."""
        _chroma_dir(tmp_path)
        mock_store = MagicMock()
        mock_store.query.return_value = MOCK_CHROMA_RESULTS
        mock_llm_resp = _make_openai_chat_response(MOCK_PODCAST_LLM_RESPONSE)
        env = {
            "STORAGE_BASE_DIR": str(tmp_path / "data"),
            "OPENAI_API_KEY": "test-key",
            "TRANSCRIPT_LLM_PROVIDER": "openai",
            "TTS_PROVIDER": "edge",
        }
        with patch.dict(os.environ, env):
            with patch("src.artifacts.tts_adapter.EdgeTTS"):
                with patch(
                    "src.artifacts.podcast_generator.ChromaAdapter", return_value=mock_store
                ):
                    with patch("src.artifacts.podcast_generator.OpenAI") as mock_openai_cls:
                        mock_client = MagicMock()
                        mock_client.chat.completions.create.return_value = mock_llm_resp
                        mock_openai_cls.return_value = mock_client
                        gen = PodcastGenerator()
                        with patch.object(gen, "_synthesize_segments", return_value=[]):
                            result = gen.generate_podcast(user_id="1", notebook_id="1")
        assert "error" in result
        assert "audio synthesis failed" in str(result["error"]).lower()
        assert isinstance(result.get("transcript"), list)
        assert len(result["transcript"]) > 0