Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import numpy as np | |
| import pytest | |
| from agent.models import EducationPptxInput, ResearchIngestResult | |
| from agent.prompts import education_outline_user | |
| from agent.runner import AgentRunner | |
| from researchmind.config import ResearchMindConfig | |
| from researchmind.extract import ExtractedDocument | |
def _outline_json(slide_count: int = 3) -> str:
    """Serialize a canned lesson outline with ``slide_count`` slides.

    Slides are numbered from 1 and all share the same placeholder bullet
    and speaker note; the deck title is always "Test Lesson".
    """
    outline = {"title": "Test Lesson", "slides": []}
    for number in range(1, slide_count + 1):
        outline["slides"].append(
            {
                "title": f"Slide {number}",
                "bullets": ["Key point"],
                "speaker_note": "Note",
            }
        )
    return json.dumps(outline)
class OutlineBackend:
    """Stub model backend that always answers with a canned 3-slide outline."""

    def load(self) -> None:
        """Do nothing; the stub has no model to load."""
        return None

    def chat(self, messages, *, max_tokens=2048, temperature=0.3):
        """Return the canned outline JSON; all arguments are ignored."""
        return _outline_json(3)

    def generate(self, prompt, *, max_tokens=512, temperature=0.7):
        """Wrap ``prompt`` as a single user message and delegate to ``chat``."""
        # Forward both sampling options so the caller's temperature is not
        # silently replaced by chat()'s own default.
        return self.chat(
            [{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
@pytest.fixture
def research_env(tmp_path, monkeypatch):
    """Provide a ResearchMind config with embedding and web access stubbed out.

    Points the RESEARCHMIND_DATA_DIR and AGENT_OUTPUTS_DIR environment
    variables at directories under ``tmp_path`` and replaces the embedding,
    fetch, search and URL-validation callables with deterministic fakes.

    Returns:
        The ``ResearchMindConfig`` built for the temporary data directory.
    """
    cfg = ResearchMindConfig(
        data_dir=tmp_path / "rm",
        embed_model="test",
        auto_search=False,
        top_k=2,
        max_context_chunks=8,
        chunk_size=50,
        chunk_overlap=10,
    )
    monkeypatch.setenv("RESEARCHMIND_DATA_DIR", str(cfg.data_dir))
    monkeypatch.setenv("AGENT_OUTPUTS_DIR", str(tmp_path / "outputs"))

    def fake_embed(texts, *, model_name):
        # Every text maps to the same 3-d unit vector; an empty batch yields
        # a (0, 3) array so the result is always two-dimensional.
        vecs = [np.array([1.0, 0.0, 0.0], dtype=np.float32) for _ in texts]
        return np.stack(vecs) if vecs else np.zeros((0, 3), dtype=np.float32)

    monkeypatch.setattr("researchmind.ingest.embed_texts", fake_embed)
    monkeypatch.setattr("researchmind.retrieve.embed_texts", fake_embed)

    def fake_scrape(url: str):
        # Same canned document for any URL; only the uri echoes the input.
        return ExtractedDocument(
            source_type="web",
            uri=url,
            title="Example",
            text="Photosynthesis converts light to energy in plants.",
        )

    monkeypatch.setattr("agent.tools.research_tools.fetch_and_extract", fake_scrape)

    def fake_search(topic, *, n=5, check_reachable=True):
        # One example.com hit whose path is the topic with spaces hyphenated.
        return [f"https://example.com/{topic.replace(' ', '-')}"]

    monkeypatch.setattr("agent.tools.research_tools.search_urls", fake_search)

    def fake_validate(url, *, check_reachable=True):
        # Accept every URL, prefixing https:// when it does not start with "http".
        normalized = url if url.startswith("http") else f"https://{url}"
        return True, "ok", normalized

    monkeypatch.setattr("researchmind.url_validate.validate_url", fake_validate)
    return cfg
def test_education_outline_user_includes_source_context():
    """The outline prompt mentions and embeds the supplied source excerpts."""
    request = EducationPptxInput(topic="Photosynthesis", grade="6", slide_count=3)
    prompt = education_outline_user(
        request,
        source_context="[1] Plants use chlorophyll.",
    )
    for expected in ("retrieved source excerpts", "chlorophyll"):
        assert expected in prompt
def test_education_outline_user_includes_conversation_context():
    """The outline prompt mentions and embeds the request's conversation context."""
    transcript = "User: What is photosynthesis?\n\nAssistant: Plants use sunlight."
    request = EducationPptxInput(
        topic="Photosynthesis",
        grade="6",
        slide_count=3,
        conversation_context=transcript,
    )
    prompt = education_outline_user(request)
    for expected in ("conversation transcript", "What is photosynthesis?"):
        assert expected in prompt
def test_none_mode_skips_source_summary(research_env):
    """With source_mode="none" the outline is built and the source summary is empty."""
    options = {
        "topic": "Photosynthesis",
        "grade": "6",
        "slide_count": 3,
        "model_key": "test",
        "backend": OutlineBackend(),
        "source_mode": "none",
    }
    outcome = AgentRunner().run_education_pptx(**options)
    assert outcome.outline.title == "Test Lesson"
    assert outcome.source_summary == ""
def test_web_auto_calls_ingest_with_auto_search(research_env, monkeypatch):
    """Web mode with the "auto" workflow ingests exactly once with auto_search=True."""
    recorded: list[dict] = []

    def record_ingest(self, **kwargs):
        # Capture the keyword arguments and hand back a canned ingest result.
        recorded.append(kwargs)
        return ResearchIngestResult(
            session_id="sess-auto",
            ingested=["https://example.com/photosynthesis"],
            skipped=[],
            failures=[],
            doc_count=1,
            chunk_count=1,
            trace_path="/tmp/trace.json",
            message="Ingested 1 source(s)",
        )

    monkeypatch.setattr(AgentRunner, "run_researchmind_ingest", record_ingest)

    outcome = AgentRunner().run_education_pptx(
        topic="Photosynthesis",
        grade="6",
        slide_count=3,
        model_key="test",
        backend=OutlineBackend(),
        source_mode="web",
        search_workflow="auto",
    )

    assert len(recorded) == 1
    assert recorded[0]["auto_search"] is True
    assert "Ingested 1 source(s)" in outcome.source_summary
def test_web_two_step_requires_urls(research_env):
    """The two-step web workflow raises ValueError when no URLs or files are given."""
    agent = AgentRunner()
    with pytest.raises(ValueError, match="Two-step web search requires"):
        agent.run_education_pptx(
            source_mode="web",
            search_workflow="two_step",
            urls=[],
            files=[],
            topic="Photosynthesis",
            grade="6",
            slide_count=3,
            model_key="test",
            backend=OutlineBackend(),
        )
def test_web_two_step_ingests_without_auto_search(research_env, monkeypatch):
    """The two-step web workflow calls ingest with auto_search=False."""
    recorded: list[dict] = []

    def record_ingest(self, **kwargs):
        # Capture the keyword arguments and hand back a canned ingest result.
        recorded.append(kwargs)
        return ResearchIngestResult(
            session_id="sess-two",
            ingested=["https://example.com/a"],
            skipped=[],
            failures=[],
            doc_count=1,
            chunk_count=1,
            trace_path="/tmp/trace.json",
            message="Ingested 1 source(s)",
        )

    monkeypatch.setattr(AgentRunner, "run_researchmind_ingest", record_ingest)

    AgentRunner().run_education_pptx(
        topic="Photosynthesis",
        grade="6",
        slide_count=3,
        model_key="test",
        backend=OutlineBackend(),
        source_mode="web",
        search_workflow="two_step",
        urls=["https://example.com/a"],
    )

    first_call = recorded[0]
    assert first_call["auto_search"] is False
def test_rag_requires_indexed_sources(research_env):
    """RAG mode raises ValueError when given no session, URLs or files."""
    agent = AgentRunner()
    with pytest.raises(ValueError, match="RAG mode requires indexed sources"):
        agent.run_education_pptx(
            source_mode="rag",
            session_id="",
            urls=[],
            files=[],
            topic="Photosynthesis",
            grade="6",
            slide_count=3,
            model_key="test",
            backend=OutlineBackend(),
        )
def test_web_two_step_uses_duplicate_doc_ids(research_env):
    """Re-ingesting a known URL into a new session reuses its doc ids.

    The second ingest must report the URL as skipped with the same doc ids
    as the first ingest, and the two-step deck build for that session must
    still report retrieved sources.
    """
    agent = AgentRunner()

    def ingest_example(session):
        # Fresh argument objects on every call, exactly as two separate calls.
        return agent.run_researchmind_ingest(
            topic="Photosynthesis",
            urls=["https://example.com/a"],
            files=[],
            auto_search=False,
            session_id=session,
            model_key="test",
            backend=OutlineBackend(),
        )

    initial = ingest_example(None)
    assert initial.doc_ids

    discovery = agent.run_researchmind_discover(
        topic="Photosynthesis",
        auto_search=False,
        session_id=None,
        model_key="test",
        backend=OutlineBackend(),
    )
    fresh_session = discovery.session_id

    repeat = ingest_example(fresh_session)
    assert repeat.ingested == []
    assert len(repeat.skipped) == 1
    assert repeat.doc_ids == initial.doc_ids

    outcome = agent.run_education_pptx(
        topic="Photosynthesis",
        grade="6",
        slide_count=3,
        model_key="test",
        backend=OutlineBackend(),
        source_mode="web",
        search_workflow="two_step",
        urls=["https://example.com/a"],
        session_id=fresh_session,
    )
    assert "Retrieved" in outcome.source_summary
    assert "model knowledge only" not in outcome.source_summary
def test_rag_uses_session_without_auto_search(research_env, monkeypatch):
    """RAG mode with an existing session retrieves without calling ingest again."""
    # Populate a session with the real ingest before it gets patched out.
    seeded = AgentRunner().run_researchmind_ingest(
        topic="Photosynthesis",
        urls=["https://example.com/a"],
        files=[],
        auto_search=False,
        session_id=None,
        model_key="test",
        backend=OutlineBackend(),
    )

    recorded: list[dict] = []

    def record_ingest(self, **kwargs):
        # Any call made during the deck build is captured here.
        recorded.append(kwargs)
        return seeded

    monkeypatch.setattr(AgentRunner, "run_researchmind_ingest", record_ingest)

    outcome = AgentRunner().run_education_pptx(
        topic="Photosynthesis",
        grade="6",
        slide_count=3,
        model_key="test",
        backend=OutlineBackend(),
        source_mode="rag",
        session_id=seeded.session_id,
    )

    assert recorded == []
    assert "Retrieved" in outcome.source_summary