# -*- coding: utf-8 -*-
"""Tests for document-summary generation and chunk-context prefixing."""
from pathlib import Path

from pluto.doc_summary import (
    DocSummary,
    apply_doc_summary_context,
    generate_doc_summary,
    save_doc_summaries,
)


def test_generate_doc_summary_returns_valid_summary_with_mocked_llm(monkeypatch, tmp_path):
    """A JSON reply from the (mocked) LLM is surfaced as a populated DocSummary."""
    corpus = tmp_path / "corpus"
    corpus.mkdir()
    (corpus / "paper.md").write_text("# Paper\n\nThis is about retrieval.", encoding="utf-8")

    # Replace the LLM call with a canned JSON reply so the test needs no model.
    monkeypatch.setattr(
        "pluto.doc_summary._call_summary_llm",
        lambda **kwargs: """
        {
            "title": "Retrieval Paper",
            "domain": "information retrieval",
            "key_claims": ["Chunk context improves retrieval"],
            "structure": ["intro", "methodology", "results"],
            "open_questions": ["How robust is it?"]
        }
        """,
    )

    summary = generate_doc_summary("paper", corpus)

    assert isinstance(summary, DocSummary)
    assert summary.doc_id == "paper"
    assert summary.title == "Retrieval Paper"
    assert summary.domain == "information retrieval"
    assert summary.key_claims == ["Chunk context improves retrieval"]


def test_generate_doc_summary_falls_back_when_llm_fails(monkeypatch, tmp_path):
    """If the LLM call raises, a minimal summary keyed on the doc id is expected."""
    corpus = tmp_path / "corpus"
    corpus.mkdir()
    (corpus / "paper.md").write_text("# Paper\n\nBody.", encoding="utf-8")

    def fail(**kwargs):
        # Simulate an unavailable model backend.
        raise RuntimeError("model unavailable")

    monkeypatch.setattr("pluto.doc_summary._call_summary_llm", fail)

    summary = generate_doc_summary("paper", corpus)

    assert summary.doc_id == "paper"
    assert summary.title == "paper"
    assert summary.key_claims == []
    assert summary.open_questions == []


def test_context_prefix_is_prepended_to_chunk_text(tmp_path):
    """A saved summary is rendered as a bracketed prefix ahead of the chunk text."""
    corpus = tmp_path / "corpus"
    corpus.mkdir()
    summary = DocSummary(
        doc_id="paper",
        title="Retrieval Paper",
        domain="AI",
        key_claims=["Claim A", "Claim B"],
        structure=[],
        open_questions=[],
        created_at="2026-01-01T00:00:00+00:00",
    )
    # Save the summary into the corpus before asking for the chunk context.
    save_doc_summaries(corpus, {"paper": summary})

    result = apply_doc_summary_context("Original chunk", "paper", corpus)

    expected_prefix = (
        "[Document context: Retrieval Paper | Domain: AI | "
        "Key claims: Claim A; Claim B]"
    )
    assert result.startswith(expected_prefix)
    assert result.endswith("Original chunk")