File size: 2,326 Bytes
# -*- coding: utf-8 -*-
from pathlib import Path
from pluto.doc_summary import (
DocSummary,
apply_doc_summary_context,
generate_doc_summary,
save_doc_summaries,
)
def test_generate_doc_summary_returns_valid_summary_with_mocked_llm(monkeypatch, tmp_path):
    """Check that a JSON reply from the patched LLM call becomes a DocSummary.

    The private ``_call_summary_llm`` hook is replaced with a stub that
    returns a fixed JSON document, so no real model is contacted.
    """
    # Arrange: a corpus directory containing a single markdown file, paper.md.
    corpus_dir = tmp_path / "corpus"
    corpus_dir.mkdir()
    source_file = corpus_dir / "paper.md"
    source_file.write_text("# Paper\n\nThis is about retrieval.", encoding="utf-8")

    # Canned reply, including its leading and trailing newlines.
    canned_reply = (
        "\n"
        "{\n"
        '"title": "Retrieval Paper",\n'
        '"domain": "information retrieval",\n'
        '"key_claims": ["Chunk context improves retrieval"],\n'
        '"structure": ["intro", "methodology", "results"],\n'
        '"open_questions": ["How robust is it?"]\n'
        "}\n"
    )

    def fake_llm(**kwargs):
        # Keyword-only stub: ignores whatever it is given and returns the canned JSON.
        return canned_reply

    monkeypatch.setattr("pluto.doc_summary._call_summary_llm", fake_llm)

    # Act
    produced = generate_doc_summary("paper", corpus_dir)

    # Assert: correct type first, then each checked field in turn.
    assert isinstance(produced, DocSummary)
    expected_fields = {
        "doc_id": "paper",
        "title": "Retrieval Paper",
        "domain": "information retrieval",
        "key_claims": ["Chunk context improves retrieval"],
    }
    for field_name, expected_value in expected_fields.items():
        assert getattr(produced, field_name) == expected_value, field_name
def test_generate_doc_summary_falls_back_when_llm_fails(monkeypatch, tmp_path):
    """Check that a summary is still returned when the patched LLM call raises.

    The expected result uses the document id as its title and has empty
    ``key_claims`` and ``open_questions`` lists.
    """
    docs_root = tmp_path / "corpus"
    docs_root.mkdir()
    docs_root.joinpath("paper.md").write_text("# Paper\n\nBody.", encoding="utf-8")

    def unavailable_llm(**kwargs):
        # Stand-in for the LLM hook that always errors out.
        raise RuntimeError("model unavailable")

    monkeypatch.setattr("pluto.doc_summary._call_summary_llm", unavailable_llm)

    fallback = generate_doc_summary("paper", docs_root)

    # Identity fields: the title is expected to equal the document id.
    assert fallback.doc_id == "paper"
    assert fallback.title == "paper"
    # List fields are expected to be empty.
    assert fallback.key_claims == []
    assert fallback.open_questions == []
def test_context_prefix_is_prepended_to_chunk_text(tmp_path):
    """Check that a saved summary is rendered as a bracketed header before the chunk.

    A summary is saved for the "paper" document, then
    ``apply_doc_summary_context`` is called for a chunk of that document.
    The result must start with the context header and end with the
    untouched chunk text.
    """
    store_dir = tmp_path / "corpus"
    store_dir.mkdir()

    # Build the summary from an explicit field mapping.
    summary_fields = {
        "doc_id": "paper",
        "title": "Retrieval Paper",
        "domain": "AI",
        "key_claims": ["Claim A", "Claim B"],
        "structure": [],
        "open_questions": [],
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    stored_summary = DocSummary(**summary_fields)
    save_doc_summaries(store_dir, {"paper": stored_summary})

    chunk_text = "Original chunk"
    expected_header = (
        "[Document context: Retrieval Paper | Domain: AI | Key claims: Claim A; Claim B]"
    )

    contextualized = apply_doc_summary_context(chunk_text, "paper", store_dir)

    assert contextualized.startswith(expected_header)
    assert contextualized.endswith(chunk_text)
|