plutoV2_miniProject_3rd-yr / mp1 /test_doc_summary.py
ayushKishor's picture
Add Pluto memory layer and pipeline fixes
23cdeed
# -*- coding: utf-8 -*-
from pathlib import Path
from pluto.doc_summary import (
DocSummary,
apply_doc_summary_context,
generate_doc_summary,
save_doc_summaries,
)
def test_generate_doc_summary_returns_valid_summary_with_mocked_llm(monkeypatch, tmp_path):
    """Check the DocSummary produced when the LLM hook returns fixed JSON.

    ``pluto.doc_summary._call_summary_llm`` is patched with a stub that
    returns a canned JSON payload. ``generate_doc_summary`` is then called
    for the ``paper`` document and the result must be a ``DocSummary``
    carrying the stub's title, domain and key claims.
    """
    # Canned LLM reply, assembled line by line.
    canned_reply = (
        "\n"
        "{\n"
        '"title": "Retrieval Paper",\n'
        '"domain": "information retrieval",\n'
        '"key_claims": ["Chunk context improves retrieval"],\n'
        '"structure": ["intro", "methodology", "results"],\n'
        '"open_questions": ["How robust is it?"]\n'
        "}\n"
    )

    def fake_summary_llm(**kwargs):
        # Accept any keyword arguments and ignore them.
        return canned_reply

    corpus_dir = tmp_path / "corpus"
    corpus_dir.mkdir()
    source_file = corpus_dir / "paper.md"
    source_file.write_text("# Paper\n\nThis is about retrieval.", encoding="utf-8")
    monkeypatch.setattr("pluto.doc_summary._call_summary_llm", fake_summary_llm)

    result = generate_doc_summary("paper", corpus_dir)

    assert isinstance(result, DocSummary)
    assert result.doc_id == "paper"
    assert result.title == "Retrieval Paper"
    assert result.domain == "information retrieval"
    assert result.key_claims == ["Chunk context improves retrieval"]
def test_generate_doc_summary_falls_back_when_llm_fails(monkeypatch, tmp_path):
    """Expect a fallback summary when the patched LLM hook raises.

    ``pluto.doc_summary._call_summary_llm`` is replaced by a function that
    raises ``RuntimeError``. ``generate_doc_summary`` must still return a
    summary for ``paper`` whose title equals the document id and whose key
    claims and open questions are empty lists.
    """
    def raise_model_unavailable(**kwargs):
        # Stand-in for an LLM backend that cannot be reached.
        raise RuntimeError("model unavailable")

    corpus_dir = tmp_path / "corpus"
    corpus_dir.mkdir()
    source_file = corpus_dir / "paper.md"
    source_file.write_text("# Paper\n\nBody.", encoding="utf-8")
    monkeypatch.setattr("pluto.doc_summary._call_summary_llm", raise_model_unavailable)

    fallback = generate_doc_summary("paper", corpus_dir)

    assert fallback.doc_id == "paper"
    assert fallback.title == "paper"
    assert fallback.key_claims == []
    assert fallback.open_questions == []
def test_context_prefix_is_prepended_to_chunk_text(tmp_path):
    """Check that a saved summary is rendered as a prefix on the chunk text.

    A ``DocSummary`` for ``paper`` is stored with ``save_doc_summaries``;
    ``apply_doc_summary_context`` must then return text that begins with the
    bracketed document-context header and still ends with the original chunk.
    """
    expected_header = (
        "[Document context: Retrieval Paper | Domain: AI | Key claims: Claim A; Claim B]"
    )
    chunk_text = "Original chunk"

    corpus_dir = tmp_path / "corpus"
    corpus_dir.mkdir()

    stored_summary = DocSummary(
        doc_id="paper",
        title="Retrieval Paper",
        domain="AI",
        key_claims=["Claim A", "Claim B"],
        structure=[],
        open_questions=[],
        created_at="2026-01-01T00:00:00+00:00",
    )
    save_doc_summaries(corpus_dir, {"paper": stored_summary})

    contextualized = apply_doc_summary_context(chunk_text, "paper", corpus_dir)

    assert contextualized.startswith(expected_header)
    assert contextualized.endswith(chunk_text)