Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from researchmind.citations import ( | |
| clean_model_answer, | |
| format_context_block, | |
| format_references, | |
| ) | |
| from researchmind.store import StoredChunk | |
| def _chunk(chunk_id: str, doc_uri: str, text: str) -> StoredChunk: | |
| return StoredChunk( | |
| id=chunk_id, | |
| doc_id="doc1", | |
| ordinal=0, | |
| text=text, | |
| doc_title="AI Agents Review", | |
| doc_uri=doc_uri, | |
| metadata={}, | |
| ) | |
| def test_format_context_groups_chunks_by_document(): | |
| chunks = [ | |
| _chunk("c1", "https://example.com/paper", "First passage about agents."), | |
| _chunk("c2", "https://example.com/paper", "Second passage about planning."), | |
| ] | |
| context, citations = format_context_block(chunks) | |
| assert context.count("[1]") == 1 | |
| assert "[2]" not in context | |
| assert len(citations) == 1 | |
| assert "First passage" in context | |
| assert "Second passage" in context | |
| def test_format_references_one_line_per_source(): | |
| _, citations = format_context_block( | |
| [ | |
| _chunk("c1", "https://a.test", "alpha"), | |
| _chunk("c2", "https://a.test", "beta"), | |
| ] | |
| ) | |
| refs = format_references(citations) | |
| assert refs.count("https://a.test") == 1 | |
| def test_clean_passage_collapses_citation_runs(): | |
| chunks = [_chunk("c1", "https://a.test", "[1] [2] [3] [4] [5] actual content")] | |
| context, _ = format_context_block(chunks) | |
| assert "[1] [2] [3] [4] [5]" not in context | |
| assert "actual content" in context | |
| def test_clean_model_answer_strips_reference_spam(): | |
| raw = "Summary here [1][2][3][4][5].\n\n**References**\n- [1] dup" | |
| cleaned = clean_model_answer(raw) | |
| assert "**References**" not in cleaned | |
| assert "[1][2][3]" not in cleaned | |
| assert "Summary here" in cleaned | |
| def test_clean_model_answer_strips_thinking_block(): | |
| think_open = "<" + "think" + ">" | |
| think_close = "</" + "think" + ">" | |
| raw = f"{think_open}\nplan\n{think_close}\n\nAgents use tools and memory [1]." | |
| cleaned = clean_model_answer(raw) | |
| assert cleaned == "Agents use tools and memory [1]." | |
| def test_clean_model_answer_rejects_unclosed_thinking(): | |
| rt_open = "<" + "redacted_thinking" + ">" | |
| raw = f"{rt_open}\nWe are given a context and need to plan the answer." | |
| cleaned = clean_model_answer(raw) | |
| assert "redacted_thinking" not in cleaned | |
| assert "planning text without a final answer" in cleaned | |