# File: docmind/tests/test_attribution.py
# Repository page metadata (not code): AI Engineer - "Initial commit for DocMind" - 6cca5b1 - 4.44 kB
"""
Tests for DocMind Attribution Parser.
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from pipeline.attribution import (
parse_attributed_response,
strip_unattributed,
)
class TestParseAttributedResponse:
    """Checks for parsing LLM responses that carry [SOURCE: chunk_id] tags."""

    def test_single_sentence_with_source(self):
        """A single tagged sentence produces exactly one attributed entry."""
        parsed = parse_attributed_response(
            "The policy covers employees. [SOURCE: doc1_c0001]"
        )
        assert len(parsed) == 1
        only = parsed[0]
        assert only.chunk_id == "doc1_c0001"
        assert "policy" in only.text

    def test_multiple_sentences(self):
        """Two tagged sentences produce two entries, in the order given."""
        expected_ids = ("doc1_c0001", "doc1_c0002")
        llm_output = (
            "The policy covers employees. [SOURCE: doc1_c0001] "
            "Benefits include health insurance. [SOURCE: doc1_c0002]"
        )
        parsed = parse_attributed_response(llm_output)
        assert len(parsed) == 2
        for position, expected in enumerate(expected_ids):
            assert parsed[position].chunk_id == expected

    def test_insufficient_context(self):
        """The INSUFFICIENT_CONTEXT marker is expected to produce no entries."""
        parsed = parse_attributed_response("INSUFFICIENT_CONTEXT")
        assert len(parsed) == 0

    def test_empty_response(self):
        """An empty string is expected to produce no entries."""
        llm_output = ""
        parsed = parse_attributed_response(llm_output)
        assert len(parsed) == 0

    def test_no_source_tags(self):
        """Untagged text is still parsed, but its first entry has no chunk_id."""
        parsed = parse_attributed_response("This has no source citations at all.")
        # At least one sentence must come back, and it must be unattributed.
        assert 1 <= len(parsed)
        assert parsed[0].chunk_id is None
class TestStripUnattributed:
    """Checks that strip_unattributed filters out invalid citations."""

    @staticmethod
    def _build_sentences(pairs):
        """Build AttributedSentence objects from (text, chunk_id) pairs.

        Every object is created with an empty ``raw_text``.
        """
        # Imported at call time rather than at module level.
        from pipeline.attribution import AttributedSentence

        return [
            AttributedSentence(text=text, chunk_id=chunk_id, raw_text="")
            for text, chunk_id in pairs
        ]

    def test_keeps_valid_citations(self):
        """Sentences whose chunk ids are all in the valid set are all kept."""
        candidates = self._build_sentences(
            [("Valid claim.", "c001"), ("Another valid.", "c002")]
        )
        kept = strip_unattributed(candidates, {"c001", "c002", "c003"})
        assert len(kept) == 2

    def test_removes_invalid_citations(self):
        """A sentence citing an id outside the valid set is dropped."""
        candidates = self._build_sentences(
            [("Valid.", "c001"), ("Invalid.", "c999")]
        )
        kept = strip_unattributed(candidates, {"c001", "c002"})
        assert len(kept) == 1
        assert kept[0].chunk_id == "c001"

    def test_removes_uncited_sentences(self):
        """A sentence with chunk_id None is dropped."""
        candidates = self._build_sentences(
            [("No citation.", None), ("Has citation.", "c001")]
        )
        kept = strip_unattributed(candidates, {"c001"})
        assert len(kept) == 1

    def test_all_invalid(self):
        """When every sentence is unknown or uncited, nothing is kept."""
        candidates = self._build_sentences([("Bad.", "c999"), ("Also bad.", None)])
        kept = strip_unattributed(candidates, {"c001"})
        assert len(kept) == 0

    def test_empty_input(self):
        """An empty sentence list yields an empty result."""
        kept = strip_unattributed([], {"c001"})
        assert len(kept) == 0
# ── Run tests ───────────────────────────────────────────────────────
def run_test_class(cls):
    """Run every ``test_*`` attribute of *cls* and print a per-test report.

    A fresh instance of *cls* is created, its attributes whose names start
    with ``test_`` are called in sorted name order, and one ``[PASS]``,
    ``[FAIL]`` (AssertionError) or ``[ERROR]`` (any other Exception) line is
    printed per test, followed by a summary line.

    Args:
        cls: A zero-argument-constructible class containing test methods.

    Returns:
        A ``(passed, failed)`` tuple; errors are counted as failures.
    """
    print(f"\n{cls.__name__}:")
    test = cls()
    passed = failed = 0
    for method_name in sorted(dir(test)):
        if not method_name.startswith("test_"):
            continue
        try:
            getattr(test, method_name)()
        except AssertionError as e:
            print(f" [FAIL] {method_name}: {e}")
            failed += 1
        except Exception as e:
            # Top-level boundary of the mini runner: report and keep going.
            print(f" [ERROR] {method_name}: {type(e).__name__}: {e}")
            failed += 1
        else:
            # Reported outside the try so only the test call itself is guarded.
            print(f" [PASS] {method_name}")
            passed += 1
    print(f" {passed} passed, {failed} failed")
    return passed, failed


if __name__ == "__main__":
    total_failed = 0
    for cls in [TestParseAttributedResponse, TestStripUnattributed]:
        _, failed = run_test_class(cls)
        total_failed += failed
    # Exit non-zero on any failure so shells and CI can detect a broken run.
    if total_failed:
        raise SystemExit(1)