Spaces:

Param2121
/

docmind

Sleeping

docmind / tests /test_attribution.py

AI Engineer

Initial commit for DocMind

6cca5b1 20 days ago

4.44 kB

	"""
	Tests for DocMind Attribution Parser.
	"""

	import sys
	import os

	sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

	from pipeline.attribution import (
	parse_attributed_response,
	strip_unattributed,
	)


	class TestParseAttributedResponse:
	"""Test parsing of LLM responses with [SOURCE: chunk_id] tags."""

	def test_single_sentence_with_source(self):
	response = "The policy covers employees. [SOURCE: doc1_c0001]"
	result = parse_attributed_response(response)
	assert len(result) == 1
	assert result[0].chunk_id == "doc1_c0001"
	assert "policy" in result[0].text

	def test_multiple_sentences(self):
	response = (
	"The policy covers employees. [SOURCE: doc1_c0001] "
	"Benefits include health insurance. [SOURCE: doc1_c0002]"
	)
	result = parse_attributed_response(response)
	assert len(result) == 2
	assert result[0].chunk_id == "doc1_c0001"
	assert result[1].chunk_id == "doc1_c0002"

	def test_insufficient_context(self):
	response = "INSUFFICIENT_CONTEXT"
	result = parse_attributed_response(response)
	assert len(result) == 0

	def test_empty_response(self):
	result = parse_attributed_response("")
	assert len(result) == 0

	def test_no_source_tags(self):
	response = "This has no source citations at all."
	result = parse_attributed_response(response)
	# Should parse as sentence with no chunk_id
	assert len(result) >= 1
	assert result[0].chunk_id is None


	class TestStripUnattributed:
	"""Test filtering of invalid citations."""

	def test_keeps_valid_citations(self):
	from pipeline.attribution import AttributedSentence
	sentences = [
	AttributedSentence(text="Valid claim.", chunk_id="c001", raw_text=""),
	AttributedSentence(text="Another valid.", chunk_id="c002", raw_text=""),
	]
	valid_ids = {"c001", "c002", "c003"}
	result = strip_unattributed(sentences, valid_ids)
	assert len(result) == 2

	def test_removes_invalid_citations(self):
	from pipeline.attribution import AttributedSentence
	sentences = [
	AttributedSentence(text="Valid.", chunk_id="c001", raw_text=""),
	AttributedSentence(text="Invalid.", chunk_id="c999", raw_text=""),
	]
	valid_ids = {"c001", "c002"}
	result = strip_unattributed(sentences, valid_ids)
	assert len(result) == 1
	assert result[0].chunk_id == "c001"

	def test_removes_uncited_sentences(self):
	from pipeline.attribution import AttributedSentence
	sentences = [
	AttributedSentence(text="No citation.", chunk_id=None, raw_text=""),
	AttributedSentence(text="Has citation.", chunk_id="c001", raw_text=""),
	]
	valid_ids = {"c001"}
	result = strip_unattributed(sentences, valid_ids)
	assert len(result) == 1

	def test_all_invalid(self):
	from pipeline.attribution import AttributedSentence
	sentences = [
	AttributedSentence(text="Bad.", chunk_id="c999", raw_text=""),
	AttributedSentence(text="Also bad.", chunk_id=None, raw_text=""),
	]
	valid_ids = {"c001"}
	result = strip_unattributed(sentences, valid_ids)
	assert len(result) == 0

	def test_empty_input(self):
	result = strip_unattributed([], {"c001"})
	assert len(result) == 0


	# ── Run tests ───────────────────────────────────────────────────────

	if __name__ == "__main__":
	for cls in [TestParseAttributedResponse, TestStripUnattributed]:
	print(f"\n{cls.__name__}:")
	test = cls()
	passed = failed = 0
	for method_name in sorted(dir(test)):
	if not method_name.startswith("test_"):
	continue
	try:
	getattr(test, method_name)()
	print(f" [PASS] {method_name}")
	passed += 1
	except AssertionError as e:
	print(f" [FAIL] {method_name}: {e}")
	failed += 1
	except Exception as e:
	print(f" [ERROR] {method_name}: {type(e).__name__}: {e}")
	failed += 1
	print(f" {passed} passed, {failed} failed")