Spaces:

ThejasRao
/

ModPilot

Sleeping

App Files Files Community

ModPilot / llm /test_validation.py

ThejasRao

Deploy ModPilot Investigation Engine

7302343 about 1 month ago

Raw

History Blame Contribute Delete

14.5 kB

	"""Citation validator tests — 100% coverage target (load-bearing per ADR-0003)."""

	from __future__ import annotations

	import pytest

	from llm.validation import (
	ValidationResult,
	contains_ev_reference,
	is_substantive,
	parse_ev_references,
	split_sentences,
	uncited_substantive_sentences,
	validate_citations,
	)
	from orchestrator.tools import EvidenceAccumulator, ToolResult

	# === Helpers =============================================================


	def _result(
	    tool: str = "policy_match",
	    status: str = "success",
	    summary: str = "ok",
	) -> ToolResult:
	    """Build a ``ToolResult`` for tests; ``latency_ms`` is always 10.

	    ``tool`` and ``status`` are accepted as plain strings here, which is why
	    the corresponding keyword arguments carry an ``arg-type`` ignore.
	    """
	    return ToolResult(
	        latency_ms=10,
	        summary=summary,
	        status=status,  # type: ignore[arg-type]
	        tool=tool,  # type: ignore[arg-type]
	    )


	def _acc(*results: ToolResult) -> EvidenceAccumulator:
	    """Return a new ``EvidenceAccumulator`` with ``results`` appended in order."""
	    accumulator = EvidenceAccumulator()
	    for entry in results:
	        accumulator.append(entry)
	    return accumulator


	def _acc_3_success() -> EvidenceAccumulator:
	    """Standard 3-entry accumulator: ev-1, ev-2, ev-3 all success."""
	    policy = _result("policy_match", summary="rule matched")
	    velocity = _result("report_velocity", summary="3 reports in 5min")
	    history = _result("user_history", summary="2 prior violations")
	    # Append order matters: the tests cite these as ev-1, ev-2, ev-3.
	    return _acc(policy, velocity, history)


	# === parse_ev_references =================================================


	class TestParseEvReferences:
	    """Checks on the set of ids that ``parse_ev_references`` extracts from text."""

	    def test_single_reference(self) -> None:
	        found = parse_ev_references("violated rule [ev-1]")
	        assert found == {"ev-1"}

	    def test_multiple_references(self) -> None:
	        found = parse_ev_references(
	            "matched [ev-1] and history [ev-3] shows pattern [ev-2]"
	        )
	        assert found == {"ev-1", "ev-2", "ev-3"}

	    def test_duplicate_references_deduplicated(self) -> None:
	        found = parse_ev_references("per [ev-1] and again [ev-1]")
	        assert found == {"ev-1"}

	    def test_no_references(self) -> None:
	        found = parse_ev_references("no citations here")
	        assert found == set()

	    def test_empty_string(self) -> None:
	        found = parse_ev_references("")
	        assert found == set()

	    def test_malformed_not_matched(self) -> None:
	        # Missing number, non-numeric id, missing brackets, upper-case prefix.
	        found = parse_ev_references("[ev-] [ev-abc] ev-1 [EV-1]")
	        assert found == set()

	    def test_high_ids(self) -> None:
	        found = parse_ev_references("[ev-42] [ev-100]")
	        assert found == {"ev-42", "ev-100"}

	    def test_adjacent_references(self) -> None:
	        found = parse_ev_references("[ev-1][ev-2]")
	        assert found == {"ev-1", "ev-2"}


	# === split_sentences ======================================================


	class TestSplitSentences:
	    """Checks on the list of sentences that ``split_sentences`` produces."""

	    def test_basic_split(self) -> None:
	        expected = ["First sentence.", "Second sentence.", "Third one."]
	        parts = split_sentences("First sentence. Second sentence. Third one.")
	        assert parts == expected

	    def test_question_and_exclamation(self) -> None:
	        parts = split_sentences("Is this spam? Yes it is! Clearly.")
	        assert len(parts) == 3

	    def test_single_sentence(self) -> None:
	        parts = split_sentences("Just one.")
	        assert parts == ["Just one."]

	    def test_empty_string(self) -> None:
	        parts = split_sentences("")
	        assert parts == []

	    def test_preserves_ev_references(self) -> None:
	        parts = split_sentences("Matched rule [ev-1]. History shows [ev-2].")
	        # Each citation marker must survive in at least one returned sentence.
	        for marker in ("[ev-1]", "[ev-2]"):
	            assert any(marker in part for part in parts)


	# === is_substantive =======================================================


	class TestIsSubstantive:
	    """Checks for ``is_substantive`` on claims, framing, recommendations, fragments.

	    Multi-phrase cases are parametrized so that each phrase is reported as its
	    own test case; a single failing phrase no longer hides the others.
	    """

	    def test_factual_claim_is_substantive(self) -> None:
	        assert is_substantive("Author has 3 prior removals in this subreddit") is True

	    @pytest.mark.parametrize(
	        "sentence",
	        [
	            "In summary, the evidence shows:",
	            "Based on the above, we conclude:",
	            "Overall, the analysis indicates:",
	            "In conclusion, this is clear.",
	            "To summarize, the pattern holds.",
	            "Given the above, removal is warranted.",
	            "Considering the evidence, this is clear.",
	        ],
	    )
	    def test_framing_not_substantive(self, sentence: str) -> None:
	        assert is_substantive(sentence) is False

	    @pytest.mark.parametrize(
	        "sentence",
	        [
	            "Recommend: Remove this content.",
	            "Verdict: REMOVE",
	            "Action: Escalate to senior mod.",
	            "Suggestion: lock the thread.",
	        ],
	    )
	    def test_recommendation_not_substantive(self, sentence: str) -> None:
	        assert is_substantive(sentence) is False

	    @pytest.mark.parametrize(
	        "sentence",
	        [
	            "REMOVE",
	            "High risk.",
	            "See above.",
	            "No match found.",
	        ],
	    )
	    def test_short_fragment_not_substantive(self, sentence: str) -> None:
	        assert is_substantive(sentence) is False

	    def test_six_word_sentence_is_substantive(self) -> None:
	        assert is_substantive("The author posted spam three times") is True

	    @pytest.mark.parametrize(
	        "sentence",
	        [
	            "IN SUMMARY, the evidence is clear.",
	            "BASED ON THE ABOVE, remove it.",
	        ],
	    )
	    def test_case_insensitive_framing(self, sentence: str) -> None:
	        assert is_substantive(sentence) is False


	# === contains_ev_reference ================================================


	class TestContainsEvReference:
	    """Checks the boolean returned by ``contains_ev_reference``."""

	    def test_has_reference(self) -> None:
	        outcome = contains_ev_reference("history shows [ev-2]")
	        assert outcome is True

	    def test_no_reference(self) -> None:
	        outcome = contains_ev_reference("no citation here")
	        assert outcome is False

	    def test_multiple_references(self) -> None:
	        outcome = contains_ev_reference("[ev-1] and [ev-3]")
	        assert outcome is True


	# === uncited_substantive_sentences ========================================


	class TestUncitedSubstantiveSentences:
	    """Checks which sentences ``uncited_substantive_sentences`` reports back."""

	    def test_all_cited_returns_empty(self) -> None:
	        flagged = uncited_substantive_sentences(
	            "Author has 3 violations [ev-1]. Thread is heated [ev-2]."
	        )
	        assert flagged == []

	    def test_uncited_claim_returned(self) -> None:
	        cited = "Author has 3 violations [ev-1]. "
	        uncited = "The content is clearly toxic and harmful to the community."
	        flagged = uncited_substantive_sentences(cited + uncited)
	        assert len(flagged) == 1
	        assert "clearly toxic" in flagged[0]

	    def test_framing_without_citation_ok(self) -> None:
	        flagged = uncited_substantive_sentences(
	            "In summary, the evidence is clear. Author violated rules [ev-1]."
	        )
	        assert flagged == []

	    def test_recommendation_without_citation_ok(self) -> None:
	        flagged = uncited_substantive_sentences(
	            "Author has history [ev-1]. Recommend: Remove."
	        )
	        assert flagged == []

	    def test_short_fragment_without_citation_ok(self) -> None:
	        flagged = uncited_substantive_sentences(
	            "Author has history [ev-1]. High risk."
	        )
	        assert flagged == []


	# === ValidationResult =====================================================


	class TestValidationResult:
	    """Checks the ``ok`` / ``failed`` constructors and immutability of the result."""

	    def test_ok(self) -> None:
	        outcome = ValidationResult.ok()
	        assert outcome.passed is True
	        assert outcome.reason == ""

	    def test_failed(self) -> None:
	        outcome = ValidationResult.failed("bad", ids=["ev-99"])
	        assert outcome.passed is False
	        assert outcome.reason == "bad"
	        # Extra keyword arguments are expected to surface under ``details``.
	        assert outcome.details == {"ids": ["ev-99"]}

	    def test_frozen(self) -> None:
	        outcome = ValidationResult.ok()
	        # Assigning to a field must be rejected with AttributeError.
	        with pytest.raises(AttributeError):
	            outcome.passed = False  # type: ignore[misc]


	# === validate_citations (integration) =====================================


	class TestValidateCitations:
	    """Integration checks for ``validate_citations`` against an accumulator.

	    Each test builds an ``EvidenceAccumulator`` through the module helpers,
	    passes a rationale string, and asserts on ``passed``, ``reason`` and, where
	    ids are reported, ``details["ids"]``.
	    """

	    def test_valid_rationale_passes(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has 3 prior violations in this subreddit [ev-3]. "
	            "Report velocity is elevated at 3 in 5 min [ev-2]. "
	            "Content matches Rule 2 against personal attacks [ev-1]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is True

	    def test_empty_rationale_fails(self) -> None:
	        result = validate_citations("", _acc_3_success())
	        assert result.passed is False
	        assert result.reason == "empty_rationale"

	    def test_whitespace_rationale_fails(self) -> None:
	        result = validate_citations(" \n ", _acc_3_success())
	        assert result.passed is False
	        assert result.reason == "empty_rationale"

	    def test_no_citations_fails(self) -> None:
	        rationale = "The author has a long history of violations and should be removed."
	        result = validate_citations(rationale, _acc_3_success())
	        assert result.passed is False
	        assert result.reason == "no_citations"

	    def test_hallucinated_id_fails(self) -> None:
	        acc = _acc_3_success()  # has ev-1, ev-2, ev-3
	        rationale = (
	            "Author has violations [ev-1] and thread context [ev-7] is concerning. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is False
	        assert result.reason == "hallucinated_evidence_ids"
	        assert "ev-7" in result.details["ids"]  # type: ignore[operator]

	    def test_citing_failure_evidence_fails(self) -> None:
	        acc = _acc(
	            _result("policy_match", status="success", summary="matched"),
	            _result("user_history", status="failure", summary="db error"),
	        )
	        rationale = (
	            "Rule matched against personal attacks [ev-1]. "
	            "User history shows concerning pattern [ev-2]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is False
	        assert result.reason == "cited_non_success_evidence"
	        assert "ev-2" in result.details["ids"]  # type: ignore[operator]

	    def test_citing_timeout_evidence_fails(self) -> None:
	        acc = _acc(
	            _result("policy_match", status="success"),
	            _result("thread_context", status="timeout", summary="slow"),
	        )
	        rationale = (
	            "Rule matched [ev-1]. Thread context timed out but shows [ev-2]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is False
	        assert result.reason == "cited_non_success_evidence"
	        # Same reason as the "failure" status case, so the offending id is
	        # checked the same way here.
	        assert "ev-2" in result.details["ids"]  # type: ignore[operator]

	    def test_citing_skipped_evidence_fails(self) -> None:
	        acc = _acc(
	            _result("policy_match", status="success"),
	            _result("prior_actions", status="skipped"),
	        )
	        rationale = (
	            "Rule matched [ev-1]. Prior actions show pattern [ev-2]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is False
	        assert result.reason == "cited_non_success_evidence"
	        # Same reason as the "failure" status case, so the offending id is
	        # checked the same way here.
	        assert "ev-2" in result.details["ids"]  # type: ignore[operator]

	    def test_uncited_claim_fails(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has 3 prior violations [ev-3]. "
	            "The content is clearly toxic and harmful to the community."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is False
	        assert result.reason == "uncited_claims"

	    def test_cited_field_mismatch_fails(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has violations [ev-3]. "
	            "Content matches rules [ev-1]. "
	            "Recommend: Remove."
	        )
	        # ev-2 is declared in cited_evidence_ids but never appears in the rationale.
	        result = validate_citations(
	            rationale, acc, cited_evidence_ids=["ev-1", "ev-2", "ev-3"]
	        )
	        assert result.passed is False
	        assert result.reason == "cited_field_mismatch"

	    def test_cited_field_match_passes(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has violations [ev-3]. "
	            "Content matches rules [ev-1]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(
	            rationale, acc, cited_evidence_ids=["ev-1", "ev-3"]
	        )
	        assert result.passed is True

	    def test_cited_field_none_skips_check(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has violations [ev-3]. "
	            "Content matches rules [ev-1]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc, cited_evidence_ids=None)
	        assert result.passed is True

	    def test_framing_sentences_dont_need_citations(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has 3 prior violations [ev-3]. "
	            "Report velocity is elevated [ev-2]. "
	            "In summary, the evidence clearly supports removal. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is True

	    def test_short_fragments_dont_need_citations(self) -> None:
	        acc = _acc_3_success()
	        rationale = (
	            "Author has 3 prior violations [ev-3]. "
	            "High risk. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is True

	    def test_multiple_hallucinated_ids_all_reported(self) -> None:
	        acc = _acc(_result("policy_match"))  # only ev-1
	        rationale = (
	            "Rule matched [ev-1] and history [ev-5] with context [ev-9]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is False
	        assert result.reason == "hallucinated_evidence_ids"
	        ids = result.details["ids"]
	        assert "ev-5" in ids  # type: ignore[operator]
	        assert "ev-9" in ids  # type: ignore[operator]

	    def test_single_entry_accumulator_valid(self) -> None:
	        acc = _acc(_result("policy_match", summary="matched rule"))
	        rationale = (
	            "Content clearly violates the no-spam rule [ev-1]. "
	            "Recommend: Remove."
	        )
	        result = validate_citations(rationale, acc)
	        assert result.passed is True

	    def test_failure_reason_priority_hallucinated_before_uncited(self) -> None:
	        """Hallucinated IDs are checked before uncited claims."""
	        acc = _acc(_result("policy_match"))  # ev-1 only
	        rationale = (
	            "Rule matched [ev-99]. "  # hallucinated
	            "Author has a pattern of abuse."  # uncited
	        )
	        result = validate_citations(rationale, acc)
	        # Pin the failure itself, not only which reason wins.
	        assert result.passed is False
	        assert result.reason == "hallucinated_evidence_ids"