# Sage / tests / test_verification.py
# vxa8502's picture
# Add punctuation normalization
# 7a112a4
"""Tests for sage.core.verification — quote, citation, and forbidden phrase checks."""
from sage.core.verification import (
check_forbidden_phrases,
extract_citations,
extract_quotes,
normalize_text,
verify_citation,
verify_citations,
verify_explanation,
verify_quote_in_evidence,
)
class TestExtractQuotes:
    """Checks on ``extract_quotes``: quote styles, length filter, de-duplication."""

    def test_extracts_double_quotes(self):
        """Every double-quoted span in the text shows up in the result."""
        found = extract_quotes(
            'The reviewer said "great sound quality" and "comfortable fit".'
        )
        for expected in ("great sound quality", "comfortable fit"):
            assert expected in found

    def test_extracts_single_quotes(self):
        """A single-quoted span is extracted as well."""
        found = extract_quotes(
            "The reviewer noted 'excellent battery life' in their review."
        )
        assert "excellent battery life" in found

    def test_filters_short_quotes(self):
        """With ``min_length=4`` the two-character quote is dropped, the long one kept."""
        source = 'Said "ok" and "this is a longer meaningful quote".'
        found = extract_quotes(source, min_length=4)
        assert "ok" not in found
        assert "this is a longer meaningful quote" in found

    def test_deduplicates(self):
        """A quote repeated in the text is reported exactly once."""
        found = extract_quotes('"same quote" appears twice: "same quote".')
        occurrences = found.count("same quote")
        assert occurrences == 1

    def test_no_quotes_returns_empty(self):
        """Text without any quoted span yields an empty list."""
        assert extract_quotes("No quotes in this text at all.") == []

    def test_empty_input(self):
        """The empty string yields an empty list."""
        result = extract_quotes("")
        assert result == []
class TestNormalizeText:
    """Checks on ``normalize_text``: casing, whitespace and punctuation handling."""

    def test_lowercases(self):
        """Mixed-case input is expected to come back lower-cased."""
        assert normalize_text("Hello World") == "hello world"

    def test_collapses_whitespace(self):
        """A run of several spaces is expected to collapse to a single space."""
        # The input must contain a multi-space run: with a single space the
        # input already equals the expected output, so the assertion would
        # pass even if no collapsing happened at all.
        assert normalize_text("hello    world") == "hello world"

    def test_strips(self):
        """Leading and trailing whitespace is expected to be removed."""
        assert normalize_text(" hello ") == "hello"

    def test_strips_punctuation(self):
        """Punctuation marks are expected to be removed from the output."""
        assert normalize_text("excellent!") == "excellent"
        assert normalize_text("Hello, World!") == "hello world"

    def test_strips_apostrophes(self):
        """Apostrophes are expected to be dropped without inserting a space."""
        assert normalize_text("don't") == "dont"
        assert normalize_text("it's great") == "its great"
class TestVerifyQuoteInEvidence:
    """Checks on ``verify_quote_in_evidence`` via the ``found`` flag of its result."""

    def test_exact_match(self):
        """A quote that is a literal substring of the evidence is found."""
        outcome = verify_quote_in_evidence(
            "sound quality is excellent",
            ["The sound quality is excellent and the bass is deep."],
        )
        assert outcome.found is True

    def test_no_match(self):
        """A quote absent from the evidence is reported as not found."""
        outcome = verify_quote_in_evidence(
            "sound quality is excellent",
            ["Battery life is good."],
        )
        assert outcome.found is False

    def test_case_insensitive(self):
        """Differences in letter case do not prevent a match."""
        snippets = ["The Sound Quality Is Excellent."]
        outcome = verify_quote_in_evidence("sound quality is excellent", snippets)
        assert outcome.found is True

    def test_empty_evidence(self):
        """With no evidence at all, nothing can be found."""
        no_evidence = []
        outcome = verify_quote_in_evidence("any quote", no_evidence)
        assert outcome.found is False

    def test_punctuation_in_quote_matches_without(self):
        """Punctuation only in the quote still matches unpunctuated evidence."""
        snippets = ["The product is excellent"]
        outcome = verify_quote_in_evidence("excellent!", snippets)
        assert outcome.found is True

    def test_punctuation_in_evidence_matches_without(self):
        """Punctuation only in the evidence still matches an unpunctuated quote."""
        snippets = ["The product is excellent!"]
        outcome = verify_quote_in_evidence("excellent", snippets)
        assert outcome.found is True

    def test_apostrophe_mismatch_matches(self):
        """A quote missing the apostrophe matches evidence that has it."""
        outcome = verify_quote_in_evidence(
            "dont recommend",
            ["I don't recommend this"],
        )
        assert outcome.found is True
class TestVerifyExplanation:
    """Checks on ``verify_explanation`` counters and the ``all_verified`` flag."""

    def test_all_quotes_found(self):
        """Quotes backed by the evidence are counted as found.

        Only a lower bound (at least one) is asserted here, not the exact count.
        """
        reviews = [
            "This has great sound quality.",
            "Very comfortable fit for long sessions.",
        ]
        outcome = verify_explanation(
            'Reviewers noted "great sound" and "comfortable fit".',
            reviews,
        )
        assert outcome.quotes_found >= 1

    def test_missing_quotes_detected(self):
        """A quote with no support in the evidence is counted as missing."""
        reviews = ["Completely different content about batteries."]
        outcome = verify_explanation(
            'Reviewers said "invented claim not in evidence".',
            reviews,
        )
        assert outcome.quotes_missing >= 1

    def test_no_quotes_in_explanation(self):
        """An explanation without quotes verifies, with both counters at zero."""
        outcome = verify_explanation(
            "This product has good reviews overall.",
            ["Some review text."],
        )
        assert outcome.all_verified is True
        assert outcome.quotes_found == 0
        assert outcome.quotes_missing == 0
class TestCheckForbiddenPhrases:
    """Checks on ``check_forbidden_phrases`` and its violation report."""

    def test_clean_explanation(self):
        """A neutral sentence produces no violations."""
        report = check_forbidden_phrases(
            "Based on reviews, the battery lasts about 8 hours."
        )
        assert report.has_violations is False
        assert report.violations == []

    def test_detects_forbidden_phrase(self):
        """The sentence below is expected to trigger at least one violation."""
        report = check_forbidden_phrases(
            "This product is highly recommended for everyone."
        )
        assert report.has_violations is True
        assert len(report.violations) > 0

    def test_empty_input(self):
        """The empty string produces no violations."""
        report = check_forbidden_phrases("")
        assert report.has_violations is False
class TestExtractCitations:
    """Checks on ``extract_citations``."""

    def test_extracts_bracketed_citations(self):
        """A bracketed id after a quote is picked up as a citation."""
        extracted = extract_citations('"good sound" [review_123]')
        assert len(extracted) >= 1
        # The first field of each extracted entry is checked for the cited id.
        first_fields = [entry[0] for entry in extracted]
        assert any("review_123" in field for field in first_fields)

    def test_no_citations(self):
        """Text without bracketed ids yields an empty list."""
        extracted = extract_citations("No citations here.")
        assert extracted == []
class TestVerifyCitation:
    """Checks on ``verify_citation`` for known ids, unknown ids and quoted text."""

    def test_valid_citation(self):
        """An id present in ``evidence_ids`` is reported as found."""
        known_ids = ["review_1", "review_2"]
        texts = ["Great product.", "Good value."]
        outcome = verify_citation(
            citation_id="review_1",
            evidence_ids=known_ids,
            evidence_texts=texts,
        )
        assert outcome.found is True

    def test_invalid_citation(self):
        """An id absent from ``evidence_ids`` is reported as not found."""
        known_ids = ["review_1", "review_2"]
        texts = ["Great product.", "Good value."]
        outcome = verify_citation(
            citation_id="review_99",
            evidence_ids=known_ids,
            evidence_texts=texts,
        )
        assert outcome.found is False

    def test_with_quote_verification(self):
        """A known id together with a quote contained in its text is found."""
        known_ids = ["review_1", "review_2"]
        texts = ["Great product with amazing sound.", "Good value."]
        outcome = verify_citation(
            citation_id="review_1",
            evidence_ids=known_ids,
            evidence_texts=texts,
            quote_text="amazing sound",
        )
        assert outcome.found is True
class TestVerifyCitations:
    """Checks on ``verify_citations`` over a whole explanation."""

    def test_full_pipeline(self):
        """Smoke test: the call succeeds and the result has the expected shape.

        Only the type of ``all_valid`` and the non-negativity of
        ``citations_found`` are asserted, not specific values.
        """
        ids = ["review_1", "review_2"]
        texts = [
            "The great sound quality impressed me.",
            "Offers good value for the price.",
        ]
        outcome = verify_citations(
            '"great sound" [review_1] and "good value" [review_2]',
            ids,
            texts,
        )
        assert isinstance(outcome.all_valid, bool)
        assert outcome.citations_found >= 0

    def test_no_citations_passes(self):
        """An explanation without any citation is treated as valid."""
        outcome = verify_citations(
            "Simple explanation without citations.",
            ["r1"],
            ["text"],
        )
        assert outcome.all_valid is True