Spaces:

vxa8502
/

Sage

Running

App Files Files Community

Sage / tests /test_verification.py

vxa8502

Add punctuation normalization

7a112a4 3 days ago

raw

history blame contribute delete

7.19 kB

	"""Tests for sage.core.verification — quote, citation, and forbidden phrase checks."""

	from sage.core.verification import (
	check_forbidden_phrases,
	extract_citations,
	extract_quotes,
	normalize_text,
	verify_citation,
	verify_citations,
	verify_explanation,
	verify_quote_in_evidence,
	)


	class TestExtractQuotes:
	    """Behavioural checks for ``extract_quotes``."""

	    def test_extracts_double_quotes(self):
	        """Each double-quoted span shows up in the extracted result."""
	        found = extract_quotes(
	            'The reviewer said "great sound quality" and "comfortable fit".'
	        )
	        assert "great sound quality" in found
	        assert "comfortable fit" in found

	    def test_extracts_single_quotes(self):
	        """A single-quoted span is extracted as well."""
	        found = extract_quotes(
	            "The reviewer noted 'excellent battery life' in their review."
	        )
	        assert "excellent battery life" in found

	    def test_filters_short_quotes(self):
	        """With ``min_length=4`` the two-character quote is dropped."""
	        sentence = 'Said "ok" and "this is a longer meaningful quote".'
	        found = extract_quotes(sentence, min_length=4)
	        assert "ok" not in found
	        assert "this is a longer meaningful quote" in found

	    def test_deduplicates(self):
	        """A quote that occurs twice in the text is reported once."""
	        found = extract_quotes('"same quote" appears twice: "same quote".')
	        assert found.count("same quote") == 1

	    def test_no_quotes_returns_empty(self):
	        """Text without any quoted span yields an empty list."""
	        found = extract_quotes("No quotes in this text at all.")
	        assert found == []

	    def test_empty_input(self):
	        """The empty string yields an empty list."""
	        nothing = extract_quotes("")
	        assert nothing == []


	class TestNormalizeText:
	    """Behavioural checks for ``normalize_text``."""

	    def test_lowercases(self):
	        """Upper-case letters are folded to lower case."""
	        assert normalize_text("Hello World") == "hello world"

	    def test_collapses_whitespace(self):
	        """A run of several spaces between words becomes a single space."""
	        # The input must contain a multi-space run; a single space would make
	        # this test pass even if no whitespace collapsing happened at all.
	        assert normalize_text("hello    world") == "hello world"

	    def test_strips(self):
	        """Leading and trailing spaces are removed."""
	        assert normalize_text(" hello ") == "hello"

	    def test_strips_punctuation(self):
	        """Punctuation marks are removed from the normalized text."""
	        assert normalize_text("excellent!") == "excellent"
	        assert normalize_text("Hello, World!") == "hello world"

	    def test_strips_apostrophes(self):
	        """Apostrophes are removed without inserting a space in the word."""
	        assert normalize_text("don't") == "dont"
	        assert normalize_text("it's great") == "its great"


	class TestVerifyQuoteInEvidence:
	    """Behavioural checks for ``verify_quote_in_evidence``."""

	    def test_exact_match(self):
	        """A quote that is a literal substring of the evidence is found."""
	        outcome = verify_quote_in_evidence(
	            "sound quality is excellent",
	            ["The sound quality is excellent and the bass is deep."],
	        )
	        assert outcome.found is True

	    def test_no_match(self):
	        """A quote absent from the evidence is reported as not found."""
	        outcome = verify_quote_in_evidence(
	            "sound quality is excellent",
	            ["Battery life is good."],
	        )
	        assert outcome.found is False

	    def test_case_insensitive(self):
	        """Letter case differences do not prevent a match."""
	        outcome = verify_quote_in_evidence(
	            "sound quality is excellent",
	            ["The Sound Quality Is Excellent."],
	        )
	        assert outcome.found is True

	    def test_empty_evidence(self):
	        """With no evidence texts, nothing can be found."""
	        outcome = verify_quote_in_evidence("any quote", [])
	        assert outcome.found is False

	    def test_punctuation_in_quote_matches_without(self):
	        """Punctuation present only in the quote does not block a match."""
	        sources = ["The product is excellent"]
	        outcome = verify_quote_in_evidence("excellent!", sources)
	        assert outcome.found is True

	    def test_punctuation_in_evidence_matches_without(self):
	        """Punctuation present only in the evidence does not block a match."""
	        sources = ["The product is excellent!"]
	        outcome = verify_quote_in_evidence("excellent", sources)
	        assert outcome.found is True

	    def test_apostrophe_mismatch_matches(self):
	        """A quote lacking the apostrophe still matches evidence that has it."""
	        sources = ["I don't recommend this"]
	        outcome = verify_quote_in_evidence("dont recommend", sources)
	        assert outcome.found is True


	class TestVerifyExplanation:
	    """Behavioural checks for ``verify_explanation``."""

	    def test_all_quotes_found(self):
	        """Quotes backed by the evidence count towards ``quotes_found``."""
	        reviews = [
	            "This has great sound quality.",
	            "Very comfortable fit for long sessions.",
	        ]
	        report = verify_explanation(
	            'Reviewers noted "great sound" and "comfortable fit".',
	            reviews,
	        )
	        # Only a lower bound is asserted here, not the exact count.
	        assert report.quotes_found >= 1

	    def test_missing_quotes_detected(self):
	        """A quote with no support in the evidence is counted as missing."""
	        reviews = ["Completely different content about batteries."]
	        report = verify_explanation(
	            'Reviewers said "invented claim not in evidence".',
	            reviews,
	        )
	        assert report.quotes_missing >= 1

	    def test_no_quotes_in_explanation(self):
	        """An explanation without quotes verifies with zero counts."""
	        report = verify_explanation(
	            "This product has good reviews overall.",
	            ["Some review text."],
	        )
	        assert report.all_verified is True
	        assert report.quotes_found == 0
	        assert report.quotes_missing == 0


	class TestCheckForbiddenPhrases:
	    """Behavioural checks for ``check_forbidden_phrases``."""

	    def test_clean_explanation(self):
	        """A neutral sentence produces no violations."""
	        report = check_forbidden_phrases(
	            "Based on reviews, the battery lasts about 8 hours."
	        )
	        assert report.has_violations is False
	        assert report.violations == []

	    def test_detects_forbidden_phrase(self):
	        """The sample sentence below is flagged with at least one violation."""
	        report = check_forbidden_phrases(
	            "This product is highly recommended for everyone."
	        )
	        assert report.has_violations is True
	        assert len(report.violations) > 0

	    def test_empty_input(self):
	        """The empty string has no violations."""
	        report = check_forbidden_phrases("")
	        assert report.has_violations is False


	class TestExtractCitations:
	    """Behavioural checks for ``extract_citations``."""

	    def test_extracts_bracketed_citations(self):
	        """A bracketed id after a quote is returned as a citation."""
	        found = extract_citations('"good sound" [review_123]')
	        assert len(found) >= 1
	        # The first element of each returned entry is treated as the id.
	        leading_fields = [entry[0] for entry in found]
	        assert any("review_123" in field for field in leading_fields)

	    def test_no_citations(self):
	        """Text without brackets yields an empty list."""
	        found = extract_citations("No citations here.")
	        assert found == []


	class TestVerifyCitation:
	    """Behavioural checks for ``verify_citation``."""

	    def test_valid_citation(self):
	        """An id that is listed among the evidence ids is found."""
	        known_ids = ["review_1", "review_2"]
	        known_texts = ["Great product.", "Good value."]
	        outcome = verify_citation(
	            citation_id="review_1",
	            evidence_ids=known_ids,
	            evidence_texts=known_texts,
	        )
	        assert outcome.found is True

	    def test_invalid_citation(self):
	        """An id missing from the evidence ids is not found."""
	        known_ids = ["review_1", "review_2"]
	        known_texts = ["Great product.", "Good value."]
	        outcome = verify_citation(
	            citation_id="review_99",
	            evidence_ids=known_ids,
	            evidence_texts=known_texts,
	        )
	        assert outcome.found is False

	    def test_with_quote_verification(self):
	        """A known id plus a quote contained in its text is found."""
	        known_ids = ["review_1", "review_2"]
	        known_texts = ["Great product with amazing sound.", "Good value."]
	        outcome = verify_citation(
	            citation_id="review_1",
	            evidence_ids=known_ids,
	            evidence_texts=known_texts,
	            quote_text="amazing sound",
	        )
	        assert outcome.found is True


	class TestVerifyCitations:
	    """Behavioural checks for ``verify_citations``."""

	    def test_full_pipeline(self):
	        """Run the whole citation check on two quoted, cited claims."""
	        ids = ["review_1", "review_2"]
	        texts = [
	            "The great sound quality impressed me.",
	            "Offers good value for the price.",
	        ]
	        summary = verify_citations(
	            '"great sound" [review_1] and "good value" [review_2]',
	            ids,
	            texts,
	        )
	        # Shape checks only: the exact verdict and count are not pinned here.
	        assert isinstance(summary.all_valid, bool)
	        assert summary.citations_found >= 0

	    def test_no_citations_passes(self):
	        """An explanation with no citations is reported as all valid."""
	        summary = verify_citations(
	            "Simple explanation without citations.",
	            ["r1"],
	            ["text"],
	        )
	        assert summary.all_valid is True