# Source: philverify-api / tests/test_improvements.py (commit c78c2c1, 19.6 kB)
# Last commit by Ryan Christian D. Deniega:
#   feat: extension button placement, text extraction, OCR display + ML improvements
"""
Tests for the 6 NLP pipeline improvements:
1. calamanCy NER fallback chain
2. Tagalog-RoBERTa classifier (ModelNotFoundError)
3. EnsembleClassifier
4. EDA augmentation
5. Sentence-scoring ClaimExtractor
6. NLI stance detection (Rule 1.5)
"""
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
# Ensure project root is on path: put the grandparent of this file (the
# directory that contains this test file's own directory) first on sys.path,
# so the `ml`, `nlp`, and `evidence` packages imported by the tests below are
# looked up there first.
sys.path.insert(0, str(Path(__file__).parent.parent))
# ── Helpers ───────────────────────────────────────────────────────────────────
def _make_sample(text: str, label: int = 0):
    """Build an ``ml.dataset.Sample`` from a text and an integer label.

    The import is local to the function, matching the per-test imports used
    throughout this file.
    """
    from ml.dataset import Sample

    sample = Sample(label=label, text=text)
    return sample
# ══════════════════════════════════════════════════════════════════════════════
# Part 1 — EDA Augmentation
# ══════════════════════════════════════════════════════════════════════════════
class TestEDAugmentation:
    """Tests for ``ml.dataset.augment_samples``."""

    def test_empty_input_returns_empty(self):
        """An empty sample list produces an empty augmentation list."""
        from ml.dataset import augment_samples

        assert augment_samples([]) == []

    def test_augment_produces_two_variants_per_sample(self):
        """A single multi-word sample yields exactly two augmented variants."""
        from ml.dataset import augment_samples

        samples = [_make_sample("DOH confirms 500 new COVID cases today", 0)]
        aug = augment_samples(samples, seed=42)
        # One deletion + one swap variant per sample
        assert len(aug) == 2

    def test_augmented_labels_match_originals(self):
        """Every augmented sample carries a label present in the input set."""
        from ml.dataset import augment_samples

        samples = [
            _make_sample("Senate passes new bill on health care reform", 0),
            _make_sample("SHOCKING truth about vaccines hidden by government", 2),
        ]
        aug = augment_samples(samples, seed=42)
        # Without this guard the loop below passes vacuously when nothing
        # was generated, hiding a broken augmenter.
        assert aug, "augment_samples returned no variants for multi-word samples"
        orig_labels = {s.label for s in samples}
        for a in aug:
            assert a.label in orig_labels

    def test_short_samples_skipped(self):
        """Samples of one or two words produce no augmented variants."""
        from ml.dataset import augment_samples

        samples = [
            _make_sample("ok", 1),         # 1 word — too short
            _make_sample("fake news", 2),  # 2 words — too short
        ]
        aug = augment_samples(samples, seed=42)
        assert aug == []

    def test_augmented_texts_differ_from_original(self):
        """At least one augmented text differs from the source text."""
        from ml.dataset import augment_samples

        original = "GRABE sinabi ng DOH na 200 bata ang nagkasakit sa bagong virus"
        samples = [_make_sample(original, 2)]
        aug = augment_samples(samples, seed=99)
        # At least one variant should differ
        assert any(a.text != original for a in aug)

    def test_augment_triples_training_set_size(self):
        """Augmenting the training split adds between 1x and 2x its size."""
        from ml.dataset import get_split, augment_samples

        train, _ = get_split()
        aug = augment_samples(train, seed=42)
        # aug should be at most 2× train size (some short samples may be skipped)
        assert len(aug) >= len(train)
        assert len(aug) <= 2 * len(train)

    def test_augmented_samples_are_non_empty(self):
        """No augmented variant is an empty or whitespace-only string."""
        from ml.dataset import augment_samples

        samples = [_make_sample("The senator confirmed signing the new law today", 0)]
        aug = augment_samples(samples, seed=42)
        # Guard against a vacuous pass: an empty result would skip the loop.
        assert aug, "augment_samples returned no variants for a multi-word sample"
        for a in aug:
            assert len(a.text.strip()) > 0
# ══════════════════════════════════════════════════════════════════════════════
# Part 2 — Sentence-scoring ClaimExtractor
# ══════════════════════════════════════════════════════════════════════════════
class TestClaimExtractor:
    """Tests for ``nlp.claim_extractor.ClaimExtractor.extract``."""

    def test_instantiates_without_loading_model(self):
        """A fresh ClaimExtractor exposes no lazy model-loading attributes."""
        from nlp.claim_extractor import ClaimExtractor

        extractor = ClaimExtractor()
        # Neither of the lazy-loading attributes may be present
        for attr_name in ("_pipe", "_loaded"):
            assert not hasattr(extractor, attr_name)

    def test_passthrough_for_short_text(self):
        """Very short input is returned unchanged with method 'passthrough'."""
        from nlp.claim_extractor import ClaimExtractor

        extractor = ClaimExtractor()
        outcome = extractor.extract("hi")
        assert outcome.method == "passthrough"
        assert outcome.claim == "hi"

    def test_sentence_scoring_method_on_informative_sentence(self):
        """An informative sentence is selected via the sentence-scoring path."""
        from nlp.claim_extractor import ClaimExtractor

        # Has a date, a verb, and named org — should score high
        text = "GRABE! Sinabi ng DOH noong Martes na 200 bata ang nagkasakit sa bagong virus sa Maynila."
        outcome = ClaimExtractor().extract(text)
        # Should pick the DOH sentence, not all text or just "GRABE!"
        assert outcome.method == "sentence_scoring"
        assert any(marker in outcome.claim for marker in ("DOH", "200"))

    def test_heuristic_fallback_when_no_scored_sentences(self):
        """Text without dates, numbers, or verbs uses one of the two sentence methods."""
        from nlp.claim_extractor import ClaimExtractor

        # Text with no dates, no numbers, no verbs
        text = "Wow amazing incredible unbelievable spectacular incomprehensible."
        accepted_methods = ("sentence_heuristic", "sentence_scoring")
        outcome = ClaimExtractor().extract(text)
        assert outcome.method in accepted_methods

    def test_returns_claim_result_dataclass(self):
        """extract() returns a ClaimResult whose claim and method are strings."""
        from nlp.claim_extractor import ClaimExtractor, ClaimResult

        outcome = ClaimExtractor().extract("The president signed the new healthcare law today.")
        assert isinstance(outcome, ClaimResult)
        assert isinstance(outcome.claim, str)
        assert isinstance(outcome.method, str)

    def test_picks_specific_sentence_over_clickbait_opener(self):
        """The concrete claim is preferred over the clickbait opener."""
        from nlp.claim_extractor import ClaimExtractor

        text = "OMG! Natuklasan ng mga siyentipiko na 5,000 tao ang namatay dahil sa bagong sakit ngayong Enero."
        outcome = ClaimExtractor().extract(text)
        # The specific claim (5000 deaths) should be preferred over "OMG!"
        mentions_specifics = "5,000" in outcome.claim or "siyentipiko" in outcome.claim
        assert mentions_specifics or outcome.method == "sentence_scoring"
# ══════════════════════════════════════════════════════════════════════════════
# Part 3 — TagalogRobertaClassifier
# ══════════════════════════════════════════════════════════════════════════════
class TestTagalogRobertaClassifier:
    """Tests for the missing-checkpoint behaviour of TagalogRobertaClassifier."""

    def test_raises_model_not_found_when_checkpoint_missing(self, tmp_path, monkeypatch):
        """Constructing the classifier raises ModelNotFoundError when MODEL_DIR is absent."""
        import ml.tagalog_roberta_classifier as tagalog_mod

        # Point MODEL_DIR at a path under tmp_path that is never created
        missing_dir = tmp_path / "nonexistent_model"
        monkeypatch.setattr(tagalog_mod, "MODEL_DIR", missing_dir)
        with pytest.raises(tagalog_mod.ModelNotFoundError):
            tagalog_mod.TagalogRobertaClassifier()

    def test_model_not_found_is_subclass_of_file_not_found(self):
        """ModelNotFoundError derives from the built-in FileNotFoundError."""
        from ml.xlm_roberta_classifier import ModelNotFoundError as xlm_error

        assert issubclass(xlm_error, FileNotFoundError)

    def test_shares_same_model_not_found_error(self):
        """Both classifier modules expose the very same ModelNotFoundError class.

        The engine catches ModelNotFoundError from xlm_roberta_classifier; since
        the tagalog module re-uses that class, the same except clause catches it.
        """
        from ml.tagalog_roberta_classifier import ModelNotFoundError as tagalog_error
        from ml.xlm_roberta_classifier import ModelNotFoundError as xlm_error

        assert xlm_error is tagalog_error
# ══════════════════════════════════════════════════════════════════════════════
# Part 4 — EnsembleClassifier
# ══════════════════════════════════════════════════════════════════════════════
class TestEnsembleClassifier:
    """Tests for ``ml.ensemble_classifier.EnsembleClassifier`` using stub members."""

    def _make_stub(self, probs_list: list[float]):
        """Return a stub classifier whose predict_probs returns fixed probabilities.

        ``predict_probs`` returns a 3-tuple ``(float32 tensor of probs_list,
        None, None)``, and ``_salient_tokens`` returns ``["token1"]``.
        """
        import torch

        stub = MagicMock()
        stub.predict_probs.return_value = (
            torch.tensor(probs_list, dtype=torch.float32),
            None,
            None,
        )
        stub._salient_tokens = MagicMock(return_value=["token1"])
        return stub

    def test_raises_value_error_for_empty_list(self):
        """An ensemble cannot be built from an empty classifier list."""
        from ml.ensemble_classifier import EnsembleClassifier

        with pytest.raises(ValueError):
            EnsembleClassifier([])

    def test_single_classifier_returns_its_prediction(self):
        """With one member, the ensemble reports that member's top class."""
        from ml.ensemble_classifier import EnsembleClassifier

        stub = self._make_stub([0.7, 0.2, 0.1])
        ens = EnsembleClassifier([stub])
        result = ens.predict("any text")
        assert result.verdict == "Credible"
        assert abs(result.confidence - 70.0) < 1.0

    def test_two_classifiers_averages_probabilities(self):
        """Two members' probabilities are averaged before picking the verdict."""
        from ml.ensemble_classifier import EnsembleClassifier

        # First:   [0.8, 0.1, 0.1] → Credible 80%
        # Second:  [0.4, 0.5, 0.1] → Unverified 50%
        # Average: [0.6, 0.3, 0.1] → Credible 60%
        stub1 = self._make_stub([0.8, 0.1, 0.1])
        stub2 = self._make_stub([0.4, 0.5, 0.1])
        ens = EnsembleClassifier([stub1, stub2])
        result = ens.predict("test text")
        assert result.verdict == "Credible"
        assert abs(result.confidence - 60.0) < 1.5

    def test_failing_classifier_gracefully_skipped(self):
        """A member that raises is ignored; the remaining member decides."""
        from ml.ensemble_classifier import EnsembleClassifier

        good = self._make_stub([0.1, 0.1, 0.8])  # Likely Fake
        bad = MagicMock()
        bad.predict_probs.side_effect = RuntimeError("model failed")
        ens = EnsembleClassifier([good, bad])
        result = ens.predict("test text")
        # Should still get a result from the good classifier
        assert result.verdict == "Likely Fake"

    def test_all_classifiers_failing_returns_unverified_neutral(self):
        """When every member raises, the result is Unverified at 33.3 confidence."""
        from ml.ensemble_classifier import EnsembleClassifier

        bad = MagicMock()
        bad.predict_probs.side_effect = RuntimeError("fail")
        ens = EnsembleClassifier([bad])
        result = ens.predict("test")
        assert result.verdict == "Unverified"
        # approx() avoids depending on the exact binary representation of 33.3
        assert result.confidence == pytest.approx(33.3)

    def test_result_has_correct_type(self):
        """predict() returns a Layer1Result with a list of triggered features."""
        from ml.ensemble_classifier import EnsembleClassifier
        from ml.xlm_roberta_classifier import Layer1Result

        stub = self._make_stub([0.5, 0.3, 0.2])
        ens = EnsembleClassifier([stub])
        result = ens.predict("test")
        assert isinstance(result, Layer1Result)
        assert isinstance(result.triggered_features, list)
# ══════════════════════════════════════════════════════════════════════════════
# Part 5 — NLI Stance Detection
# ══════════════════════════════════════════════════════════════════════════════
class TestNLIStanceDetector:
    """Tests for ``evidence.stance_detector.detect_stance`` with ``_get_nli`` patched."""

    def _reset_nli_cache(self):
        """Reset the module-level NLI singleton between tests."""
        import evidence.stance_detector as stance_mod

        for attr_name, cleared in (("_nli_pipe", None), ("_nli_loaded", False)):
            setattr(stance_mod, attr_name, cleared)

    def test_falls_through_to_keywords_when_nli_unavailable(self):
        """When NLI model can't be loaded, keyword rules still work."""
        import evidence.stance_detector as stance_mod

        self._reset_nli_cache()
        call_kwargs = dict(
            claim="Vaccines are safe",
            article_title="Fact check: COVID vaccines proven effective",
            article_description="Experts confirm vaccines are safe and effective after extensive testing.",
            article_url="",
            similarity=0.7,
        )
        with patch.object(stance_mod, "_get_nli", return_value=None):
            outcome = stance_mod.detect_stance(**call_kwargs)

        stance_enum = stance_mod.Stance
        # Smoke check: any of these three stances is accepted; the point is
        # that the keyword path runs without crashing when NLI is unavailable.
        accepted = (stance_enum.SUPPORTS, stance_enum.NOT_ENOUGH_INFO, stance_enum.REFUTES)
        assert outcome.stance in accepted

    def test_nli_supports_high_confidence(self):
        """When NLI returns 'supports' at ≥0.65, stance is SUPPORTS with NLI reason."""
        import evidence.stance_detector as stance_mod

        self._reset_nli_cache()
        nli_output = {
            "labels": ["supports the claim", "contradicts the claim", "unrelated"],
            "scores": [0.82, 0.12, 0.06],
        }
        fake_nli = MagicMock(return_value=nli_output)
        with patch.object(stance_mod, "_get_nli", return_value=fake_nli):
            outcome = stance_mod.detect_stance(
                claim="Government confirmed 500 new cases",
                article_title="Government says 500 new cases recorded",
                article_description="Officials confirmed today that 500 new cases were recorded nationwide.",
                similarity=0.75,
            )

        assert outcome.stance == stance_mod.Stance.SUPPORTS
        assert "NLI" in outcome.reason

    def test_nli_contradicts_high_confidence(self):
        """When NLI returns 'contradicts' at ≥0.65, stance is REFUTES with NLI reason."""
        import evidence.stance_detector as stance_mod

        self._reset_nli_cache()
        nli_output = {
            "labels": ["contradicts the claim", "supports the claim", "unrelated"],
            "scores": [0.78, 0.15, 0.07],
        }
        fake_nli = MagicMock(return_value=nli_output)
        with patch.object(stance_mod, "_get_nli", return_value=fake_nli):
            outcome = stance_mod.detect_stance(
                claim="There is no evidence of fraud",
                article_title="Evidence of widespread fraud found",
                article_description="Investigators found extensive evidence of fraud in the election.",
                similarity=0.6,
            )

        assert outcome.stance == stance_mod.Stance.REFUTES
        assert "NLI" in outcome.reason

    def test_nli_low_confidence_falls_through_to_keywords(self):
        """NLI confidence < 0.65 — should fall through and use keyword rules."""
        import evidence.stance_detector as stance_mod

        self._reset_nli_cache()
        nli_output = {
            "labels": ["supports the claim", "contradicts the claim", "unrelated"],
            "scores": [0.45, 0.35, 0.20],  # below 0.65 threshold
        }
        fake_nli = MagicMock(return_value=nli_output)
        with patch.object(stance_mod, "_get_nli", return_value=fake_nli):
            outcome = stance_mod.detect_stance(
                claim="Senator is guilty of corruption",
                article_title="Fact check: False claim about senator",
                article_description="This claim has been debunked by multiple fact-checkers.",
                similarity=0.5,
            )

        # Keyword "debunked" should trigger REFUTES
        assert outcome.stance == stance_mod.Stance.REFUTES

    def test_short_description_skips_nli(self):
        """Article description shorter than 30 chars → NLI skipped, no error."""
        import evidence.stance_detector as stance_mod

        self._reset_nli_cache()
        fake_nli = MagicMock()
        with patch.object(stance_mod, "_get_nli", return_value=fake_nli):
            stance_mod.detect_stance(
                claim="Some claim",
                article_title="Short article",
                article_description="Short.",  # <30 chars
                similarity=0.5,
            )

        # NLI should not have been called
        fake_nli.assert_not_called()
# ══════════════════════════════════════════════════════════════════════════════
# Part 6 — calamanCy NER Fallback Chain
# ══════════════════════════════════════════════════════════════════════════════
class TestCalamanCyNERFallback:
    """Tests for the fallback paths of ``nlp.ner.EntityExtractor``."""

    def _fresh_extractor(self):
        """Return a fresh (unloaded) EntityExtractor from a reloaded ``nlp.ner``."""
        import importlib
        import nlp.ner

        importlib.reload(nlp.ner)
        return nlp.ner.EntityExtractor()

    def test_falls_back_to_spacy_when_calamancy_missing(self, monkeypatch):
        """With calamancy treated as missing, loading completes via the spaCy fallback.

        ``_load_model`` is replaced by a stand-in that skips calamancy and tries
        spaCy's ``en_core_web_sm``; if that also fails, ``_nlp`` is set to None.
        The test only checks that loading finishes and marks the extractor loaded.
        """
        import types

        import nlp.ner as mod

        extractor = mod.EntityExtractor()
        extractor._loaded = False  # force reload

        def patched_load(self):
            self._loaded = True
            # calamancy is simulated as not installed, so go straight to spaCy.
            try:
                import spacy

                self._nlp = spacy.load("en_core_web_sm")
            except Exception:
                # Deliberately broad: spaCy or its model may be missing in the
                # test environment; either way the hint-based path remains.
                self._nlp = None

        extractor._load_model = types.MethodType(patched_load, extractor)
        extractor._load_model()
        # Either spaCy loaded successfully or fell back to None
        assert extractor._loaded is True

    def test_hint_based_fallback_when_both_unavailable(self):
        """When both calamancy and spaCy fail, hint-based NER still works."""
        import nlp.ner as mod

        extractor = mod.EntityExtractor()
        extractor._loaded = True
        extractor._nlp = None  # force hint-based path
        result = extractor.extract("Sinabi ni Marcos sa Davao tungkol sa DOH")
        assert isinstance(result.persons, list)
        assert isinstance(result.organizations, list)
        assert isinstance(result.locations, list)
        # Should find hint-based entities
        assert any("Marcos" in p for p in result.persons)

    def test_ner_result_method_reflects_path(self):
        """method field on NERResult reflects which extraction path was used."""
        import nlp.ner as mod

        extractor = mod.EntityExtractor()
        extractor._loaded = True
        extractor._nlp = None
        result = extractor._hint_based_extract("Marcos is in Manila with DOH")
        assert result.method == "hints"

    def test_extract_with_no_model_returns_ner_result(self):
        """extract() without a model still returns an NERResult containing dates."""
        from nlp.ner import EntityExtractor, NERResult

        e = EntityExtractor()
        e._loaded = True
        e._nlp = None
        result = e.extract("DOH confirmed 500 cases in Cebu on January 2026")
        assert isinstance(result, NERResult)
        assert len(result.dates) > 0  # Should find "January 2026"