Spaces:
Running
Running
Ryan Christian D. Deniega
feat: extension button placement, text extraction, OCR display + ML improvements
c78c2c1 | """ | |
| Tests for the 6 NLP pipeline improvements: | |
| 1. calamanCy NER fallback chain | |
| 2. Tagalog-RoBERTa classifier (ModelNotFoundError) | |
| 3. EnsembleClassifier | |
| 4. EDA augmentation | |
| 5. Sentence-scoring ClaimExtractor | |
| 6. NLI stance detection (Rule 1.5) | |
| """ | |
| import sys | |
| from pathlib import Path | |
| from unittest.mock import patch, MagicMock | |
| import pytest | |
| # Ensure project root is on path | |
| sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| # ── Helpers ────────────────────────────────────────────────────────────────── | |
def _make_sample(text: str, label: int = 0):
    """Build a ``ml.dataset.Sample`` from *text* and *label*.

    The import is deferred to call time, so ``ml.dataset`` is only loaded
    when a test actually builds a sample.
    """
    from ml.dataset import Sample

    sample = Sample(text=text, label=label)
    return sample
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Part 1 — EDA Augmentation | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class TestEDAugmentation:
    """Checks for ``ml.dataset.augment_samples``."""

    def test_empty_input_returns_empty(self):
        """An empty sample list yields an empty augmentation list."""
        from ml.dataset import augment_samples

        augmented = augment_samples([])
        assert augmented == []

    def test_augment_produces_two_variants_per_sample(self):
        """A single long-enough sample is expected to yield two variants."""
        from ml.dataset import augment_samples

        source = _make_sample("DOH confirms 500 new COVID cases today", 0)
        augmented = augment_samples([source], seed=42)
        # Expected: one deletion variant plus one swap variant.
        assert len(augmented) == 2

    def test_augmented_labels_match_originals(self):
        """Every augmented sample carries a label seen in the input."""
        from ml.dataset import augment_samples

        originals = [
            _make_sample("Senate passes new bill on health care reform", 0),
            _make_sample("SHOCKING truth about vaccines hidden by government", 2),
        ]
        augmented = augment_samples(originals, seed=42)
        allowed_labels = {sample.label for sample in originals}
        assert all(variant.label in allowed_labels for variant in augmented)

    def test_short_samples_skipped(self):
        """Samples of one or two words are expected to produce no variants."""
        from ml.dataset import augment_samples

        too_short = [
            _make_sample("ok", 1),         # one word
            _make_sample("fake news", 2),  # two words
        ]
        assert augment_samples(too_short, seed=42) == []

    def test_augmented_texts_differ_from_original(self):
        """At least one variant must not be a verbatim copy of the input."""
        from ml.dataset import augment_samples

        original = "GRABE sinabi ng DOH na 200 bata ang nagkasakit sa bagong virus"
        augmented = augment_samples([_make_sample(original, 2)], seed=99)
        changed = [variant for variant in augmented if variant.text != original]
        assert len(changed) > 0

    def test_augment_triples_training_set_size(self):
        """Augmented count lies between 1x and 2x the training-split size.

        Together with the original split, that is between 2x and 3x.
        """
        from ml.dataset import augment_samples, get_split

        train, _ = get_split()
        augmented = augment_samples(train, seed=42)
        # The upper bound is two variants per sample; short samples may be
        # skipped, which is why the lower bound is looser.
        n_train = len(train)
        assert n_train <= len(augmented) <= 2 * n_train

    def test_augmented_samples_are_non_empty(self):
        """No variant may consist solely of whitespace."""
        from ml.dataset import augment_samples

        source = _make_sample("The senator confirmed signing the new law today", 0)
        augmented = augment_samples([source], seed=42)
        assert all(len(variant.text.strip()) > 0 for variant in augmented)
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Part 2 — Sentence-scoring ClaimExtractor | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class TestClaimExtractor:
    """Checks for ``nlp.claim_extractor.ClaimExtractor``."""

    def test_instantiates_without_loading_model(self):
        """A fresh extractor carries no lazy-loading attributes."""
        from nlp.claim_extractor import ClaimExtractor

        extractor = ClaimExtractor()
        # Neither the pipeline handle nor the loaded flag should exist.
        assert not hasattr(extractor, '_pipe')
        assert not hasattr(extractor, '_loaded')

    def test_passthrough_for_short_text(self):
        """Very short input is returned unchanged with method 'passthrough'."""
        from nlp.claim_extractor import ClaimExtractor

        outcome = ClaimExtractor().extract("hi")
        assert outcome.method == "passthrough"
        assert outcome.claim == "hi"

    def test_sentence_scoring_method_on_informative_sentence(self):
        """An informative sentence is chosen via sentence scoring."""
        from nlp.claim_extractor import ClaimExtractor

        # Contains a day reference, a verb and a named organisation, so it
        # is expected to score high.
        text = "GRABE! Sinabi ng DOH noong Martes na 200 bata ang nagkasakit sa bagong virus sa Maynila."
        outcome = ClaimExtractor().extract(text)
        # The DOH sentence should win over the full text or the bare "GRABE!".
        assert outcome.method == "sentence_scoring"
        claim = outcome.claim
        assert "DOH" in claim or "200" in claim

    def test_heuristic_fallback_when_no_scored_sentences(self):
        """Text without dates, numbers or verbs still yields a known method."""
        from nlp.claim_extractor import ClaimExtractor

        text = "Wow amazing incredible unbelievable spectacular incomprehensible."
        outcome = ClaimExtractor().extract(text)
        assert outcome.method in ("sentence_heuristic", "sentence_scoring")

    def test_returns_claim_result_dataclass(self):
        """``extract`` returns a ``ClaimResult`` with string fields."""
        from nlp.claim_extractor import ClaimExtractor, ClaimResult

        outcome = ClaimExtractor().extract(
            "The president signed the new healthcare law today."
        )
        assert isinstance(outcome, ClaimResult)
        assert isinstance(outcome.claim, str)
        assert isinstance(outcome.method, str)

    def test_picks_specific_sentence_over_clickbait_opener(self):
        """The concrete claim is preferred over the "OMG!" opener."""
        from nlp.claim_extractor import ClaimExtractor

        text = "OMG! Natuklasan ng mga siyentipiko na 5,000 tao ang namatay dahil sa bagong sakit ngayong Enero."
        outcome = ClaimExtractor().extract(text)
        claim = outcome.claim
        # Passes if the specific claim was kept or sentence scoring was used.
        assert (
            "5,000" in claim
            or "siyentipiko" in claim
            or outcome.method == "sentence_scoring"
        )
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Part 3 — TagalogRobertaClassifier | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class TestTagalogRobertaClassifier:
    """Checks the missing-checkpoint error of the Tagalog-RoBERTa classifier."""

    def test_raises_model_not_found_when_checkpoint_missing(self, tmp_path, monkeypatch):
        """Constructing the classifier without a checkpoint dir must raise."""
        import ml.tagalog_roberta_classifier as tagalog_mod

        # Point the module at a path that does not exist under tmp_path.
        missing_dir = tmp_path / "nonexistent_model"
        monkeypatch.setattr(tagalog_mod, "MODEL_DIR", missing_dir)

        with pytest.raises(tagalog_mod.ModelNotFoundError):
            tagalog_mod.TagalogRobertaClassifier()

    def test_model_not_found_is_subclass_of_file_not_found(self):
        """``ModelNotFoundError`` derives from the builtin ``FileNotFoundError``."""
        from ml.xlm_roberta_classifier import ModelNotFoundError

        assert issubclass(ModelNotFoundError, FileNotFoundError)

    def test_shares_same_model_not_found_error(self):
        """Both classifier modules expose the very same exception class.

        Identity (not mere equality) means a single ``except`` clause
        written against one module also catches the other's error.
        """
        from ml.xlm_roberta_classifier import ModelNotFoundError as xlm_error
        from ml.tagalog_roberta_classifier import ModelNotFoundError as tagalog_error

        assert xlm_error is tagalog_error
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Part 4 — EnsembleClassifier | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class TestEnsembleClassifier:
    """Checks for ``ml.ensemble_classifier.EnsembleClassifier`` using stubs."""

    def _make_stub(self, probs_list: list[float]):
        """Return a stub classifier whose predict_probs returns fixed probabilities.

        ``predict_probs`` yields a 3-tuple: a float32 tensor built from
        *probs_list*, followed by two ``None`` placeholders.
        """
        import torch

        stub = MagicMock()
        stub.predict_probs.return_value = (
            torch.tensor(probs_list, dtype=torch.float32),
            None,
            None,
        )
        stub._salient_tokens = MagicMock(return_value=["token1"])
        return stub

    def test_raises_value_error_for_empty_list(self):
        """An ensemble cannot be built from zero classifiers."""
        from ml.ensemble_classifier import EnsembleClassifier

        with pytest.raises(ValueError):
            EnsembleClassifier([])

    def test_single_classifier_returns_its_prediction(self):
        """With one member, the ensemble mirrors that member's prediction."""
        from ml.ensemble_classifier import EnsembleClassifier

        stub = self._make_stub([0.7, 0.2, 0.1])
        ens = EnsembleClassifier([stub])
        result = ens.predict("any text")
        assert result.verdict == "Credible"
        assert abs(result.confidence - 70.0) < 1.0

    def test_two_classifiers_averages_probabilities(self):
        """Two members are combined by averaging their probabilities."""
        from ml.ensemble_classifier import EnsembleClassifier

        # First:   [0.8, 0.1, 0.1] -> Credible 80%
        # Second:  [0.4, 0.5, 0.1] -> Unverified 50%
        # Average: [0.6, 0.3, 0.1] -> Credible 60%
        stub1 = self._make_stub([0.8, 0.1, 0.1])
        stub2 = self._make_stub([0.4, 0.5, 0.1])
        ens = EnsembleClassifier([stub1, stub2])
        result = ens.predict("test text")
        assert result.verdict == "Credible"
        assert abs(result.confidence - 60.0) < 1.5

    def test_failing_classifier_gracefully_skipped(self):
        """A member that raises is ignored; the remaining member decides."""
        from ml.ensemble_classifier import EnsembleClassifier

        good = self._make_stub([0.1, 0.1, 0.8])  # Likely Fake
        bad = MagicMock()
        bad.predict_probs.side_effect = RuntimeError("model failed")
        ens = EnsembleClassifier([good, bad])
        result = ens.predict("test text")
        # Should still get a result from the good classifier
        assert result.verdict == "Likely Fake"

    def test_all_classifiers_failing_returns_unverified_neutral(self):
        """When every member raises, the result is a neutral 'Unverified'."""
        from ml.ensemble_classifier import EnsembleClassifier

        bad = MagicMock()
        bad.predict_probs.side_effect = RuntimeError("fail")
        ens = EnsembleClassifier([bad])
        result = ens.predict("test")
        assert result.verdict == "Unverified"
        # Compare with a float tolerance rather than exact equality, in line
        # with the tolerance-based confidence checks in the sibling tests.
        assert result.confidence == pytest.approx(33.3)

    def test_result_has_correct_type(self):
        """``predict`` returns a ``Layer1Result`` with a list of features."""
        from ml.ensemble_classifier import EnsembleClassifier
        from ml.xlm_roberta_classifier import Layer1Result

        stub = self._make_stub([0.5, 0.3, 0.2])
        ens = EnsembleClassifier([stub])
        result = ens.predict("test")
        assert isinstance(result, Layer1Result)
        assert isinstance(result.triggered_features, list)
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Part 5 — NLI Stance Detection | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class TestNLIStanceDetector:
    """Checks for ``evidence.stance_detector.detect_stance`` with a mocked NLI pipe."""

    def _reset_nli_cache(self):
        """Reset the module-level NLI singleton between tests."""
        import evidence.stance_detector as mod

        mod._nli_pipe = None
        mod._nli_loaded = False

    def test_falls_through_to_keywords_when_nli_unavailable(self):
        """When NLI model can't be loaded, keyword rules still work."""
        import evidence.stance_detector as mod

        self._reset_nli_cache()
        with patch.object(mod, '_get_nli', return_value=None):
            result = mod.detect_stance(
                claim="Vaccines are safe",
                article_title="Fact check: COVID vaccines proven effective",
                article_description="Experts confirm vaccines are safe and effective after extensive testing.",
                article_url="",
                similarity=0.7,
            )
        # The article text contains "confirm", which the keyword rules may
        # map to SUPPORTS. This is a smoke test: it only requires that the
        # call completes and returns one of the known stances.
        assert result.stance in (
            mod.Stance.SUPPORTS,
            mod.Stance.NOT_ENOUGH_INFO,
            mod.Stance.REFUTES,
        )

    def test_nli_supports_high_confidence(self):
        """When NLI returns 'supports' at >=0.65, stance is SUPPORTS with NLI reason."""
        import evidence.stance_detector as mod

        self._reset_nli_cache()
        mock_nli = MagicMock()
        mock_nli.return_value = {
            "labels": ["supports the claim", "contradicts the claim", "unrelated"],
            "scores": [0.82, 0.12, 0.06],
        }
        with patch.object(mod, '_get_nli', return_value=mock_nli):
            result = mod.detect_stance(
                claim="Government confirmed 500 new cases",
                article_title="Government says 500 new cases recorded",
                article_description="Officials confirmed today that 500 new cases were recorded nationwide.",
                similarity=0.75,
            )
        assert result.stance == mod.Stance.SUPPORTS
        assert "NLI" in result.reason

    def test_nli_contradicts_high_confidence(self):
        """When NLI returns 'contradicts' at >=0.65, stance is REFUTES with NLI reason."""
        import evidence.stance_detector as mod

        self._reset_nli_cache()
        mock_nli = MagicMock()
        mock_nli.return_value = {
            "labels": ["contradicts the claim", "supports the claim", "unrelated"],
            "scores": [0.78, 0.15, 0.07],
        }
        with patch.object(mod, '_get_nli', return_value=mock_nli):
            result = mod.detect_stance(
                claim="There is no evidence of fraud",
                article_title="Evidence of widespread fraud found",
                article_description="Investigators found extensive evidence of fraud in the election.",
                similarity=0.6,
            )
        assert result.stance == mod.Stance.REFUTES
        assert "NLI" in result.reason

    def test_nli_low_confidence_falls_through_to_keywords(self):
        """NLI confidence < 0.65 -> should fall through and use keyword rules."""
        import evidence.stance_detector as mod

        self._reset_nli_cache()
        mock_nli = MagicMock()
        mock_nli.return_value = {
            "labels": ["supports the claim", "contradicts the claim", "unrelated"],
            "scores": [0.45, 0.35, 0.20],  # below 0.65 threshold
        }
        with patch.object(mod, '_get_nli', return_value=mock_nli):
            result = mod.detect_stance(
                claim="Senator is guilty of corruption",
                article_title="Fact check: False claim about senator",
                article_description="This claim has been debunked by multiple fact-checkers.",
                similarity=0.5,
            )
        # Keyword "debunked" should trigger REFUTES
        assert result.stance == mod.Stance.REFUTES

    def test_short_description_skips_nli(self):
        """Article description shorter than 30 chars -> NLI skipped, no error."""
        import evidence.stance_detector as mod

        self._reset_nli_cache()
        mock_nli = MagicMock()
        with patch.object(mod, '_get_nli', return_value=mock_nli):
            # The return value is irrelevant here; only the absence of an
            # NLI call (and of an exception) is being checked.
            mod.detect_stance(
                claim="Some claim",
                article_title="Short article",
                article_description="Short.",  # <30 chars
                similarity=0.5,
            )
        # NLI should not have been called
        mock_nli.assert_not_called()
| # ────────────────────────────────────────────────────────────────────────────── | |
| # Part 6 — calamanCy NER Fallback Chain | |
| # ────────────────────────────────────────────────────────────────────────────── | |
class TestCalamanCyNERFallback:
    """Checks the fallback paths of ``nlp.ner.EntityExtractor``."""

    def _fresh_extractor(self):
        """Return a fresh (unloaded) EntityExtractor.

        The ``nlp.ner`` module is reloaded first, so the extractor is built
        from a freshly executed module.
        """
        import importlib
        import nlp.ner

        importlib.reload(nlp.ner)
        return nlp.ner.EntityExtractor()

    def test_falls_back_to_spacy_when_calamancy_missing(self, monkeypatch):
        """When calamancy import fails, _nlp is set via spaCy en_core_web_sm."""
        import types

        import nlp.ner as mod

        extractor = mod.EntityExtractor()
        extractor._loaded = False  # force reload

        def patched_load(self):
            # Stand-in loader that behaves as if calamancy were not
            # installed: it goes straight to the spaCy fallback.
            self._loaded = True
            try:
                import spacy
                self._nlp = spacy.load("en_core_web_sm")
            except Exception:
                # Deliberate best-effort: spaCy or its model may be absent
                # in the test environment; the extractor then has no model.
                self._nlp = None

        # monkeypatch.setattr fails if `_load_model` does not exist, so the
        # test still verifies the extractor exposes that method. The patch
        # is undone automatically at teardown.
        monkeypatch.setattr(
            extractor, "_load_model", types.MethodType(patched_load, extractor)
        )
        extractor._load_model()
        # Either spaCy loaded successfully or fell back to None
        assert extractor._loaded is True

    def test_hint_based_fallback_when_both_unavailable(self):
        """When both calamancy and spaCy fail, hint-based NER still works."""
        import nlp.ner as mod

        extractor = mod.EntityExtractor()
        extractor._loaded = True
        extractor._nlp = None  # force hint-based path
        result = extractor.extract("Sinabi ni Marcos sa Davao tungkol sa DOH")
        assert isinstance(result.persons, list)
        assert isinstance(result.organizations, list)
        assert isinstance(result.locations, list)
        # Should find hint-based entities
        assert any("Marcos" in p for p in result.persons)

    def test_ner_result_method_reflects_path(self):
        """method field on NERResult reflects which extraction path was used."""
        import nlp.ner as mod

        extractor = mod.EntityExtractor()
        extractor._loaded = True
        extractor._nlp = None
        result = extractor._hint_based_extract("Marcos is in Manila with DOH")
        assert result.method == "hints"

    def test_extract_with_no_model_returns_ner_result(self):
        """Without a model, ``extract`` still returns an NERResult with dates."""
        from nlp.ner import EntityExtractor, NERResult

        e = EntityExtractor()
        e._loaded = True
        e._nlp = None
        result = e.extract("DOH confirmed 500 cases in Cebu on January 2026")
        assert isinstance(result, NERResult)
        assert len(result.dates) > 0  # Should find "January 2026"