File size: 19,604 Bytes
c78c2c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
"""
Tests for the 6 NLP pipeline improvements:
  1. calamanCy NER fallback chain
  2. Tagalog-RoBERTa classifier (ModelNotFoundError)
  3. EnsembleClassifier
  4. EDA augmentation
  5. Sentence-scoring ClaimExtractor
  6. NLI stance detection (Rule 1.5)
"""
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock

import pytest

# Ensure project root is on path: prepend the parent of this file's directory
# to sys.path so the imports done inside the tests (ml.*, nlp.*, evidence.*)
# can be resolved.
# NOTE(review): assumes this file lives one directory below the project root — confirm.
sys.path.insert(0, str(Path(__file__).parent.parent))


# ── Helpers ───────────────────────────────────────────────────────────────────

def _make_sample(text: str, label: int = 0):
    """Build an ``ml.dataset.Sample`` carrying *text* and *label*.

    The dataset module is imported at call time rather than at module import
    time. *label* defaults to 0.
    """
    import ml.dataset as dataset_module

    return dataset_module.Sample(text=text, label=label)


# ══════════════════════════════════════════════════════════════════════════════
# Part 1 — EDA Augmentation
# ══════════════════════════════════════════════════════════════════════════════

class TestEDAugmentation:
    """Checks for ``ml.dataset.augment_samples``."""

    def test_empty_input_returns_empty(self):
        """An empty input list produces an empty output list."""
        from ml.dataset import augment_samples
        augmented = augment_samples([])
        assert augmented == []

    def test_augment_produces_two_variants_per_sample(self):
        """One sufficiently long sample produces exactly two augmented samples."""
        from ml.dataset import augment_samples
        source = [_make_sample("DOH confirms 500 new COVID cases today", 0)]
        augmented = augment_samples(source, seed=42)
        # Expected: one deletion variant plus one swap variant per sample
        assert len(augmented) == 2

    def test_augmented_labels_match_originals(self):
        """Every augmented sample carries a label that appears in the input."""
        from ml.dataset import augment_samples
        source = [
            _make_sample("Senate passes new bill on health care reform", 0),
            _make_sample("SHOCKING truth about vaccines hidden by government", 2),
        ]
        augmented = augment_samples(source, seed=42)
        known_labels = {sample.label for sample in source}
        assert all(item.label in known_labels for item in augmented)

    def test_short_samples_skipped(self):
        """Samples of one or two words yield no augmented output."""
        from ml.dataset import augment_samples
        too_short = [
            _make_sample("ok", 1),          # 1 word — too short
            _make_sample("fake news", 2),   # 2 words — too short
        ]
        augmented = augment_samples(too_short, seed=42)
        assert augmented == []

    def test_augmented_texts_differ_from_original(self):
        """At least one augmented text is different from the source text."""
        from ml.dataset import augment_samples
        original = "GRABE sinabi ng DOH na 200 bata ang nagkasakit sa bagong virus"
        augmented = augment_samples([_make_sample(original, 2)], seed=99)
        # It is enough for a single variant to differ
        changed = [item for item in augmented if item.text != original]
        assert changed

    def test_augment_triples_training_set_size(self):
        """Augmenting the training split yields between 1× and 2× as many samples."""
        from ml.dataset import get_split, augment_samples
        train_split, _ = get_split()
        augmented = augment_samples(train_split, seed=42)
        # At most 2× the train size (some short samples may be skipped)
        train_size = len(train_split)
        assert train_size <= len(augmented) <= 2 * train_size

    def test_augmented_samples_are_non_empty(self):
        """No augmented sample has empty or whitespace-only text."""
        from ml.dataset import augment_samples
        source = [_make_sample("The senator confirmed signing the new law today", 0)]
        augmented = augment_samples(source, seed=42)
        assert all(len(item.text.strip()) > 0 for item in augmented)


# ══════════════════════════════════════════════════════════════════════════════
# Part 2 — Sentence-scoring ClaimExtractor
# ══════════════════════════════════════════════════════════════════════════════

class TestClaimExtractor:
    """Checks for ``nlp.claim_extractor.ClaimExtractor.extract``."""

    def test_instantiates_without_loading_model(self):
        """A new ClaimExtractor exposes neither ``_pipe`` nor ``_loaded``."""
        from nlp.claim_extractor import ClaimExtractor
        extractor = ClaimExtractor()
        # Neither lazy-loading attribute should be present
        for attribute in ('_pipe', '_loaded'):
            assert not hasattr(extractor, attribute)

    def test_passthrough_for_short_text(self):
        """Very short input is returned unchanged with method ``passthrough``."""
        from nlp.claim_extractor import ClaimExtractor
        extractor = ClaimExtractor()
        outcome = extractor.extract("hi")
        assert outcome.method == "passthrough"
        assert outcome.claim == "hi"

    def test_sentence_scoring_method_on_informative_sentence(self):
        """An informative sentence is selected via ``sentence_scoring``."""
        from nlp.claim_extractor import ClaimExtractor
        # Has a date, a verb, and named org — should score high
        text = "GRABE! Sinabi ng DOH noong Martes na 200 bata ang nagkasakit sa bagong virus sa Maynila."
        extractor = ClaimExtractor()
        outcome = extractor.extract(text)
        # The DOH sentence is expected, not the whole text or just "GRABE!"
        assert outcome.method == "sentence_scoring"
        mentions_doh = "DOH" in outcome.claim
        assert mentions_doh or "200" in outcome.claim

    def test_heuristic_fallback_when_no_scored_sentences(self):
        """Text without dates, numbers or verbs resolves to one of two methods."""
        from nlp.claim_extractor import ClaimExtractor
        # Text with no dates, no numbers, no verbs
        text = "Wow amazing incredible unbelievable spectacular incomprehensible."
        extractor = ClaimExtractor()
        outcome = extractor.extract(text)
        allowed_methods = ("sentence_heuristic", "sentence_scoring")
        assert outcome.method in allowed_methods

    def test_returns_claim_result_dataclass(self):
        """``extract`` returns a ClaimResult whose claim and method are strings."""
        from nlp.claim_extractor import ClaimExtractor, ClaimResult
        extractor = ClaimExtractor()
        outcome = extractor.extract("The president signed the new healthcare law today.")
        assert isinstance(outcome, ClaimResult)
        assert isinstance(outcome.claim, str)
        assert isinstance(outcome.method, str)

    def test_picks_specific_sentence_over_clickbait_opener(self):
        """The specific claim is preferred over a clickbait opener."""
        from nlp.claim_extractor import ClaimExtractor
        text = "OMG! Natuklasan ng mga siyentipiko na 5,000 tao ang namatay dahil sa bagong sakit ngayong Enero."
        extractor = ClaimExtractor()
        outcome = extractor.extract(text)
        # The specific claim (5000 deaths) should be preferred over "OMG!"
        accepted = (
            "5,000" in outcome.claim
            or "siyentipiko" in outcome.claim
            or outcome.method == "sentence_scoring"
        )
        assert accepted


# ══════════════════════════════════════════════════════════════════════════════
# Part 3 — TagalogRobertaClassifier
# ══════════════════════════════════════════════════════════════════════════════

class TestTagalogRobertaClassifier:
    """Checks for the error raised when the Tagalog-RoBERTa checkpoint is absent."""

    def test_raises_model_not_found_when_checkpoint_missing(self, tmp_path, monkeypatch):
        """Constructing the classifier raises ModelNotFoundError if MODEL_DIR is missing."""
        import ml.tagalog_roberta_classifier as tagalog_module
        missing_dir = tmp_path / "nonexistent_model"
        monkeypatch.setattr(tagalog_module, "MODEL_DIR", missing_dir)
        with pytest.raises(tagalog_module.ModelNotFoundError):
            tagalog_module.TagalogRobertaClassifier()

    def test_model_not_found_is_subclass_of_file_not_found(self):
        """ModelNotFoundError derives from the built-in FileNotFoundError."""
        import ml.xlm_roberta_classifier as xlm_module
        assert issubclass(xlm_module.ModelNotFoundError, FileNotFoundError)

    def test_shares_same_model_not_found_error(self):
        """Both classifier modules expose the very same ModelNotFoundError class.

        The engine catches ModelNotFoundError from xlm_roberta_classifier; because
        the tagalog module re-uses that class, the same except clause catches it.
        """
        import ml.xlm_roberta_classifier as xlm_module
        import ml.tagalog_roberta_classifier as tagalog_module
        assert xlm_module.ModelNotFoundError is tagalog_module.ModelNotFoundError


# ══════════════════════════════════════════════════════════════════════════════
# Part 4 — EnsembleClassifier
# ══════════════════════════════════════════════════════════════════════════════

class TestEnsembleClassifier:
    """Tests for ``ml.ensemble_classifier.EnsembleClassifier`` driven by stub members.

    Confidence values are floats, so they are compared with ``pytest.approx``
    rather than with exact equality.
    """

    def _make_stub(self, probs_list: list[float]):
        """Return a stub classifier whose predict_probs returns fixed probabilities.

        Args:
            probs_list: Probabilities placed in the float32 tensor that forms the
                first element of the stubbed ``predict_probs`` return tuple.

        Returns:
            A ``MagicMock`` whose ``predict_probs`` returns ``(tensor, None, None)``
            and whose ``_salient_tokens`` returns ``["token1"]``.
        """
        # torch is imported here so only tests that build a stub require it.
        import torch
        stub = MagicMock()
        stub.predict_probs.return_value = (
            torch.tensor(probs_list, dtype=torch.float32),
            None,
            None,
        )
        stub._salient_tokens = MagicMock(return_value=["token1"])
        return stub

    def test_raises_value_error_for_empty_list(self):
        """An ensemble cannot be built from an empty classifier list."""
        from ml.ensemble_classifier import EnsembleClassifier
        with pytest.raises(ValueError):
            EnsembleClassifier([])

    def test_single_classifier_returns_its_prediction(self):
        """With one member, the ensemble reports that member's top class."""
        from ml.ensemble_classifier import EnsembleClassifier
        stub = self._make_stub([0.7, 0.2, 0.1])
        ens = EnsembleClassifier([stub])
        result = ens.predict("any text")
        assert result.verdict == "Credible"
        assert result.confidence == pytest.approx(70.0, abs=1.0)

    def test_two_classifiers_averages_probabilities(self):
        """Two members are combined by averaging their probabilities."""
        from ml.ensemble_classifier import EnsembleClassifier
        # First:   [0.8, 0.1, 0.1] → Credible 80%
        # Second:  [0.4, 0.5, 0.1] → Unverified 50%
        # Average: [0.6, 0.3, 0.1] → Credible 60%
        stub1 = self._make_stub([0.8, 0.1, 0.1])
        stub2 = self._make_stub([0.4, 0.5, 0.1])
        ens = EnsembleClassifier([stub1, stub2])
        result = ens.predict("test text")
        assert result.verdict == "Credible"
        assert result.confidence == pytest.approx(60.0, abs=1.5)

    def test_failing_classifier_gracefully_skipped(self):
        """A member that raises is ignored; the remaining member decides."""
        from ml.ensemble_classifier import EnsembleClassifier
        good = self._make_stub([0.1, 0.1, 0.8])  # Likely Fake
        bad = MagicMock()
        bad.predict_probs.side_effect = RuntimeError("model failed")
        ens = EnsembleClassifier([good, bad])
        result = ens.predict("test text")
        # Should still get a result from the good classifier
        assert result.verdict == "Likely Fake"

    def test_all_classifiers_failing_returns_unverified_neutral(self):
        """When every member raises, the result is a neutral 'Unverified'."""
        from ml.ensemble_classifier import EnsembleClassifier
        bad = MagicMock()
        bad.predict_probs.side_effect = RuntimeError("fail")
        ens = EnsembleClassifier([bad])
        result = ens.predict("test")
        assert result.verdict == "Unverified"
        # Tolerance of half a unit in the first decimal: robust to float
        # representation and to the value being rounded or not.
        assert result.confidence == pytest.approx(33.3, abs=0.05)

    def test_result_has_correct_type(self):
        """``predict`` returns a Layer1Result with a list of triggered features."""
        from ml.ensemble_classifier import EnsembleClassifier
        from ml.xlm_roberta_classifier import Layer1Result
        stub = self._make_stub([0.5, 0.3, 0.2])
        ens = EnsembleClassifier([stub])
        result = ens.predict("test")
        assert isinstance(result, Layer1Result)
        assert isinstance(result.triggered_features, list)


# ══════════════════════════════════════════════════════════════════════════════
# Part 5 — NLI Stance Detection
# ══════════════════════════════════════════════════════════════════════════════

class TestNLIStanceDetector:
    """Checks for ``evidence.stance_detector.detect_stance`` with ``_get_nli`` patched."""

    def _reset_nli_cache(self):
        """Clear the module-level NLI state (``_nli_pipe`` and ``_nli_loaded``)."""
        import evidence.stance_detector as stance_mod
        stance_mod._nli_pipe = None
        stance_mod._nli_loaded = False

    def test_falls_through_to_keywords_when_nli_unavailable(self):
        """With ``_get_nli`` returning None, ``detect_stance`` still yields a stance."""
        import evidence.stance_detector as stance_mod
        self._reset_nli_cache()
        with patch.object(stance_mod, '_get_nli', return_value=None):
            outcome = stance_mod.detect_stance(
                claim="Vaccines are safe",
                article_title="Fact check: COVID vaccines proven effective",
                article_description="Experts confirm vaccines are safe and effective after extensive testing.",
                article_url="",
                similarity=0.7,
            )
        from evidence.stance_detector import Stance
        # Any of the three stances is accepted; the point is that the call
        # completes without raising.
        accepted = (Stance.SUPPORTS, Stance.NOT_ENOUGH_INFO, Stance.REFUTES)
        assert outcome.stance in accepted

    def test_nli_supports_high_confidence(self):
        """When NLI returns 'supports' at ≥0.65, stance is SUPPORTS with NLI reason."""
        import evidence.stance_detector as stance_mod
        self._reset_nli_cache()
        fake_nli = MagicMock(return_value={
            "labels": ["supports the claim", "contradicts the claim", "unrelated"],
            "scores": [0.82, 0.12, 0.06],
        })
        with patch.object(stance_mod, '_get_nli', return_value=fake_nli):
            outcome = stance_mod.detect_stance(
                claim="Government confirmed 500 new cases",
                article_title="Government says 500 new cases recorded",
                article_description="Officials confirmed today that 500 new cases were recorded nationwide.",
                similarity=0.75,
            )
        from evidence.stance_detector import Stance
        assert outcome.stance == Stance.SUPPORTS
        assert "NLI" in outcome.reason

    def test_nli_contradicts_high_confidence(self):
        """When NLI returns 'contradicts' at ≥0.65, stance is REFUTES with NLI reason."""
        import evidence.stance_detector as stance_mod
        self._reset_nli_cache()
        fake_nli = MagicMock(return_value={
            "labels": ["contradicts the claim", "supports the claim", "unrelated"],
            "scores": [0.78, 0.15, 0.07],
        })
        with patch.object(stance_mod, '_get_nli', return_value=fake_nli):
            outcome = stance_mod.detect_stance(
                claim="There is no evidence of fraud",
                article_title="Evidence of widespread fraud found",
                article_description="Investigators found extensive evidence of fraud in the election.",
                similarity=0.6,
            )
        from evidence.stance_detector import Stance
        assert outcome.stance == Stance.REFUTES
        assert "NLI" in outcome.reason

    def test_nli_low_confidence_falls_through_to_keywords(self):
        """NLI confidence < 0.65 — should fall through and use keyword rules."""
        import evidence.stance_detector as stance_mod
        self._reset_nli_cache()
        fake_nli = MagicMock(return_value={
            "labels": ["supports the claim", "contradicts the claim", "unrelated"],
            "scores": [0.45, 0.35, 0.20],  # below 0.65 threshold
        })
        with patch.object(stance_mod, '_get_nli', return_value=fake_nli):
            outcome = stance_mod.detect_stance(
                claim="Senator is guilty of corruption",
                article_title="Fact check: False claim about senator",
                article_description="This claim has been debunked by multiple fact-checkers.",
                similarity=0.5,
            )
        from evidence.stance_detector import Stance
        # Keyword "debunked" should trigger REFUTES
        assert outcome.stance == Stance.REFUTES

    def test_short_description_skips_nli(self):
        """Article description shorter than 30 chars → NLI skipped, no error."""
        import evidence.stance_detector as stance_mod
        self._reset_nli_cache()
        fake_nli = MagicMock()
        with patch.object(stance_mod, '_get_nli', return_value=fake_nli):
            stance_mod.detect_stance(
                claim="Some claim",
                article_title="Short article",
                article_description="Short.",  # <30 chars
                similarity=0.5,
            )
        # The NLI callable must never have been invoked
        fake_nli.assert_not_called()


# ══════════════════════════════════════════════════════════════════════════════
# Part 6 — calamanCy NER Fallback Chain
# ══════════════════════════════════════════════════════════════════════════════

class TestCalamanCyNERFallback:
    """Tests for the fallback paths of ``nlp.ner.EntityExtractor``."""

    def _fresh_extractor(self):
        """Return a fresh (unloaded) EntityExtractor.

        Reloads ``nlp.ner`` first, so the returned instance belongs to the
        re-executed module rather than to any previously imported copy.
        """
        import importlib
        import nlp.ner
        importlib.reload(nlp.ner)
        return nlp.ner.EntityExtractor()

    def test_falls_back_to_spacy_when_calamancy_missing(self, monkeypatch):
        """Model loading completes even when ``calamancy`` cannot be imported.

        The real ``_load_model`` is exercised (not a local re-implementation),
        so a regression in the fallback chain makes this test fail.
        """
        import nlp.ner as mod

        # A ``None`` entry in sys.modules makes any ``import calamancy`` raise
        # ImportError, which simulates the package not being installed.
        # monkeypatch restores the original entry after the test.
        monkeypatch.setitem(sys.modules, "calamancy", None)

        extractor = mod.EntityExtractor()
        extractor._loaded = False  # force reload
        extractor._load_model()

        # Either spaCy loaded successfully or the extractor fell back to no
        # model; in both cases loading must be marked as done.
        # NOTE(review): assumes the real _load_model sets _loaded to True — confirm.
        assert extractor._loaded is True

    def test_hint_based_fallback_when_both_unavailable(self):
        """When both calamancy and spaCy fail, hint-based NER still works."""
        import nlp.ner as mod
        extractor = mod.EntityExtractor()
        extractor._loaded = True
        extractor._nlp = None  # force hint-based path

        result = extractor.extract("Sinabi ni Marcos sa Davao tungkol sa DOH")
        assert isinstance(result.persons, list)
        assert isinstance(result.organizations, list)
        assert isinstance(result.locations, list)
        # Should find hint-based entities
        assert any("Marcos" in p for p in result.persons)

    def test_ner_result_method_reflects_path(self):
        """method field on NERResult reflects which extraction path was used."""
        import nlp.ner as mod
        extractor = mod.EntityExtractor()
        extractor._loaded = True
        extractor._nlp = None

        result = extractor._hint_based_extract("Marcos is in Manila with DOH")
        assert result.method == "hints"

    def test_extract_with_no_model_returns_ner_result(self):
        """Without a model, ``extract`` returns an NERResult that includes dates."""
        from nlp.ner import EntityExtractor, NERResult
        e = EntityExtractor()
        e._loaded = True
        e._nlp = None
        result = e.extract("DOH confirmed 500 cases in Cebu on January 2026")
        assert isinstance(result, NERResult)
        assert len(result.dates) > 0  # Should find "January 2026"