File size: 14,504 Bytes
7302343
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
"""Citation validator tests — 100% coverage target (load-bearing per ADR-0003)."""

from __future__ import annotations

import pytest

from llm.validation import (
    ValidationResult,
    contains_ev_reference,
    is_substantive,
    parse_ev_references,
    split_sentences,
    uncited_substantive_sentences,
    validate_citations,
)
from orchestrator.tools import EvidenceAccumulator, ToolResult

# === Helpers =============================================================


def _result(
    tool: str = "policy_match",
    status: str = "success",
    summary: str = "ok",
) -> ToolResult:
    """Build a ``ToolResult`` for tests with ``latency_ms`` fixed at 10.

    All arguments are optional: a bare ``_result()`` produces a
    ``policy_match`` entry with status ``success`` and summary ``ok``.
    """
    # Plain ``str`` values are passed here; the ignores suggest ToolResult
    # annotates these two fields more narrowly (presumably literal types).
    built = ToolResult(
        tool=tool,  # type: ignore[arg-type]
        status=status,  # type: ignore[arg-type]
        summary=summary,
        latency_ms=10,
    )
    return built


def _acc(*results: ToolResult) -> EvidenceAccumulator:
    """Return a new ``EvidenceAccumulator`` with *results* appended in order."""
    accumulator = EvidenceAccumulator()
    for entry in results:
        accumulator.append(entry)
    return accumulator


def _acc_3_success() -> EvidenceAccumulator:
    """Accumulator with three successful entries, cited as ev-1, ev-2, ev-3."""
    entries = [
        _result("policy_match", summary="rule matched"),
        _result("report_velocity", summary="3 reports in 5min"),
        _result("user_history", summary="2 prior violations"),
    ]
    return _acc(*entries)


# === parse_ev_references =================================================


class TestParseEvReferences:
    """Checks that ``parse_ev_references`` returns the set of ``[ev-N]`` ids."""

    def test_single_reference(self) -> None:
        """One bracketed id yields a one-element set."""
        found = parse_ev_references("violated rule [ev-1]")
        assert found == {"ev-1"}

    def test_multiple_references(self) -> None:
        """Ids are collected regardless of the order they appear in."""
        rationale = "matched [ev-1] and history [ev-3] shows pattern [ev-2]"
        expected = {"ev-1", "ev-2", "ev-3"}
        assert parse_ev_references(rationale) == expected

    def test_duplicate_references_deduplicated(self) -> None:
        """A repeated id appears once in the result."""
        rationale = "per [ev-1] and again [ev-1]"
        found = parse_ev_references(rationale)
        assert found == {"ev-1"}

    def test_no_references(self) -> None:
        """Text without any bracketed id yields an empty set."""
        found = parse_ev_references("no citations here")
        assert found == set()

    def test_empty_string(self) -> None:
        """Empty input yields an empty set."""
        found = parse_ev_references("")
        assert found == set()

    def test_malformed_not_matched(self) -> None:
        """Missing number, non-numeric id, no brackets, and upper case are ignored."""
        found = parse_ev_references("[ev-] [ev-abc] ev-1 [EV-1]")
        assert found == set()

    def test_high_ids(self) -> None:
        """Multi-digit ids are accepted."""
        found = parse_ev_references("[ev-42] [ev-100]")
        assert found == {"ev-42", "ev-100"}

    def test_adjacent_references(self) -> None:
        """Back-to-back references with no separator are both found."""
        found = parse_ev_references("[ev-1][ev-2]")
        assert found == {"ev-1", "ev-2"}


# === split_sentences ======================================================


class TestSplitSentences:
    """Checks that ``split_sentences`` breaks text into a list of sentences."""

    def test_basic_split(self) -> None:
        """Period-terminated sentences come back individually, terminator kept."""
        expected = [
            "First sentence.",
            "Second sentence.",
            "Third one.",
        ]
        pieces = split_sentences("First sentence. Second sentence. Third one.")
        assert pieces == expected

    def test_question_and_exclamation(self) -> None:
        """``?`` and ``!`` also end a sentence; only the count is checked."""
        pieces = split_sentences("Is this spam? Yes it is! Clearly.")
        assert len(pieces) == 3

    def test_single_sentence(self) -> None:
        """A lone sentence is returned as a one-element list."""
        pieces = split_sentences("Just one.")
        assert pieces == ["Just one."]

    def test_empty_string(self) -> None:
        """Empty input yields an empty list."""
        pieces = split_sentences("")
        assert pieces == []

    def test_preserves_ev_references(self) -> None:
        """Each citation marker survives in at least one returned sentence."""
        pieces = split_sentences("Matched rule [ev-1]. History shows [ev-2].")
        for marker in ("[ev-1]", "[ev-2]"):
            assert any(marker in piece for piece in pieces)


# === is_substantive =======================================================


class TestIsSubstantive:
    """Checks which sentences ``is_substantive`` treats as substantive."""

    def test_factual_claim_is_substantive(self) -> None:
        """A concrete factual statement is substantive."""
        claim = "Author has 3 prior removals in this subreddit"
        assert is_substantive(claim) is True

    def test_framing_not_substantive(self) -> None:
        """Sentences opening with a framing phrase are not substantive."""
        framing_sentences = (
            "In summary, the evidence shows:",
            "Based on the above, we conclude:",
            "Overall, the analysis indicates:",
            "In conclusion, this is clear.",
            "To summarize, the pattern holds.",
            "Given the above, removal is warranted.",
            "Considering the evidence, this is clear.",
        )
        for sentence in framing_sentences:
            assert is_substantive(sentence) is False

    def test_recommendation_not_substantive(self) -> None:
        """Sentences opening with a recommendation label are not substantive."""
        labelled_sentences = (
            "Recommend: Remove this content.",
            "Verdict: REMOVE",
            "Action: Escalate to senior mod.",
            "Suggestion: lock the thread.",
        )
        for sentence in labelled_sentences:
            assert is_substantive(sentence) is False

    def test_short_fragment_not_substantive(self) -> None:
        """Fragments of one to three words are not substantive."""
        fragments = (
            "REMOVE",
            "High risk.",
            "See above.",
            "No match found.",
        )
        for fragment in fragments:
            assert is_substantive(fragment) is False

    def test_six_word_sentence_is_substantive(self) -> None:
        """A six-word factual sentence is substantive."""
        six_words = "The author posted spam three times"
        assert is_substantive(six_words) is True

    def test_case_insensitive_framing(self) -> None:
        """Upper-cased framing phrases are still recognised as framing."""
        shouted = (
            "IN SUMMARY, the evidence is clear.",
            "BASED ON THE ABOVE, remove it.",
        )
        for sentence in shouted:
            assert is_substantive(sentence) is False


# === contains_ev_reference ================================================


class TestContainsEvReference:
    """Checks the boolean result of ``contains_ev_reference``."""

    def test_has_reference(self) -> None:
        """Text with one ``[ev-N]`` marker reports True."""
        verdict = contains_ev_reference("history shows [ev-2]")
        assert verdict is True

    def test_no_reference(self) -> None:
        """Text with no marker reports False."""
        verdict = contains_ev_reference("no citation here")
        assert verdict is False

    def test_multiple_references(self) -> None:
        """Text with several markers reports True."""
        verdict = contains_ev_reference("[ev-1] and [ev-3]")
        assert verdict is True


# === uncited_substantive_sentences ========================================


class TestUncitedSubstantiveSentences:
    """Checks which sentences ``uncited_substantive_sentences`` reports."""

    def test_all_cited_returns_empty(self) -> None:
        """Nothing is reported when every sentence carries a citation."""
        rationale = "Author has 3 violations [ev-1]. Thread is heated [ev-2]."
        flagged = uncited_substantive_sentences(rationale)
        assert flagged == []

    def test_uncited_claim_returned(self) -> None:
        """A claim without a citation is the single reported sentence."""
        rationale = (
            "Author has 3 violations [ev-1]. "
            "The content is clearly toxic and harmful to the community."
        )
        flagged = uncited_substantive_sentences(rationale)
        assert len(flagged) == 1
        assert "clearly toxic" in flagged[0]

    def test_framing_without_citation_ok(self) -> None:
        """An uncited framing sentence is not reported."""
        rationale = "In summary, the evidence is clear. Author violated rules [ev-1]."
        flagged = uncited_substantive_sentences(rationale)
        assert flagged == []

    def test_recommendation_without_citation_ok(self) -> None:
        """An uncited recommendation sentence is not reported."""
        rationale = "Author has history [ev-1]. Recommend: Remove."
        flagged = uncited_substantive_sentences(rationale)
        assert flagged == []

    def test_short_fragment_without_citation_ok(self) -> None:
        """An uncited short fragment is not reported."""
        rationale = "Author has history [ev-1]. High risk."
        flagged = uncited_substantive_sentences(rationale)
        assert flagged == []


# === ValidationResult =====================================================


class TestValidationResult:
    """Checks the ``ValidationResult`` constructors and its immutability."""

    def test_ok(self) -> None:
        """``ok()`` gives a passing result with an empty reason."""
        outcome = ValidationResult.ok()
        assert outcome.passed is True
        assert outcome.reason == ""

    def test_failed(self) -> None:
        """``failed()`` records the reason and stores keyword args as details."""
        outcome = ValidationResult.failed("bad", ids=["ev-99"])
        assert outcome.passed is False
        assert outcome.reason == "bad"
        assert outcome.details == {"ids": ["ev-99"]}

    def test_frozen(self) -> None:
        """Assigning to a field raises ``AttributeError``."""
        outcome = ValidationResult.ok()
        with pytest.raises(AttributeError):
            outcome.passed = False  # type: ignore[misc]


# === validate_citations (integration) =====================================


class TestValidateCitations:
    """Integration checks of ``validate_citations`` against an accumulator.

    Each test builds an ``EvidenceAccumulator``, validates a rationale string
    against it, and asserts on ``passed`` and, for failures, on ``reason``
    (and ``details["ids"]`` where the offending ids are reported).
    """

    def test_valid_rationale_passes(self) -> None:
        """Every claim cites a successful entry, so validation passes."""
        acc = _acc_3_success()
        rationale = (
            "Author has 3 prior violations in this subreddit [ev-3]. "
            "Report velocity is elevated at 3 in 5 min [ev-2]. "
            "Content matches Rule 2 against personal attacks [ev-1]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is True

    def test_empty_rationale_fails(self) -> None:
        """An empty rationale fails with ``empty_rationale``."""
        result = validate_citations("", _acc_3_success())
        assert result.passed is False
        assert result.reason == "empty_rationale"

    def test_whitespace_rationale_fails(self) -> None:
        """A whitespace-only rationale is treated the same as an empty one."""
        result = validate_citations("   \n  ", _acc_3_success())
        assert result.passed is False
        assert result.reason == "empty_rationale"

    def test_no_citations_fails(self) -> None:
        """A rationale without any ``[ev-N]`` marker fails with ``no_citations``."""
        rationale = "The author has a long history of violations and should be removed."
        result = validate_citations(rationale, _acc_3_success())
        assert result.passed is False
        assert result.reason == "no_citations"

    def test_hallucinated_id_fails(self) -> None:
        """Citing an id absent from the accumulator fails and reports that id."""
        acc = _acc_3_success()  # has ev-1, ev-2, ev-3
        rationale = (
            "Author has violations [ev-1] and thread context [ev-7] is concerning. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is False
        assert result.reason == "hallucinated_evidence_ids"
        assert "ev-7" in result.details["ids"]  # type: ignore[operator]

    def test_citing_failure_evidence_fails(self) -> None:
        """Citing an entry whose status is ``failure`` is rejected."""
        acc = _acc(
            _result("policy_match", status="success", summary="matched"),
            _result("user_history", status="failure", summary="db error"),
        )
        rationale = (
            "Rule matched against personal attacks [ev-1]. "
            "User history shows concerning pattern [ev-2]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is False
        assert result.reason == "cited_non_success_evidence"
        assert "ev-2" in result.details["ids"]  # type: ignore[operator]

    def test_citing_timeout_evidence_fails(self) -> None:
        """Citing an entry whose status is ``timeout`` is rejected."""
        acc = _acc(
            _result("policy_match", status="success"),
            _result("thread_context", status="timeout", summary="slow"),
        )
        rationale = (
            "Rule matched [ev-1]. Thread context timed out but shows [ev-2]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is False
        assert result.reason == "cited_non_success_evidence"

    def test_citing_skipped_evidence_fails(self) -> None:
        """Citing an entry whose status is ``skipped`` is rejected."""
        acc = _acc(
            _result("policy_match", status="success"),
            _result("prior_actions", status="skipped"),
        )
        rationale = (
            "Rule matched [ev-1]. Prior actions show pattern [ev-2]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is False
        assert result.reason == "cited_non_success_evidence"

    def test_uncited_claim_fails(self) -> None:
        """A substantive sentence lacking a citation fails with ``uncited_claims``."""
        acc = _acc_3_success()
        rationale = (
            "Author has 3 prior violations [ev-3]. "
            "The content is clearly toxic and harmful to the community."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is False
        assert result.reason == "uncited_claims"

    def test_cited_field_mismatch_fails(self) -> None:
        """Declared ids that differ from the ids in the text are rejected."""
        acc = _acc_3_success()
        rationale = (
            "Author has violations [ev-3]. "
            "Content matches rules [ev-1]. "
            "Recommend: Remove."
        )
        # Declare ev-2 but it's not in the rationale
        result = validate_citations(
            rationale, acc, cited_evidence_ids=["ev-1", "ev-2", "ev-3"]
        )
        assert result.passed is False
        assert result.reason == "cited_field_mismatch"

    def test_cited_field_match_passes(self) -> None:
        """Declared ids that equal the ids in the text pass."""
        acc = _acc_3_success()
        rationale = (
            "Author has violations [ev-3]. "
            "Content matches rules [ev-1]. "
            "Recommend: Remove."
        )
        result = validate_citations(
            rationale, acc, cited_evidence_ids=["ev-1", "ev-3"]
        )
        assert result.passed is True

    def test_cited_field_none_skips_check(self) -> None:
        """Passing ``cited_evidence_ids=None`` does not trigger a mismatch."""
        acc = _acc_3_success()
        rationale = (
            "Author has violations [ev-3]. "
            "Content matches rules [ev-1]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc, cited_evidence_ids=None)
        assert result.passed is True

    def test_framing_sentences_dont_need_citations(self) -> None:
        """An uncited framing sentence does not cause a failure."""
        acc = _acc_3_success()
        rationale = (
            "Author has 3 prior violations [ev-3]. "
            "Report velocity is elevated [ev-2]. "
            "In summary, the evidence clearly supports removal. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is True

    def test_short_fragments_dont_need_citations(self) -> None:
        """An uncited short fragment does not cause a failure."""
        acc = _acc_3_success()
        rationale = (
            "Author has 3 prior violations [ev-3]. "
            "High risk. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is True

    def test_multiple_hallucinated_ids_all_reported(self) -> None:
        """Every unknown id is listed in the failure details, not just the first."""
        acc = _acc(_result("policy_match"))  # only ev-1
        rationale = (
            "Rule matched [ev-1] and history [ev-5] with context [ev-9]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is False
        assert result.reason == "hallucinated_evidence_ids"
        ids = result.details["ids"]
        assert "ev-5" in ids  # type: ignore[operator]
        assert "ev-9" in ids  # type: ignore[operator]

    def test_single_entry_accumulator_valid(self) -> None:
        """A one-entry accumulator with a matching citation passes."""
        acc = _acc(_result("policy_match", summary="matched rule"))
        rationale = (
            "Content clearly violates the no-spam rule [ev-1]. "
            "Recommend: Remove."
        )
        result = validate_citations(rationale, acc)
        assert result.passed is True

    def test_failure_reason_priority_hallucinated_before_uncited(self) -> None:
        """Hallucinated IDs are checked before uncited claims."""
        acc = _acc(_result("policy_match"))  # ev-1 only
        rationale = (
            "Rule matched [ev-99]. "  # hallucinated
            "Author has a pattern of abuse."  # uncited
        )
        result = validate_citations(rationale, acc)
        # Pin the failure itself as well as its reason, matching the other
        # failure-path tests in this class.
        assert result.passed is False
        assert result.reason == "hallucinated_evidence_ids"