File size: 6,667 Bytes
88d2f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""W14-FIX-STUB end-to-end verification.

Monkey-patches the LLM call to always return an empty string (the most
common LLM-glitch failure mode) and walks an event through the full
analysts -> translators -> synthesizer -> quality_eval -> polymarket
pipeline. Verifies every stub gate fires:

    1. ``analysts._parse_response`` emits a ``logger.warning`` for a
       missing ``JSON:`` marker.
    2. ``translators.propose_candidates`` emits a ``logger.warning`` and
       sets ``candidate.is_stub = True``.
    3. ``synthesizer.synthesize`` propagates ``is_stub`` to the
       :class:`Question`.
    4. ``quality_eval.score_question`` returns ``score=0.0`` and
       ``passed=False`` with ``"stub_detected"`` in the rationale.
    5. ``polymarket.client._build_gamma_payload`` raises ``ValueError``
       before any HTTP traffic.

Run::

    python scripts/test_stub_blocking.py
"""

from __future__ import annotations

import asyncio
import logging
import sys
from typing import List

# Force-import everything we need from the package.
from polyglot_alpha import analysts, quality_eval, synthesizer, translators
from polyglot_alpha.polymarket.client import _build_gamma_payload
from polyglot_alpha.polymarket.types import Question as PMQuestion
from polyglot_alpha.schemas import NewsEvent
from polyglot_alpha.stub_detector import is_stub


# --------------------------------------------------------------------------- #
# Helpers                                                                     #
# --------------------------------------------------------------------------- #


class WarningCollector(logging.Handler):
    """In-memory logging handler with a WARNING threshold.

    Every record handed to :meth:`emit` is kept, in arrival order, in
    ``self.records`` so later checks can inspect what was logged.
    """

    def __init__(self) -> None:
        # Handler level is WARNING, so loggers only dispatch WARNING+ here.
        super().__init__(level=logging.WARNING)
        self.records: List[logging.LogRecord] = []

    def emit(self, record: logging.LogRecord) -> None:
        """Store *record* instead of writing it anywhere."""
        self.records.append(record)

    def messages_for(self, logger_name: str) -> List[str]:
        """Return the formatted messages logged under exactly *logger_name*.

        The comparison is an exact match on the record's logger name, so
        records from child loggers are not included.
        """
        collected: List[str] = []
        for rec in self.records:
            if rec.name == logger_name:
                collected.append(rec.getMessage())
        return collected


def _install_collector() -> WarningCollector:
    """Attach a fresh :class:`WarningCollector` to the ``polyglot_alpha`` logger.

    The logger's own level is set to WARNING before the handler is added.

    Returns:
        The collector instance that was attached.
    """
    package_logger = logging.getLogger("polyglot_alpha")
    package_logger.setLevel(logging.WARNING)
    collector = WarningCollector()
    package_logger.addHandler(collector)
    return collector


async def _broken_llm(prompt: str) -> str:
    """The 'broken LLM' fixture: returns empty string regardless of prompt."""
    return ""


# --------------------------------------------------------------------------- #
# Test body                                                                   #
# --------------------------------------------------------------------------- #


async def _run() -> int:
    """Walk one event through all five layers with a broken LLM and check each gate.

    The layers are exercised in order: analysts, translators, synthesizer,
    quality_eval, polymarket. Each layer receives :func:`_broken_llm` or the
    output of the previous layer, and its stub gate is verified before
    moving on. A progress line is printed after every passed gate and a
    summary is printed at the end.

    Checks are explicit ``raise AssertionError`` statements rather than
    ``assert`` so that they still execute under ``python -O`` /
    ``PYTHONOPTIMIZE``, where ``assert`` statements are compiled out and the
    script would otherwise report success without verifying anything.

    Returns:
        ``0`` when all five gates fire; ``1`` when
        ``_build_gamma_payload`` does not raise ``ValueError`` for the stub
        question.

    Raises:
        AssertionError: if any gate check fails.
    """
    collector = _install_collector()

    event = NewsEvent(
        event_id="evt-stub-blocking-test",
        url="https://example.com/news/stub-test",
        title_zh="测试事件 — LLM glitch simulation",
        body_zh="此事件被故意触发以验证 stub-blocking 链。",
        cutoff_ts=1_800_000_000,
    )

    # ----- Layer 1: analysts. Empty LLM -> empty entities/risks + WARNING.
    reports = await analysts.run_analysts(event, _broken_llm)
    if len(reports) < 1:
        raise AssertionError("expected at least one analyst report")
    for report in reports:
        if report.relevant_entities != []:
            raise AssertionError(report.relevant_entities)
        if report.risk_factors != []:
            raise AssertionError(report.risk_factors)
    analyst_warns = collector.messages_for("polyglot_alpha.analysts")
    if not any(
        "JSON parse failed" in message or "missing 'JSON:' marker" in message
        for message in analyst_warns
    ):
        raise AssertionError(
            f"analysts: expected a JSON-parse warning, got: {analyst_warns!r}"
        )
    print(f"[1/5] analysts: emitted parse-fail WARNING (count={len(analyst_warns)})")

    # ----- Layer 2: translators. Empty LLM -> is_stub=True + WARNING.
    candidates = await translators.propose_candidates(event, reports, _broken_llm, n=2)
    if len(candidates) != 2:
        raise AssertionError
    for candidate in candidates:
        if not getattr(candidate, "is_stub", False):
            raise AssertionError(
                f"translators: expected is_stub=True on candidate {candidate.translator_id}"
            )
        # Stub strings must come through verbatim so quality_eval can catch them.
        if not (is_stub(candidate.question_en) or is_stub(candidate.resolution_criteria)):
            raise AssertionError(
                f"translators: expected stub text on {candidate.translator_id}, "
                f"got {candidate.question_en!r}"
            )
    translator_warns = collector.messages_for("polyglot_alpha.translators")
    if not any("falling back to stub" in message for message in translator_warns):
        raise AssertionError(
            f"translators: expected fallback WARNING, got: {translator_warns!r}"
        )
    print("[2/5] translators: emitted stub-fallback WARNING + is_stub=True on both candidates")

    # ----- Layer 3: synthesizer. is_stub must propagate to the Question.
    question = synthesizer.synthesize(event, candidates)
    if not getattr(question, "is_stub", False):
        raise AssertionError(
            "synthesizer: expected is_stub=True propagated to Question"
        )
    print("[3/5] synthesizer: propagated is_stub=True to Question")

    # ----- Layer 4: quality_eval. score=0.0, passed=False, reason contains stub_detected.
    qs = quality_eval.score_question(question)
    if qs.score != 0.0:
        raise AssertionError(f"quality_eval: expected score=0.0, got {qs.score}")
    if qs.passed is not False:
        raise AssertionError("quality_eval: expected passed=False")
    if "stub_detected" not in qs.rationale:
        raise AssertionError(
            f"quality_eval: expected 'stub_detected' in rationale, got {qs.rationale!r}"
        )
    print(f"[4/5] quality_eval: score=0.0, passed=False, rationale={qs.rationale!r}")

    # ----- Layer 5: polymarket. _build_gamma_payload must raise BEFORE any HTTP.
    pm_question = PMQuestion(
        question_id="q-stub-blocking-test",
        text=question.question_en,
        end_date_iso=question.end_date_iso,
    )
    try:
        _build_gamma_payload(pm_question, "builder-test", None)
    except ValueError as exc:
        # A ValueError for some unrelated reason does not count as the gate firing.
        if "stub" not in str(exc).lower():
            raise AssertionError(f"polymarket: unexpected error: {exc}") from exc
        print(f"[5/5] polymarket: refused to build Gamma payload — {exc}")
    else:
        print("[5/5] FAIL: polymarket did NOT raise on stub question!", file=sys.stderr)
        return 1

    # --- Summary --------------------------------------------------------- #
    print()
    print("=" * 72)
    print("ALL 5 STUB GATES FIRED CORRECTLY")
    print(f"  analysts WARNINGs:    {len(analyst_warns)}")
    print(f"  translators WARNINGs: {len(translator_warns)}")
    print(f"  synthesizer is_stub:  {getattr(question, 'is_stub', False)}")
    print(f"  quality_eval score:   {qs.score} (passed={qs.passed})")
    print("  polymarket payload:   blocked with ValueError")
    print("=" * 72)
    return 0


if __name__ == "__main__":
    # Use the verification result as the process exit status.
    exit_status = asyncio.run(_run())
    sys.exit(exit_status)