# polyglot-alpha / scripts / test_stub_blocking.py
# licaomeng
# deploy: main@8970ffb → HF Spaces (2026-05-27T05:19Z)
# 88d2f2a
"""W14-FIX-STUB end-to-end verification.
Monkey-patches the LLM call to always return an empty string (the most
common LLM-glitch failure mode) and walks an event through the full
analysts -> translators -> synthesizer -> quality_eval -> polymarket
pipeline. Verifies every stub gate fires:
1. ``analysts._parse_response`` emits a ``logger.warning`` for a
missing ``JSON:`` marker.
2. ``translators.propose_candidates`` emits a ``logger.warning`` and
sets ``candidate.is_stub = True``.
3. ``synthesizer.synthesize`` propagates ``is_stub`` to the
:class:`Question`.
4. ``quality_eval.score_question`` returns ``score=0.0`` and
``passed=False`` with ``"stub_detected"`` in the rationale.
5. ``polymarket.client._build_gamma_payload`` raises ``ValueError``
before any HTTP traffic.
Run::
python scripts/test_stub_blocking.py
"""
from __future__ import annotations
import asyncio
import logging
import sys
from typing import List
# Force-import everything we need from the package.
from polyglot_alpha import analysts, quality_eval, synthesizer, translators
from polyglot_alpha.polymarket.client import _build_gamma_payload
from polyglot_alpha.polymarket.types import Question as PMQuestion
from polyglot_alpha.schemas import NewsEvent
from polyglot_alpha.stub_detector import is_stub
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
class WarningCollector(logging.Handler):
    """Logging handler that buffers the records it is asked to emit.

    The handler's own level is ``logging.WARNING``, so a logger it is
    attached to only hands it records at WARNING severity or above. The
    buffered records are kept in arrival order in ``self.records``.
    """

    def __init__(self) -> None:
        """Create an empty collector whose handler level is WARNING."""
        super().__init__(logging.WARNING)
        # Every record passed to ``emit`` is retained here, oldest first.
        self.records: List[logging.LogRecord] = []

    def emit(self, record: logging.LogRecord) -> None:
        """Store *record* instead of writing it anywhere."""
        self.records.append(record)

    def messages_for(self, logger_name: str) -> List[str]:
        """Return the formatted messages whose logger name equals *logger_name*.

        The match is exact: records from child or parent loggers are not
        included.
        """
        formatted: List[str] = []
        for captured in self.records:
            if captured.name != logger_name:
                continue
            formatted.append(captured.getMessage())
        return formatted
def _install_collector() -> WarningCollector:
    """Attach a fresh :class:`WarningCollector` to the ``polyglot_alpha`` logger.

    The ``polyglot_alpha`` logger's level is set to ``logging.WARNING`` and
    the new collector is added as one of its handlers.

    Returns:
        The collector that was attached, so the caller can inspect what
        was logged.
    """
    collector = WarningCollector()
    # Note: this is the package logger, not the process-wide root logger.
    package_logger = logging.getLogger("polyglot_alpha")
    package_logger.addHandler(collector)
    package_logger.setLevel(logging.WARNING)
    return collector
async def _broken_llm(prompt: str) -> str:
    """Stand-in LLM coroutine: ignores *prompt* and always yields ``""``."""
    empty_completion = ""
    return empty_completion
# --------------------------------------------------------------------------- #
# Test body #
# --------------------------------------------------------------------------- #
async def _run() -> int:
    """Drive one event through the pipeline with a broken LLM and check every stub gate.

    The five layers are exercised in order (analysts, translators,
    synthesizer, quality_eval, polymarket) and a progress line is printed
    after each one passes, followed by a summary.

    Expectations are checked with explicit ``if ...: raise AssertionError``
    rather than ``assert`` statements, because ``assert`` is compiled away
    under ``python -O`` and the script would then report success without
    verifying anything.

    Returns:
        ``0`` when all five gates fire. ``1`` when
        ``_build_gamma_payload`` does not raise ``ValueError`` for the
        stub question.

    Raises:
        AssertionError: If any other expectation about a layer fails.
    """
    collector = _install_collector()
    try:
        event = NewsEvent(
            event_id="evt-stub-blocking-test",
            url="https://example.com/news/stub-test",
            title_zh="测试事件 — LLM glitch simulation",
            body_zh="此事件被故意触发以验证 stub-blocking 链。",
            cutoff_ts=1_800_000_000,
        )

        # ----- Layer 1: analysts. Empty LLM -> empty entities/risks + WARNING.
        reports = await analysts.run_analysts(event, _broken_llm)
        if not len(reports) >= 1:
            raise AssertionError("expected at least one analyst report")
        for r in reports:
            if r.relevant_entities != []:
                raise AssertionError(r.relevant_entities)
            if r.risk_factors != []:
                raise AssertionError(r.risk_factors)
        analyst_warns = collector.messages_for("polyglot_alpha.analysts")
        if not any(
            "JSON parse failed" in m or "missing 'JSON:' marker" in m
            for m in analyst_warns
        ):
            raise AssertionError(
                f"analysts: expected a JSON-parse warning, got: {analyst_warns!r}"
            )
        print(f"[1/5] analysts: emitted parse-fail WARNING (count={len(analyst_warns)})")

        # ----- Layer 2: translators. Empty LLM -> is_stub=True + WARNING.
        candidates = await translators.propose_candidates(event, reports, _broken_llm, n=2)
        if len(candidates) != 2:
            raise AssertionError
        for c in candidates:
            if not getattr(c, "is_stub", False):
                raise AssertionError(
                    f"translators: expected is_stub=True on candidate {c.translator_id}"
                )
            # Stub strings must come through verbatim so quality_eval can catch them.
            if not (is_stub(c.question_en) or is_stub(c.resolution_criteria)):
                raise AssertionError(
                    f"translators: expected stub text on {c.translator_id}, got {c.question_en!r}"
                )
        translator_warns = collector.messages_for("polyglot_alpha.translators")
        if not any("falling back to stub" in m for m in translator_warns):
            raise AssertionError(
                f"translators: expected fallback WARNING, got: {translator_warns!r}"
            )
        print("[2/5] translators: emitted stub-fallback WARNING + is_stub=True on both candidates")

        # ----- Layer 3: synthesizer. is_stub must propagate to the Question.
        question = synthesizer.synthesize(event, candidates)
        if not getattr(question, "is_stub", False):
            raise AssertionError(
                "synthesizer: expected is_stub=True propagated to Question"
            )
        print("[3/5] synthesizer: propagated is_stub=True to Question")

        # ----- Layer 4: quality_eval. score=0.0, passed=False, reason contains stub_detected.
        qs = quality_eval.score_question(question)
        if not qs.score == 0.0:
            raise AssertionError(f"quality_eval: expected score=0.0, got {qs.score}")
        if qs.passed is not False:
            raise AssertionError("quality_eval: expected passed=False")
        if "stub_detected" not in qs.rationale:
            raise AssertionError(
                f"quality_eval: expected 'stub_detected' in rationale, got {qs.rationale!r}"
            )
        print(f"[4/5] quality_eval: score=0.0, passed=False, rationale={qs.rationale!r}")

        # ----- Layer 5: polymarket. _build_gamma_payload must raise BEFORE any HTTP.
        pm_question = PMQuestion(
            question_id="q-stub-blocking-test",
            text=question.question_en,
            end_date_iso=question.end_date_iso,
        )
        try:
            _build_gamma_payload(pm_question, "builder-test", None)
        except ValueError as exc:
            # A ValueError for some unrelated reason does not count as the gate firing.
            if "stub" not in str(exc).lower():
                raise AssertionError(f"polymarket: unexpected error: {exc}")
            print(f"[5/5] polymarket: refused to build Gamma payload — {exc}")
        else:
            print("[5/5] FAIL: polymarket did NOT raise on stub question!", file=sys.stderr)
            return 1

        # --- Summary --------------------------------------------------------- #
        print()
        print("=" * 72)
        print("ALL 5 STUB GATES FIRED CORRECTLY")
        print(f" analysts WARNINGs: {len(analyst_warns)}")
        print(f" translators WARNINGs: {len(translator_warns)}")
        print(f" synthesizer is_stub: {getattr(question, 'is_stub', False)}")
        print(f" quality_eval score: {qs.score} (passed={qs.passed})")
        print(" polymarket payload: blocked with ValueError")
        print("=" * 72)
        return 0
    finally:
        # Detach the collector on every exit path so a repeated call in the
        # same process does not leave stale handlers on the package logger.
        logging.getLogger("polyglot_alpha").removeHandler(collector)
if __name__ == "__main__":
    # Run the async verification to completion and use its return value
    # as the process exit status.
    exit_status = asyncio.run(_run())
    sys.exit(exit_status)