Spaces:
Running
Running
"""W14-FIX-STUB end-to-end verification.

Passes a deliberately broken LLM callable that always returns an empty
string (the most common LLM-glitch failure mode) and walks an event
through the full
analysts -> translators -> synthesizer -> quality_eval -> polymarket
pipeline. Verifies every stub gate fires:

1. ``translators.propose_candidates`` emits a ``logger.warning`` and
   sets ``candidate.is_stub = True``.
2. ``analysts._parse_response`` emits a ``logger.warning`` for a
   missing ``JSON:`` marker.
3. ``synthesizer.synthesize`` propagates ``is_stub`` to the
   :class:`Question`.
4. ``quality_eval.score_question`` returns ``score=0.0`` and
   ``passed=False`` with ``"stub_detected"`` in the rationale.
5. ``polymarket.client._build_gamma_payload`` raises ``ValueError``
   before any HTTP traffic.

Run::

    python scripts/test_stub_blocking.py
"""
| from __future__ import annotations | |
| import asyncio | |
| import logging | |
| import sys | |
| from typing import List | |
| # Force-import everything we need from the package. | |
| from polyglot_alpha import analysts, quality_eval, synthesizer, translators | |
| from polyglot_alpha.polymarket.client import _build_gamma_payload | |
| from polyglot_alpha.polymarket.types import Question as PMQuestion | |
| from polyglot_alpha.schemas import NewsEvent | |
| from polyglot_alpha.stub_detector import is_stub | |
| # --------------------------------------------------------------------------- # | |
| # Helpers # | |
| # --------------------------------------------------------------------------- # | |
class WarningCollector(logging.Handler):
    """Logging handler that stores WARNING+ records for later inspection.

    The handler's own level is WARNING, so records below that severity are
    filtered out by the logging machinery before :meth:`emit` is called.
    """

    def __init__(self) -> None:
        """Create the handler with an empty record buffer."""
        super().__init__(level=logging.WARNING)
        # Every record handed to emit(), in arrival order.
        self.records: List[logging.LogRecord] = []

    def emit(self, record: logging.LogRecord) -> None:
        """Append *record* to the buffer instead of writing it anywhere."""
        self.records.append(record)

    def messages_for(self, logger_name: str) -> List[str]:
        """Return the formatted messages logged by exactly *logger_name*.

        The match is an exact comparison on ``record.name``; records from
        parent or child loggers are not included.
        """
        return [r.getMessage() for r in self.records if r.name == logger_name]
def _install_collector() -> WarningCollector:
    """Attach a fresh :class:`WarningCollector` to the package logger.

    The ``polyglot_alpha`` logger's level is set to WARNING so that warnings
    from it reach the collector. The handler is not removed again; callers
    that need cleanup must call ``removeHandler`` themselves.

    Returns:
        The newly installed collector.
    """
    handler = WarningCollector()
    root = logging.getLogger("polyglot_alpha")
    root.setLevel(logging.WARNING)
    root.addHandler(handler)
    return handler
async def _broken_llm(prompt: str) -> str:
    """The 'broken LLM' fixture: returns an empty string regardless of prompt.

    Args:
        prompt: Ignored; accepted only so the fixture can be passed where the
            pipeline expects an LLM callable.

    Returns:
        Always ``""``.
    """
    return ""
| # --------------------------------------------------------------------------- # | |
| # Test body # | |
| # --------------------------------------------------------------------------- # | |
async def _run() -> int:
    """Walk one event through all five pipeline layers with a broken LLM.

    Each layer is checked in turn: analysts, translators, synthesizer,
    quality_eval, and the Polymarket payload builder. Progress is printed
    after each layer and a summary is printed at the end.

    Returns:
        ``0`` when every gate fired; ``1`` when ``_build_gamma_payload`` did
        not raise for the stub question.

    Raises:
        AssertionError: If any layer check fails. The checks raise explicitly
            instead of using ``assert`` so they still run under ``python -O``,
            which strips ``assert`` statements.
    """
    collector = _install_collector()
    event = NewsEvent(
        event_id="evt-stub-blocking-test",
        url="https://example.com/news/stub-test",
        title_zh="测试事件 — LLM glitch simulation",
        body_zh="此事件被故意触发以验证 stub-blocking 链。",
        cutoff_ts=1_800_000_000,
    )

    # ----- Layer 1: analysts. Empty LLM -> empty entities/risks + WARNING.
    reports = await analysts.run_analysts(event, _broken_llm)
    if len(reports) < 1:
        raise AssertionError("expected at least one analyst report")
    for report in reports:
        if report.relevant_entities != []:
            raise AssertionError(report.relevant_entities)
        if report.risk_factors != []:
            raise AssertionError(report.risk_factors)
    analyst_warns = collector.messages_for("polyglot_alpha.analysts")
    if not any(
        "JSON parse failed" in m or "missing 'JSON:' marker" in m
        for m in analyst_warns
    ):
        raise AssertionError(
            f"analysts: expected a JSON-parse warning, got: {analyst_warns!r}"
        )
    print(f"[1/5] analysts: emitted parse-fail WARNING (count={len(analyst_warns)})")

    # ----- Layer 2: translators. Empty LLM -> is_stub=True + WARNING.
    candidates = await translators.propose_candidates(event, reports, _broken_llm, n=2)
    if len(candidates) != 2:
        raise AssertionError(
            f"translators: expected 2 candidates, got {len(candidates)}"
        )
    for c in candidates:
        if not getattr(c, "is_stub", False):
            raise AssertionError(
                f"translators: expected is_stub=True on candidate {c.translator_id}"
            )
        # Stub strings must come through verbatim so quality_eval can catch them.
        if not (is_stub(c.question_en) or is_stub(c.resolution_criteria)):
            raise AssertionError(
                f"translators: expected stub text on {c.translator_id}, got {c.question_en!r}"
            )
    translator_warns = collector.messages_for("polyglot_alpha.translators")
    if not any("falling back to stub" in m for m in translator_warns):
        raise AssertionError(
            f"translators: expected fallback WARNING, got: {translator_warns!r}"
        )
    print("[2/5] translators: emitted stub-fallback WARNING + is_stub=True on both candidates")

    # ----- Layer 3: synthesizer. is_stub must propagate to the Question.
    question = synthesizer.synthesize(event, candidates)
    if not getattr(question, "is_stub", False):
        raise AssertionError(
            "synthesizer: expected is_stub=True propagated to Question"
        )
    print("[3/5] synthesizer: propagated is_stub=True to Question")

    # ----- Layer 4: quality_eval. score=0.0, passed=False, reason contains stub_detected.
    qs = quality_eval.score_question(question)
    if qs.score != 0.0:
        raise AssertionError(f"quality_eval: expected score=0.0, got {qs.score}")
    if qs.passed is not False:
        raise AssertionError("quality_eval: expected passed=False")
    if "stub_detected" not in qs.rationale:
        raise AssertionError(
            f"quality_eval: expected 'stub_detected' in rationale, got {qs.rationale!r}"
        )
    print(f"[4/5] quality_eval: score=0.0, passed=False, rationale={qs.rationale!r}")

    # ----- Layer 5: polymarket. _build_gamma_payload must raise BEFORE any HTTP.
    pm_question = PMQuestion(
        question_id="q-stub-blocking-test",
        text=question.question_en,
        end_date_iso=question.end_date_iso,
    )
    try:
        _build_gamma_payload(pm_question, "builder-test", None)
    except ValueError as exc:
        # A ValueError for some unrelated reason does not prove the stub gate fired.
        if "stub" not in str(exc).lower():
            raise AssertionError(f"polymarket: unexpected error: {exc}") from exc
        print(f"[5/5] polymarket: refused to build Gamma payload — {exc}")
    else:
        print("[5/5] FAIL: polymarket did NOT raise on stub question!", file=sys.stderr)
        return 1

    # --- Summary --------------------------------------------------------- #
    print()
    print("=" * 72)
    print("ALL 5 STUB GATES FIRED CORRECTLY")
    print(f" analysts WARNINGs: {len(analyst_warns)}")
    print(f" translators WARNINGs: {len(translator_warns)}")
    print(f" synthesizer is_stub: {getattr(question, 'is_stub', False)}")
    print(f" quality_eval score: {qs.score} (passed={qs.passed})")
    print(" polymarket payload: blocked with ValueError")
    print("=" * 72)
    return 0
if __name__ == "__main__":
    # Use _run()'s integer result as the process exit status.
    sys.exit(asyncio.run(_run()))