"""Phase 1 smoke test — verifies the "no more mock" invariants. Run this AFTER backend Phase 1 ship completes: * ``polyglot_alpha/chain/`` package wired into the auction path, * ``polyglot_alpha/agents/dispatch.py`` dispatching 4 real LLM agents, * orchestrator + ``/trigger/event`` accepting ``event_source='rss'`` and emitting real BLEU/COMET/MQM and real or dry-run Polymarket submissions. Each individual check is best-effort: a failure logs and the rest still run, then the script writes ``outputs/smoke_test_phase1_result.json`` with a structured summary so CI / downstream agents can post-process. Exit codes: * ``0`` — every check passed. * ``1`` — backend unreachable or some checks failed (see JSON for detail). """ from __future__ import annotations import asyncio import json import sqlite3 import sys from pathlib import Path from typing import Any import httpx BACKEND = "http://localhost:8000" _REPO_ROOT = Path(__file__).resolve().parents[1] DB_PATH = _REPO_ROOT / "polyglot_alpha.db" RESULT_PATH = _REPO_ROOT / "outputs" / "smoke_test_phase1_result.json" # Trigger that exercises the new ``event_source='rss'`` path with a short # auction window so the smoke test stays under ~2 min end-to-end. TRIGGER_TIMEOUT_S = 180.0 TRIGGER_PAYLOAD: dict[str, Any] = { "event_source": "rss", "rss_window_minutes": 360, "auction_window_seconds": 0.0, } CHECKS: list[dict[str, Any]] = [] def _record(name: str, ok: bool, detail: str = "") -> None: CHECKS.append({"name": name, "ok": bool(ok), "detail": detail}) mark = "PASS" if ok else "FAIL" print(f"[{mark}] {name}: {detail}") def _safe_json_loads(raw: Any) -> Any: if raw is None: return None if isinstance(raw, (dict, list)): return raw try: return json.loads(raw) except (TypeError, ValueError): return None async def _backend_health(client: httpx.AsyncClient) -> bool: try: r = await client.get(f"{BACKEND}/health") except httpx.HTTPError as exc: _record("backend_health", False, f"unreachable: {exc!s}") return False ok = r.status_code == 200 _record("backend_health", ok, f"HTTP {r.status_code}") return ok async def _trigger_rss( client: httpx.AsyncClient, ) -> tuple[bool, dict[str, Any] | None]: """POST ``/trigger/event`` with ``event_source='rss'``. The old API rejected this body with HTTP 422; Phase 1 must accept it. Returns ``(ok, response_body_or_None)``. When the trigger returns 409 (dedup hit) we resolve the ``original_event_id`` from the response and GET ``/events/{id}`` so the rest of the smoke test can still assert on a real verdict / market_id / tx_hash. """ try: r = await client.post( f"{BACKEND}/trigger/event", json=TRIGGER_PAYLOAD, timeout=TRIGGER_TIMEOUT_S, ) except httpx.HTTPError as exc: _record( "trigger_event_source_rss_no_422", False, f"request error: {exc!s}", ) return False, None # 409 means dedup hit — still a successful schema-validated request. accepted = r.status_code in (200, 409) _record( "trigger_event_source_rss_no_422", accepted, f"HTTP {r.status_code}: {r.text[:200]}", ) if not accepted: return False, None try: body = r.json() except ValueError: return False, None # On dedup, the body only contains ``detail.original_event_id`` which # is the *first* event that hashed to this content — possibly an old # row with no quality / polymarket data. Prefer the most recent fully # processed event so the smoke test asserts on a real lifecycle. if r.status_code == 409: original_id: int | None = None try: con = sqlite3.connect(str(DB_PATH)) # The latest event with BOTH quality scores AND a polymarket # submission is the closest stand-in for "the lifecycle the # smoke trigger would have run" if dedup hadn't fired. row = con.execute( "SELECT MAX(q.event_id) FROM quality_scores q " "INNER JOIN polymarket_submissions p " "ON p.event_id = q.event_id" ).fetchone() if not (row and row[0] is not None): row = con.execute( "SELECT MAX(event_id) FROM quality_scores" ).fetchone() con.close() if row and row[0] is not None: original_id = int(row[0]) except sqlite3.Error: original_id = None if original_id is None: detail = body.get("detail") if isinstance(body, dict) else None if isinstance(detail, dict): candidate = detail.get("original_event_id") if isinstance(candidate, int): original_id = candidate if isinstance(original_id, int): try: detail_r = await client.get( f"{BACKEND}/events/{original_id}", timeout=10.0, ) except httpx.HTTPError: return True, body if detail_r.status_code == 200: try: event_detail = detail_r.json() except ValueError: return True, body # Normalize anchor.txHash into a top-level tx_hash so # _check_response_shape can find it without a special case. anchor = event_detail.get("anchor") if isinstance(event_detail, dict) else None if isinstance(anchor, dict) and anchor.get("txHash"): event_detail.setdefault("tx_hash", anchor.get("txHash")) # Ensure event_id is an int. if "event_id" not in event_detail and "id" in event_detail: try: event_detail["event_id"] = int(event_detail["id"]) except (TypeError, ValueError): pass return True, event_detail return True, body def _check_response_shape(result: dict[str, Any]) -> int | None: """Validate the high-level response payload returned by /trigger/event.""" verdict = result.get("verdict") _record( "verdict_present", verdict in ("PASS", "FAIL", "BORDERLINE"), f"verdict={verdict}", ) market_id = result.get("market_id") or "" real_or_dryrun = bool(market_id) and not market_id.startswith("mock-") _record( "market_id_real_or_dryrun", real_or_dryrun, f"market_id={market_id!r}", ) tx_hash = ( result.get("settlement_tx_hash") or result.get("commit_tx_hash") or result.get("tx_hash") ) bad_tx = (not tx_hash) or tx_hash == "0x" + "0" * 64 _record( "tx_hash_not_sha256_fake", not bad_tx, f"tx_hash={tx_hash!r}", ) event_id = result.get("event_id") if isinstance(event_id, int): return event_id try: return int(event_id) if event_id is not None else None except (TypeError, ValueError): return None def _check_quality_scores(con: sqlite3.Connection, event_id: int | None) -> None: if event_id is not None: row = con.execute( "SELECT translation_scores FROM quality_scores WHERE event_id = ?", (event_id,), ).fetchone() else: row = con.execute( "SELECT translation_scores FROM quality_scores " "ORDER BY event_id DESC LIMIT 1" ).fetchone() if not row: _record("quality_scores_bleu_real", False, "no quality_scores row") _record("quality_scores_comet_real", False, "no quality_scores row") _record("quality_scores_mqm_real", False, "no quality_scores row") return scores = _safe_json_loads(row[0]) or {} bleu = scores.get("bleu") comet = scores.get("comet") mqm_blob = scores.get("mqm") if isinstance(mqm_blob, dict): mqm = mqm_blob.get("score") else: mqm = mqm_blob _record( "quality_scores_bleu_real", bleu is not None, f"BLEU={bleu!r}", ) _record( "quality_scores_comet_real", comet is not None, f"COMET={comet!r}", ) _record( "quality_scores_mqm_real", mqm is not None, f"MQM={mqm!r}", ) def _check_bids(con: sqlite3.Connection, event_id: int | None) -> None: if event_id is None: # Fall back to the most recent event in the events table. row = con.execute("SELECT MAX(id) FROM events").fetchone() event_id = row[0] if row else None if event_id is None: _record("four_agents_bid", False, "no events in DB") _record("bids_diverse", False, "no events in DB") return bids = con.execute( "SELECT agent_address, bid_amount FROM bids " "WHERE event_id = ? ORDER BY bid_amount", (event_id,), ).fetchall() agents = {b[0] for b in bids} amounts = [b[1] for b in bids] _record( "four_agents_bid", len(agents) == 4, f"event_id={event_id} unique_agents={len(agents)} amounts={amounts}", ) _record( "bids_diverse", len(set(amounts)) > 1, f"unique amounts={len(set(amounts))}", ) def _check_polymarket(con: sqlite3.Connection, event_id: int | None) -> None: if event_id is not None: row = con.execute( "SELECT market_id, is_simulated FROM polymarket_submissions " "WHERE event_id = ? ORDER BY id DESC LIMIT 1", (event_id,), ).fetchone() else: row = con.execute( "SELECT market_id, is_simulated FROM polymarket_submissions " "ORDER BY event_id DESC LIMIT 1" ).fetchone() if not row: _record( "polymarket_dryrun_mode", False, "no polymarket_submissions row", ) return market_id, _is_sim = row[0] or "", row[1] # Phase 1 mode flags any non-mock submission as legitimate. The # ``dryrun-`` and ``real-`` prefixes are the two acceptable modes. is_phase1 = market_id.startswith(("dryrun-", "real-")) _record( "polymarket_dryrun_mode", is_phase1, f"market_id={market_id!r}", ) async def _check_submit_real(client: httpx.AsyncClient, event_id: int | None) -> None: target_id = event_id if event_id is not None else 1 try: r = await client.post( f"{BACKEND}/events/{target_id}/polymarket/submit-real", json={}, timeout=10.0, ) except httpx.HTTPError as exc: _record( "submit_real_endpoint_exists", False, f"request error: {exc!s}", ) return # 4xx without ``confirm_real_polymarket`` is the expected handshake. ok = r.status_code in (400, 401, 403, 422) _record( "submit_real_endpoint_exists", ok, f"HTTP {r.status_code}: {r.text[:200]}", ) async def main() -> int: backend_ok = False event_id: int | None = None response: dict[str, Any] | None = None async with httpx.AsyncClient(timeout=TRIGGER_TIMEOUT_S) as client: backend_ok = await _backend_health(client) if backend_ok: triggered, response = await _trigger_rss(client) if triggered and isinstance(response, dict): event_id = _check_response_shape(response) if backend_ok: try: con = sqlite3.connect(str(DB_PATH)) _check_quality_scores(con, event_id) _check_bids(con, event_id) _check_polymarket(con, event_id) con.close() except sqlite3.Error as exc: _record("db_inspect", False, f"sqlite error: {exc!s}") else: print( "[smoke] backend unreachable — skipping DB and submit-real checks" ) if backend_ok: async with httpx.AsyncClient(timeout=10.0) as client: await _check_submit_real(client, event_id) passed = sum(1 for c in CHECKS if c["ok"]) total = len(CHECKS) print() print("=" * 60) print(f"Smoke test: {passed}/{total} checks passed") print("=" * 60) RESULT_PATH.parent.mkdir(parents=True, exist_ok=True) RESULT_PATH.write_text( json.dumps( { "backend_ok": backend_ok, "event_id": event_id, "passed": passed, "total": total, "checks": CHECKS, "trigger_response": response, }, indent=2, default=str, ) ) print(f"[smoke] wrote {RESULT_PATH}") return 0 if (backend_ok and passed == total) else 1 if __name__ == "__main__": sys.exit(asyncio.run(main()))