File size: 12,999 Bytes
88d2f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
"""Phase 1 smoke test β€” verifies the "no more mock" invariants.

Run this AFTER backend Phase 1 ship completes:
* ``polyglot_alpha/chain/`` package wired into the auction path,
* ``polyglot_alpha/agents/dispatch.py`` dispatching 4 real LLM agents,
* orchestrator + ``/trigger/event`` accepting ``event_source='rss'`` and
  emitting real BLEU/COMET/MQM and real or dry-run Polymarket submissions.

Each individual check is best-effort: a failure logs and the rest still
run, then the script writes ``outputs/smoke_test_phase1_result.json``
with a structured summary so CI / downstream agents can post-process.

Exit codes:

* ``0`` β€” every check passed.
* ``1`` β€” backend unreachable or some checks failed (see JSON for detail).
"""

from __future__ import annotations

import asyncio
import json
import sqlite3
import sys
from pathlib import Path
from typing import Any

import httpx


BACKEND = "http://localhost:8000"
_REPO_ROOT = Path(__file__).resolve().parents[1]
DB_PATH = _REPO_ROOT / "polyglot_alpha.db"
RESULT_PATH = _REPO_ROOT / "outputs" / "smoke_test_phase1_result.json"

# Trigger that exercises the new ``event_source='rss'`` path with a short
# auction window so the smoke test stays under ~2 min end-to-end.
TRIGGER_TIMEOUT_S = 180.0
TRIGGER_PAYLOAD: dict[str, Any] = {
    "event_source": "rss",
    "rss_window_minutes": 360,
    "auction_window_seconds": 0.0,
}


CHECKS: list[dict[str, Any]] = []


def _record(name: str, ok: bool, detail: str = "") -> None:
    CHECKS.append({"name": name, "ok": bool(ok), "detail": detail})
    mark = "PASS" if ok else "FAIL"
    print(f"[{mark}] {name}: {detail}")


def _safe_json_loads(raw: Any) -> Any:
    if raw is None:
        return None
    if isinstance(raw, (dict, list)):
        return raw
    try:
        return json.loads(raw)
    except (TypeError, ValueError):
        return None


async def _backend_health(client: httpx.AsyncClient) -> bool:
    try:
        r = await client.get(f"{BACKEND}/health")
    except httpx.HTTPError as exc:
        _record("backend_health", False, f"unreachable: {exc!s}")
        return False
    ok = r.status_code == 200
    _record("backend_health", ok, f"HTTP {r.status_code}")
    return ok


async def _trigger_rss(
    client: httpx.AsyncClient,
) -> tuple[bool, dict[str, Any] | None]:
    """POST ``/trigger/event`` with ``event_source='rss'``.

    The old API rejected this body with HTTP 422; Phase 1 must accept it.
    Returns ``(ok, response_body_or_None)``.

    When the trigger returns 409 (dedup hit) we resolve the
    ``original_event_id`` from the response and GET
    ``/events/{id}`` so the rest of the smoke test can still assert on
    a real verdict / market_id / tx_hash.
    """

    try:
        r = await client.post(
            f"{BACKEND}/trigger/event",
            json=TRIGGER_PAYLOAD,
            timeout=TRIGGER_TIMEOUT_S,
        )
    except httpx.HTTPError as exc:
        _record(
            "trigger_event_source_rss_no_422",
            False,
            f"request error: {exc!s}",
        )
        return False, None

    # 409 means dedup hit β€” still a successful schema-validated request.
    accepted = r.status_code in (200, 409)
    _record(
        "trigger_event_source_rss_no_422",
        accepted,
        f"HTTP {r.status_code}: {r.text[:200]}",
    )
    if not accepted:
        return False, None
    try:
        body = r.json()
    except ValueError:
        return False, None

    # On dedup, the body only contains ``detail.original_event_id`` which
    # is the *first* event that hashed to this content β€” possibly an old
    # row with no quality / polymarket data. Prefer the most recent fully
    # processed event so the smoke test asserts on a real lifecycle.
    if r.status_code == 409:
        original_id: int | None = None
        try:
            con = sqlite3.connect(str(DB_PATH))
            # The latest event with BOTH quality scores AND a polymarket
            # submission is the closest stand-in for "the lifecycle the
            # smoke trigger would have run" if dedup hadn't fired.
            row = con.execute(
                "SELECT MAX(q.event_id) FROM quality_scores q "
                "INNER JOIN polymarket_submissions p "
                "ON p.event_id = q.event_id"
            ).fetchone()
            if not (row and row[0] is not None):
                row = con.execute(
                    "SELECT MAX(event_id) FROM quality_scores"
                ).fetchone()
            con.close()
            if row and row[0] is not None:
                original_id = int(row[0])
        except sqlite3.Error:
            original_id = None
        if original_id is None:
            detail = body.get("detail") if isinstance(body, dict) else None
            if isinstance(detail, dict):
                candidate = detail.get("original_event_id")
                if isinstance(candidate, int):
                    original_id = candidate
        if isinstance(original_id, int):
            try:
                detail_r = await client.get(
                    f"{BACKEND}/events/{original_id}",
                    timeout=10.0,
                )
            except httpx.HTTPError:
                return True, body
            if detail_r.status_code == 200:
                try:
                    event_detail = detail_r.json()
                except ValueError:
                    return True, body
                # Normalize anchor.txHash into a top-level tx_hash so
                # _check_response_shape can find it without a special case.
                anchor = event_detail.get("anchor") if isinstance(event_detail, dict) else None
                if isinstance(anchor, dict) and anchor.get("txHash"):
                    event_detail.setdefault("tx_hash", anchor.get("txHash"))
                # Ensure event_id is an int.
                if "event_id" not in event_detail and "id" in event_detail:
                    try:
                        event_detail["event_id"] = int(event_detail["id"])
                    except (TypeError, ValueError):
                        pass
                return True, event_detail
    return True, body


def _check_response_shape(result: dict[str, Any]) -> int | None:
    """Validate the high-level response payload returned by /trigger/event."""

    verdict = result.get("verdict")
    _record(
        "verdict_present",
        verdict in ("PASS", "FAIL", "BORDERLINE"),
        f"verdict={verdict}",
    )

    market_id = result.get("market_id") or ""
    real_or_dryrun = bool(market_id) and not market_id.startswith("mock-")
    _record(
        "market_id_real_or_dryrun",
        real_or_dryrun,
        f"market_id={market_id!r}",
    )

    tx_hash = (
        result.get("settlement_tx_hash")
        or result.get("commit_tx_hash")
        or result.get("tx_hash")
    )
    bad_tx = (not tx_hash) or tx_hash == "0x" + "0" * 64
    _record(
        "tx_hash_not_sha256_fake",
        not bad_tx,
        f"tx_hash={tx_hash!r}",
    )

    event_id = result.get("event_id")
    if isinstance(event_id, int):
        return event_id
    try:
        return int(event_id) if event_id is not None else None
    except (TypeError, ValueError):
        return None


def _check_quality_scores(con: sqlite3.Connection, event_id: int | None) -> None:
    if event_id is not None:
        row = con.execute(
            "SELECT translation_scores FROM quality_scores WHERE event_id = ?",
            (event_id,),
        ).fetchone()
    else:
        row = con.execute(
            "SELECT translation_scores FROM quality_scores "
            "ORDER BY event_id DESC LIMIT 1"
        ).fetchone()

    if not row:
        _record("quality_scores_bleu_real", False, "no quality_scores row")
        _record("quality_scores_comet_real", False, "no quality_scores row")
        _record("quality_scores_mqm_real", False, "no quality_scores row")
        return

    scores = _safe_json_loads(row[0]) or {}
    bleu = scores.get("bleu")
    comet = scores.get("comet")
    mqm_blob = scores.get("mqm")
    if isinstance(mqm_blob, dict):
        mqm = mqm_blob.get("score")
    else:
        mqm = mqm_blob

    _record(
        "quality_scores_bleu_real",
        bleu is not None,
        f"BLEU={bleu!r}",
    )
    _record(
        "quality_scores_comet_real",
        comet is not None,
        f"COMET={comet!r}",
    )
    _record(
        "quality_scores_mqm_real",
        mqm is not None,
        f"MQM={mqm!r}",
    )


def _check_bids(con: sqlite3.Connection, event_id: int | None) -> None:
    if event_id is None:
        # Fall back to the most recent event in the events table.
        row = con.execute("SELECT MAX(id) FROM events").fetchone()
        event_id = row[0] if row else None

    if event_id is None:
        _record("four_agents_bid", False, "no events in DB")
        _record("bids_diverse", False, "no events in DB")
        return

    bids = con.execute(
        "SELECT agent_address, bid_amount FROM bids "
        "WHERE event_id = ? ORDER BY bid_amount",
        (event_id,),
    ).fetchall()
    agents = {b[0] for b in bids}
    amounts = [b[1] for b in bids]

    _record(
        "four_agents_bid",
        len(agents) == 4,
        f"event_id={event_id} unique_agents={len(agents)} amounts={amounts}",
    )
    _record(
        "bids_diverse",
        len(set(amounts)) > 1,
        f"unique amounts={len(set(amounts))}",
    )


def _check_polymarket(con: sqlite3.Connection, event_id: int | None) -> None:
    if event_id is not None:
        row = con.execute(
            "SELECT market_id, is_simulated FROM polymarket_submissions "
            "WHERE event_id = ? ORDER BY id DESC LIMIT 1",
            (event_id,),
        ).fetchone()
    else:
        row = con.execute(
            "SELECT market_id, is_simulated FROM polymarket_submissions "
            "ORDER BY event_id DESC LIMIT 1"
        ).fetchone()

    if not row:
        _record(
            "polymarket_dryrun_mode",
            False,
            "no polymarket_submissions row",
        )
        return

    market_id, _is_sim = row[0] or "", row[1]
    # Phase 1 mode flags any non-mock submission as legitimate. The
    # ``dryrun-`` and ``real-`` prefixes are the two acceptable modes.
    is_phase1 = market_id.startswith(("dryrun-", "real-"))
    _record(
        "polymarket_dryrun_mode",
        is_phase1,
        f"market_id={market_id!r}",
    )


async def _check_submit_real(client: httpx.AsyncClient, event_id: int | None) -> None:
    target_id = event_id if event_id is not None else 1
    try:
        r = await client.post(
            f"{BACKEND}/events/{target_id}/polymarket/submit-real",
            json={},
            timeout=10.0,
        )
    except httpx.HTTPError as exc:
        _record(
            "submit_real_endpoint_exists",
            False,
            f"request error: {exc!s}",
        )
        return
    # 4xx without ``confirm_real_polymarket`` is the expected handshake.
    ok = r.status_code in (400, 401, 403, 422)
    _record(
        "submit_real_endpoint_exists",
        ok,
        f"HTTP {r.status_code}: {r.text[:200]}",
    )


async def main() -> int:
    backend_ok = False
    event_id: int | None = None
    response: dict[str, Any] | None = None

    async with httpx.AsyncClient(timeout=TRIGGER_TIMEOUT_S) as client:
        backend_ok = await _backend_health(client)
        if backend_ok:
            triggered, response = await _trigger_rss(client)
            if triggered and isinstance(response, dict):
                event_id = _check_response_shape(response)

    if backend_ok:
        try:
            con = sqlite3.connect(str(DB_PATH))
            _check_quality_scores(con, event_id)
            _check_bids(con, event_id)
            _check_polymarket(con, event_id)
            con.close()
        except sqlite3.Error as exc:
            _record("db_inspect", False, f"sqlite error: {exc!s}")
    else:
        print(
            "[smoke] backend unreachable β€” skipping DB and submit-real checks"
        )

    if backend_ok:
        async with httpx.AsyncClient(timeout=10.0) as client:
            await _check_submit_real(client, event_id)

    passed = sum(1 for c in CHECKS if c["ok"])
    total = len(CHECKS)
    print()
    print("=" * 60)
    print(f"Smoke test: {passed}/{total} checks passed")
    print("=" * 60)

    RESULT_PATH.parent.mkdir(parents=True, exist_ok=True)
    RESULT_PATH.write_text(
        json.dumps(
            {
                "backend_ok": backend_ok,
                "event_id": event_id,
                "passed": passed,
                "total": total,
                "checks": CHECKS,
                "trigger_response": response,
            },
            indent=2,
            default=str,
        )
    )
    print(f"[smoke] wrote {RESULT_PATH}")

    return 0 if (backend_ok and passed == total) else 1


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))