File size: 12,955 Bytes
88d2f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
"""E2E tests for failure paths in the lifecycle.

Covers:
* Hard-gate (D5) judge failure -> REJECTED
* Low MQM score -> REJECTED
* No bids -> FAILED with reason='no_bids' (no synthetic fallback)
* All bidders below reputation gate -> orchestrator's documented fallback
* On-chain commit hang -> 90s timeout -> pending sentinel

All tests use MockLLM (no live Anthropic) and the orchestrator's
``mock_bids`` knob.
"""

from __future__ import annotations

import asyncio
from typing import Any

import pytest
from sqlmodel import Session, select


@pytest.fixture(autouse=True)
def _no_anthropic_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """Force the mock LLM backend for every test in this module.

    Sets ``POLYGLOT_LLM_BACKEND`` to ``"mock"`` and drops
    ``ANTHROPIC_API_KEY`` from the environment (a missing key is not an
    error). ``monkeypatch`` restores both variables after each test.
    """
    monkeypatch.setenv("POLYGLOT_LLM_BACKEND", "mock")
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)


@pytest.fixture()
def _deterministic_pipeline(monkeypatch: pytest.MonkeyPatch) -> None:
    """Replace the orchestrator's translator pipeline with a canned stub.

    The stub ignores its inputs and always returns the same
    ``PipelineResult``, so the failure-path tests do not run the real
    translator pipeline.
    """

    from polyglot_alpha import orchestrator as orch_mod

    async def canned_pipeline(
        _event_dict: dict[str, Any],
        _winner: Any,
        **_kwargs: Any,
    ) -> orch_mod.PipelineResult:
        # Built per call so no mutable state is shared between invocations.
        canned_question = {
            "title": "Will the fail-path test trigger by December 31, 2026?",
            "description": "Test placeholder",
            "resolution_criteria": "Resolves YES if test passes.",
            "resolution_source": "operator",
            "cutoff_ts": "2026-12-31T23:59:59+00:00",
            "category": "test",
            "outcomes": ["Yes", "No"],
        }
        return orch_mod.PipelineResult(
            final_question=canned_question,
            pipeline_trace_ipfs="ipfs://fail/test",
            candidate_hash="a" * 64,
        )

    monkeypatch.setattr(orch_mod, "_run_translator_pipeline", canned_pipeline)


@pytest.mark.asyncio
async def test_d5_hard_gate_failure_marks_rejected(
    isolated_db: str,
    _deterministic_pipeline: None,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A FAIL panel verdict with the D5 flag unset ends in status=REJECTED.

    The judge evaluation is replaced with a stub that returns an
    aggregated ``JudgePanelResult`` whose ``d5`` pass flag is ``False``
    and whose verdict is ``"FAIL"``. The per-judge gate logic itself is
    not exercised here; the stub is meant to mimic the payload a real
    panel would hand back when D5 vetoes the candidate.
    """

    from polyglot_alpha import orchestrator
    from polyglot_alpha.orchestrator import BidRecord, run_lifecycle
    from polyglot_alpha.persistence.db import engine
    from polyglot_alpha.persistence.models import (
        EventStatus,
        PolymarketSubmission,
        Question,
        QualityScore,
    )

    async def vetoed_by_d5(_q: dict[str, Any]) -> orchestrator.JudgePanelResult:
        # Every dimension passes except D5, the hard gate under test.
        gate_flags = {f"d{i}": True for i in range(1, 9)}
        gate_flags["d5"] = False
        return orchestrator.JudgePanelResult(
            translation_scores={"bleu": 0.9, "comet": 0.92, "mqm": {"score": 95}},
            style_alignment_passes=gate_flags,
            overall_score=0.85,
            verdict="FAIL",
        )

    monkeypatch.setattr(orchestrator, "_evaluate_with_judges", vetoed_by_d5)

    event = {
        "title": "D5 hard gate failure event",
        "sources": [{"url": "https://example.com/d5"}],
        "language": "en",
    }
    single_bid = [BidRecord(agent_address="0xagent", bid_amount=1.0)]

    result = await run_lifecycle(
        event,
        auction_window_seconds=0.0,
        mock_bids=single_bid,
    )

    assert result["status"] == EventStatus.REJECTED.value
    assert result["verdict"] == "FAIL"

    event_id = result["event_id"]
    with Session(engine) as session:
        # The QualityScore row should record the FAIL verdict.
        quality_row = session.exec(
            select(QualityScore).where(QualityScore.event_id == event_id)
        ).one()
        assert quality_row.verdict == "FAIL"

        # Downstream rows must NOT exist: commit / Polymarket are skipped.
        question_row = session.exec(
            select(Question).where(Question.event_id == event_id)
        ).first()
        assert question_row is None
        submission_row = session.exec(
            select(PolymarketSubmission).where(
                PolymarketSubmission.event_id == event_id
            )
        ).first()
        assert submission_row is None


@pytest.mark.asyncio
async def test_low_mqm_marks_rejected(
    isolated_db: str,
    _deterministic_pipeline: None,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A FAIL panel result carrying a low MQM score ends in status=REJECTED.

    The judge evaluation is stubbed, so the FAIL verdict is supplied
    directly rather than derived from the MQM value; the test pins how
    the lifecycle reacts to such a result.
    """

    from polyglot_alpha import orchestrator
    from polyglot_alpha.orchestrator import BidRecord, run_lifecycle
    from polyglot_alpha.persistence.db import engine
    from polyglot_alpha.persistence.models import EventStatus, QualityScore

    async def weak_translation(_q: dict[str, Any]) -> orchestrator.JudgePanelResult:
        # MQM score 65 is below the 80 threshold the panel is said to use.
        all_dimensions_pass = dict.fromkeys((f"d{i}" for i in range(1, 9)), True)
        return orchestrator.JudgePanelResult(
            translation_scores={"bleu": 0.6, "comet": 0.55, "mqm": {"score": 65}},
            style_alignment_passes=all_dimensions_pass,
            overall_score=0.55,
            verdict="FAIL",
        )

    monkeypatch.setattr(orchestrator, "_evaluate_with_judges", weak_translation)

    event = {
        "title": "Low MQM event",
        "sources": [{"url": "https://example.com/mqm"}],
        "language": "en",
    }
    result = await run_lifecycle(
        event,
        auction_window_seconds=0.0,
        mock_bids=[BidRecord(agent_address="0xmqm", bid_amount=1.0)],
    )

    assert result["status"] == EventStatus.REJECTED.value
    assert result["overall_score"] < 0.7  # below QUALITY_PASS_THRESHOLD

    event_id = result["event_id"]
    with Session(engine) as session:
        quality_row = session.exec(
            select(QualityScore).where(QualityScore.event_id == event_id)
        ).one()
        assert quality_row.verdict == "FAIL"


@pytest.mark.asyncio
async def test_no_bids_marks_failed_with_reason(
    isolated_db: str,
) -> None:
    """With ``mock_bids=[]`` the lifecycle ends FAILED with reason ``no_bids``.

    Also checks that nothing was persisted downstream of the auction:
    the bid, translation and quality-score tables must all be empty.
    """

    from polyglot_alpha.orchestrator import run_lifecycle
    from polyglot_alpha.persistence.db import engine
    from polyglot_alpha.persistence.models import (
        Bid,
        EventStatus,
        QualityScore,
        Translation,
    )

    event = {
        "title": "No bids event",
        "sources": [{"url": "https://example.com/none"}],
        "language": "en",
    }
    result = await run_lifecycle(
        event,
        auction_window_seconds=0.0,
        mock_bids=[],
    )

    assert result["status"] == EventStatus.FAILED.value
    assert result.get("reason") == "no_bids"

    # No synthetic mock-fallback fires: no bid, translation or quality rows.
    with Session(engine) as session:
        for model in (Bid, Translation, QualityScore):
            assert session.exec(select(model)).first() is None


@pytest.mark.asyncio
async def test_no_bids_emits_auction_failed_and_event_finalized(
    isolated_db: str,
) -> None:
    """No-bid path publishes ``auction.failed`` + ``event.finalized`` SSE.

    A background consumer subscribes to the pubsub hub before the
    lifecycle runs and records every message it receives. After the
    lifecycle returns, the consumer is told to stop, drains whatever is
    still queued, and the captured messages are inspected.
    """

    from polyglot_alpha.orchestrator import run_lifecycle
    from polyglot_alpha.pubsub import get_pubsub

    hub = get_pubsub()
    captured: list[dict[str, Any]] = []
    started = asyncio.Event()
    stop = asyncio.Event()
    # Generous bound so a broken subscriber fails the test instead of
    # hanging the whole run.
    guard_timeout = 5.0

    async def consumer() -> None:
        async with hub.subscribe() as queue:
            started.set()
            # Poll with a short timeout so the stop flag is re-checked
            # regularly even when no messages arrive.
            while not stop.is_set():
                try:
                    captured.append(
                        await asyncio.wait_for(queue.get(), timeout=0.2)
                    )
                except asyncio.TimeoutError:
                    continue
            # Stop requested: collect anything still buffered, then exit.
            while True:
                try:
                    captured.append(queue.get_nowait())
                except asyncio.QueueEmpty:
                    return

    consumer_task = asyncio.create_task(consumer())
    try:
        # Subscribe before publishing so no lifecycle message is missed.
        await asyncio.wait_for(started.wait(), timeout=guard_timeout)

        await run_lifecycle(
            {
                "title": "No bids SSE event",
                "sources": [],
                "language": "en",
            },
            auction_window_seconds=0.0,
            mock_bids=[],
        )

        # Give in-flight publishes a moment to reach the subscriber queue.
        await asyncio.sleep(0.05)
    finally:
        # Always shut the consumer down, even when the lifecycle raised,
        # so no pending task outlives the test. ``wait_for`` cancels the
        # task if it fails to finish in time.
        stop.set()
        await asyncio.wait_for(consumer_task, timeout=guard_timeout)

    event_types = [message["type"] for message in captured]
    assert "auction.failed" in event_types
    assert "event.finalized" in event_types

    finalized = next(
        message for message in captured if message["type"] == "event.finalized"
    )
    assert finalized["data"]["terminal_status"] == "FAILED"
    assert finalized["data"]["reason"] == "no_bids"


@pytest.mark.asyncio
async def test_low_reputation_falls_back_to_raw_lowest(
    isolated_db: str,
    _deterministic_pipeline: None,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With every bidder under the reputation gate, the lowest raw bid wins.

    Per the ``_settle_auction`` documentation, a bid is "qualified" when
    ``reputation >= MIN_QUALIFIED_REPUTATION`` (0.7). If no bid
    qualifies, the orchestrator is expected to fall back to the lowest
    raw bid so the lifecycle still completes. This test pins that
    contract.
    """

    from polyglot_alpha import orchestrator
    from polyglot_alpha.orchestrator import BidRecord, run_lifecycle

    async def approve_all(_q: dict[str, Any]) -> orchestrator.JudgePanelResult:
        all_dimensions_pass = dict.fromkeys((f"d{i}" for i in range(1, 9)), True)
        return orchestrator.JudgePanelResult(
            translation_scores={"bleu": 0.85},
            style_alignment_passes=all_dimensions_pass,
            overall_score=0.85,
            verdict="PASS",
        )

    monkeypatch.setattr(orchestrator, "_evaluate_with_judges", approve_all)

    # All below the 0.7 gate; the orchestrator must still pick a winner.
    unqualified_bids = [
        BidRecord(agent_address="0xlow_rep_lo", bid_amount=0.5, reputation=0.1),
        BidRecord(agent_address="0xlow_rep_mid", bid_amount=1.5, reputation=0.3),
        BidRecord(agent_address="0xlow_rep_hi", bid_amount=2.5, reputation=0.5),
    ]
    event = {
        "title": "Low reputation fallback event",
        "sources": [{"url": "https://example.com/rep"}],
        "language": "en",
    }

    result = await run_lifecycle(
        event,
        auction_window_seconds=0.0,
        mock_bids=unqualified_bids,
    )

    # Lifecycle completes; winner is the raw-lowest amount.
    assert result["status"] == "SUBMITTED"
    assert result["winner_address"] == "0xlow_rep_lo"


@pytest.mark.asyncio
async def test_chain_commit_timeout_returns_pending(
    isolated_db: str,
    _deterministic_pipeline: None,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A timed-out on-chain commit yields a ``pending-`` question id.

    Instead of waiting out the orchestrator's real commit timeout, the
    chain registry is replaced with a fake whose ``commit_question``
    raises ``asyncio.TimeoutError`` immediately. This is intended to hit
    the same fallback branch the orchestrator takes on a real chain
    hang, while keeping the test fast.
    """

    from polyglot_alpha import orchestrator
    from polyglot_alpha.orchestrator import BidRecord, run_lifecycle
    from polyglot_alpha.persistence.db import engine
    from polyglot_alpha.persistence.models import Question

    async def approve_all(_q: dict[str, Any]) -> orchestrator.JudgePanelResult:
        all_dimensions_pass = dict.fromkeys((f"d{i}" for i in range(1, 9)), True)
        return orchestrator.JudgePanelResult(
            translation_scores={"bleu": 0.85},
            style_alignment_passes=all_dimensions_pass,
            overall_score=0.85,
            verdict="PASS",
        )

    monkeypatch.setattr(orchestrator, "_evaluate_with_judges", approve_all)

    class _TimingOutRegistry:
        @staticmethod
        async def commit_question(*_args: Any, **_kwargs: Any) -> tuple[str, str]:
            # Simulate the wait_for inside _commit_question_onchain firing.
            raise asyncio.TimeoutError("simulated 90s hang")

    def registry_factory() -> type[_TimingOutRegistry]:
        # The class itself stands in for the registry object.
        return _TimingOutRegistry

    monkeypatch.setattr(
        orchestrator, "_get_chain_question_registry", registry_factory
    )

    event = {
        "title": "Commit timeout event",
        "sources": [{"url": "https://example.com/timeout"}],
        "language": "en",
    }
    trusted_bid = [BidRecord(agent_address="0xt", bid_amount=1.0, reputation=1.0)]

    # Force ``auction_mode='real'`` so the orchestrator actually delegates
    # to ``_get_chain_question_registry`` instead of the mock branch.
    result = await run_lifecycle(
        event,
        auction_window_seconds=0.0,
        mock_bids=trusted_bid,
        auction_mode="real",
    )

    # The lifecycle still reaches SUBMITTED: the orchestrator records
    # ``question_id = "pending-<event_id>"`` and ``tx_hash = None`` rather
    # than failing the whole event.
    assert result["status"] == "SUBMITTED"
    assert result["question_id"].startswith("pending-")
    assert result.get("commit_tx_hash") is None

    event_id = result["event_id"]
    with Session(engine) as session:
        question_row = session.exec(
            select(Question).where(Question.event_id == event_id)
        ).one()
        assert question_row.question_id_onchain.startswith("pending-")
        assert question_row.tx_hash is None