File size: 10,061 Bytes
88d2f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
"""Unit tests for ``polyglot_alpha.agents.dispatch``.

These tests cover the dispatch surface that ``orchestrator.py`` depends on:

* All 3 reference seeders instantiate without a real wallet (eval-only).
* ``collect_bids_inline`` returns one bid per seeder, with bid amounts that
  visibly differ across the three bid strategies.
* ``collect_bids_inline`` tolerates a single seeder crashing and still
  returns the remaining 2 bids (no synthetic placeholder).
* ``run_pipeline`` produces a valid ``polymarket.types.Question`` with a
  populated layer trace.
* ``run_for_winner`` returns a ``PipelineResult`` whose ``final_question``
  matches the orchestrator's wire shape.

Run with: ``.venv/bin/pytest tests/test_dispatch.py -q``
"""

from __future__ import annotations

import asyncio
import json
from typing import Any

import pytest

from polyglot_alpha.agents import AGENT_REGISTRY, dispatch
from polyglot_alpha.llm import MockLLM
from polyglot_alpha.polymarket.types import Question as PolymarketQuestion


# --------------------------------------------------------------------------- #
# Fixtures                                                                    #
# --------------------------------------------------------------------------- #


@pytest.fixture()
def sample_event() -> dict[str, Any]:
    """Provide the Chinese-language event payload shared by the dispatch tests.

    ``body_zh`` is a single sentence repeated 30 times so the event carries a
    long body; the intent is to give the bid strategies enough text to
    produce a visible spread.
    """

    # One sentence repeated to produce a long body.
    long_body = "中国宣布将就关税政策做出回应。" * 30

    event: dict[str, Any] = dict(
        event_id="evt_dispatch_001",
        title="Sample geopolitical event for dispatch tests",
        title_zh="测试事件",
        body_zh=long_body,
        cutoff_ts=1_900_000_000,
        category="geopolitics",
        language="zh",
        url="https://example.com/cn/news/001",
    )
    return event


@pytest.fixture()
def mock_llm_factory():
    """Provide a zero-argument factory that builds a canned ``MockLLM``.

    Every call to the returned factory constructs a new ``MockLLM`` with the
    same ``model_id`` and the same JSON-encoded canned response, so pipeline
    runs that use it see identical LLM output.
    """

    payload = {
        "question_en": "Will the tariff response be announced by 2026-12-31?",
        "resolution_criteria": (
            "Resolves YES if the State Council issues an official "
            "tariff response before 2026-12-31T23:59:59Z."
        ),
        "end_date_iso": "2026-12-31T23:59:59Z",
        "tags": ["geopolitics", "tariffs"],
        "entities": ["State Council"],
        "risks": ["delayed announcement"],
    }
    # Serialise once; every MockLLM produced below shares this string.
    canned = json.dumps(payload)

    def _build_mock_llm():
        return MockLLM(model_id="mock-dispatch", canned_response=canned)

    return _build_mock_llm


# --------------------------------------------------------------------------- #
# Agent construction                                                          #
# --------------------------------------------------------------------------- #


def test_all_seeders_instantiate_without_real_wallet() -> None:
    """Every registered seeder constructs from a throwaway private key.

    Checks that the registry holds exactly the three reference seeder names,
    and that each constructed agent exposes a non-empty ``MODEL_ID`` and a
    42-character address starting with ``0x``.
    """

    registered_names = set(AGENT_REGISTRY.keys())
    assert registered_names == {"gemini-v2", "deepseek-v2", "qwen-v2"}

    for name, agent_cls in AGENT_REGISTRY.items():
        # A fresh throwaway key per agent; no real wallet is involved.
        agent = agent_cls(wallet_pk=dispatch._throwaway_pk())

        assert agent.MODEL_ID, f"{name} missing MODEL_ID"
        assert agent.address.startswith("0x")
        assert len(agent.address) == 42


# --------------------------------------------------------------------------- #
# collect_bids_inline                                                         #
# --------------------------------------------------------------------------- #


@pytest.mark.asyncio
async def test_collect_bids_inline_returns_three_distinct_bids(
    sample_event: dict[str, Any],
) -> None:
    """Each of the three seeders bids, and the amounts are not all equal.

    Also verifies that every bid is positive and carries the full set of
    keys this test treats as required.
    """

    bids = await dispatch.collect_bids_inline(sample_event, window_seconds=10.0)

    # One bid per reference seeder, identified by agent name.
    assert len(bids) == 3
    names = {bid["agent_name"] for bid in bids}
    assert names == {"gemini-v2", "deepseek-v2", "qwen-v2"}, (
        f"unexpected agent_name values: {names}"
    )

    bid_amounts = [bid["bid_amount"] for bid in bids]
    # No seeder may submit a zero or negative bid.
    for amount in bid_amounts:
        assert amount > 0

    # The seeders are configured with differing bid windows (BID_MIN/MAX),
    # so after rounding to 4 decimals at least two amounts must differ.
    distinct_amounts = {round(amount, 4) for amount in bid_amounts}
    assert len(distinct_amounts) >= 2, (
        f"expected bid spread, got {bid_amounts}"
    )

    # Keys every bid dict must expose.
    required_keys = {
        "agent_address",
        "agent_name",
        "bid_amount",
        "candidate_hash",
        "reputation",
        "confidence",
        "expected_cost_usdc",
        "llm_model",
    }
    for bid in bids:
        missing = required_keys - bid.keys()
        assert not missing, (
            f"missing keys in bid: {missing}"
        )


@pytest.mark.asyncio
async def test_collect_bids_inline_drops_failed_agents(
    sample_event: dict[str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A seeder whose evaluation raises contributes no bid at all.

    Earlier behaviour substituted a hardcoded 1.0 USDC fallback bid with
    ``candidate_hash="0x0"`` and an ``_error`` key for each failing agent,
    and that placeholder went on-chain as though it were a genuine auction
    vote. The current contract is that a failure is propagated, so the
    orchestrator records only agents that produced a valid evaluation.
    """

    from polyglot_alpha.agents.base import BaseTranslatorAgent

    real_evaluate = BaseTranslatorAgent.evaluate_event
    calls_seen = 0

    async def _flaky_evaluate(self, event_dict):
        nonlocal calls_seen
        calls_seen += 1
        # Only the very first invocation fails; later ones delegate to the
        # unpatched implementation.
        if calls_seen != 1:
            return await real_evaluate(self, event_dict)
        raise RuntimeError("simulated LLM quota error")

    monkeypatch.setattr(BaseTranslatorAgent, "evaluate_event", _flaky_evaluate)

    bids = await dispatch.collect_bids_inline(sample_event, window_seconds=10.0)

    # Two seeders bid successfully; the failing one is absent rather than
    # being replaced by a synthetic placeholder.
    assert len(bids) == 2
    for bid in bids:
        assert "_error" not in bid
        assert bid["candidate_hash"] != "0x0"


@pytest.mark.asyncio
async def test_collect_bids_inline_zero_window_returns_empty() -> None:
    """A zero-second window returns a well-formed, possibly empty, bid list.

    Despite the test name, an empty result is not required: bid tasks may or
    may not finish before a ``window_seconds=0.0`` deadline, so any number of
    completed bids is tolerated. What must hold is that the call does not
    raise, that the result is a list, and that whatever bids are present are
    well formed.
    """

    bids = await dispatch.collect_bids_inline(
        {"event_id": "e1", "title": "t"}, window_seconds=0.0
    )

    assert isinstance(bids, list)
    # Each seeder is expected to contribute at most one bid, so a short
    # window must never yield more bids than there are registered seeders.
    assert len(bids) <= len(AGENT_REGISTRY)
    # Any bid that did complete must still identify its agent.
    assert all("agent_name" in bid for bid in bids)


# --------------------------------------------------------------------------- #
# run_pipeline                                                                #
# --------------------------------------------------------------------------- #


@pytest.mark.asyncio
async def test_run_pipeline_returns_polymarket_question(
    sample_event: dict[str, Any],
    mock_llm_factory,
) -> None:
    """``run_pipeline`` yields a populated ``polymarket.types.Question``.

    With the canned mock LLM, the question text must mention "tariff" or
    "announce", the category must be ``"geopolitics"``, and both the question
    id and the end date must be non-empty.
    """

    question = await dispatch.run_pipeline(
        sample_event,
        winner_agent_name="gemini-v2",
        llm_factory=mock_llm_factory,
    )

    assert isinstance(question, PolymarketQuestion)
    # The identifier must be non-empty.
    assert question.question_id

    lowered_text = question.text.lower()
    assert any(keyword in lowered_text for keyword in ("tariff", "announce"))

    assert question.category == "geopolitics"
    # Expected to be filled in from the synthesizer output.
    assert question.end_date_iso


@pytest.mark.asyncio
async def test_run_pipeline_layer_trace_populated(
    sample_event: dict[str, Any],
    mock_llm_factory,
) -> None:
    """The returned question carries a ``layer_trace`` with the expected keys.

    The trace must name the winning agent passed in, contain a
    ``"synthesized"`` entry, and report non-negative ``quality_score`` and
    ``confidence`` values.
    """

    question = await dispatch.run_pipeline(
        sample_event,
        winner_agent_name="deepseek-v2",
        llm_factory=mock_llm_factory,
    )

    # Read defensively so a missing attribute fails with a clear message
    # instead of an AttributeError.
    trace = getattr(question, "layer_trace", None)
    assert trace is not None, "layer_trace attribute missing"

    assert trace["winner_agent"] == "deepseek-v2"
    assert "synthesized" in trace
    for score_key in ("quality_score", "confidence"):
        assert trace[score_key] >= 0.0


@pytest.mark.asyncio
async def test_run_pipeline_unknown_agent_falls_back_to_gemini(
    sample_event: dict[str, Any],
    mock_llm_factory,
) -> None:
    """An unrecognised winner name still produces a usable question.

    The pipeline is expected to fall back to gemini rather than crash; this
    test only checks the observable outcome, namely a ``Question`` instance
    with non-empty text.
    """

    unknown_winner = "not-a-real-agent"
    produced = await dispatch.run_pipeline(
        sample_event,
        winner_agent_name=unknown_winner,
        llm_factory=mock_llm_factory,
    )

    assert isinstance(produced, PolymarketQuestion)
    assert produced.text


# --------------------------------------------------------------------------- #
# run_for_winner (orchestrator entry point)                                   #
# --------------------------------------------------------------------------- #


@pytest.mark.asyncio
async def test_run_for_winner_returns_pipeline_result(
    sample_event: dict[str, Any],
    mock_llm_factory,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``run_for_winner`` returns a ``PipelineResult`` in the wire shape.

    Dispatch no longer has a fallback path that hides LLM failures, so
    ``make_llm`` is patched to hand back the ``MockLLM`` explicitly. Relying
    on API keys being absent is not safe: the test environment may load
    ``.env`` and pick up a stale or credit-exhausted key.
    """

    def _stub_make_llm(model_id):
        # The requested model id is ignored; the canned mock is always used.
        return mock_llm_factory()

    monkeypatch.setattr(
        "polyglot_alpha.agents.dispatch.make_llm", _stub_make_llm
    )

    result = await dispatch.run_for_winner(sample_event, winner_address="0xdead")
    assert isinstance(result, dispatch.PipelineResult)

    # The candidate hash must be present and exactly 64 characters long.
    assert result.candidate_hash
    assert len(result.candidate_hash) == 64

    final_question = result.final_question
    assert final_question["title"].lower().startswith("will ")
    assert final_question["outcomes"] == ["Yes", "No"]

    # The trace reference must be present and use the pipeline IPFS prefix.
    assert result.pipeline_trace_ipfs
    assert result.pipeline_trace_ipfs.startswith("ipfs://pipeline/")