Spaces:
Running
Running
File size: 10,061 Bytes
88d2f2a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | """Unit tests for ``polyglot_alpha.agents.dispatch``.
These tests cover the dispatch surface that ``orchestrator.py`` depends on:
* All 3 reference seeders instantiate without a real wallet (eval-only).
* ``collect_bids_inline`` returns one bid per seeder, with bid amounts that
visibly differ across the three bid strategies.
* ``collect_bids_inline`` tolerates a single seeder crashing and still
returns the remaining 2 bids (no synthetic placeholder).
* ``run_pipeline`` produces a valid ``polymarket.types.Question`` with a
populated layer trace.
* ``run_for_winner`` returns a ``PipelineResult`` whose ``final_question``
matches the orchestrator's wire shape.
Run with: ``.venv/bin/pytest tests/test_dispatch.py -q``
"""
from __future__ import annotations
import asyncio
import json
from typing import Any
import pytest
from polyglot_alpha.agents import AGENT_REGISTRY, dispatch
from polyglot_alpha.llm import MockLLM
from polyglot_alpha.polymarket.types import Question as PolymarketQuestion
# --------------------------------------------------------------------------- #
# Fixtures #
# --------------------------------------------------------------------------- #
@pytest.fixture()
def sample_event() -> dict[str, Any]:
    """Return the Chinese-language event payload shared by the dispatch tests.

    The body is one sentence repeated 30 times so the event carries enough
    text to drive a spread between the seeders' bid strategies.
    """
    # Repeat the sentence to get a reasonably long article body.
    repeated_body = "中国宣布将就关税政策做出回应。" * 30
    event: dict[str, Any] = dict(
        event_id="evt_dispatch_001",
        title="Sample geopolitical event for dispatch tests",
        title_zh="测试事件",
        body_zh=repeated_body,
        cutoff_ts=1_900_000_000,
        category="geopolitics",
        language="zh",
        url="https://example.com/cn/news/001",
    )
    return event
@pytest.fixture()
def mock_llm_factory():
    """Provide a zero-argument callable that builds a deterministic ``MockLLM``.

    Every LLM produced by the factory is configured with the same canned
    JSON response, so the whole pipeline sees identical model output.
    """
    payload = {
        "question_en": "Will the tariff response be announced by 2026-12-31?",
        "resolution_criteria": (
            "Resolves YES if the State Council issues an official "
            "tariff response before 2026-12-31T23:59:59Z."
        ),
        "end_date_iso": "2026-12-31T23:59:59Z",
        "tags": ["geopolitics", "tariffs"],
        "entities": ["State Council"],
        "risks": ["delayed announcement"],
    }
    # Serialise once; each factory call reuses the same string.
    canned_json = json.dumps(payload)

    def _build_mock_llm():
        return MockLLM(model_id="mock-dispatch", canned_response=canned_json)

    return _build_mock_llm
# --------------------------------------------------------------------------- #
# Agent construction #
# --------------------------------------------------------------------------- #
def test_all_seeders_instantiate_without_real_wallet() -> None:
    """Each registered reference seeder constructs from a throwaway key.

    Checks that the registry holds exactly the three expected seeder names
    and that every instance exposes a non-empty ``MODEL_ID`` and a
    42-character ``0x``-prefixed address.
    """
    expected_names = {"gemini-v2", "deepseek-v2", "qwen-v2"}
    assert set(AGENT_REGISTRY.keys()) == expected_names

    for seeder_name, seeder_cls in AGENT_REGISTRY.items():
        # A fresh throwaway private key per seeder; no real wallet involved.
        seeder = seeder_cls(wallet_pk=dispatch._throwaway_pk())
        assert seeder.MODEL_ID, f"{seeder_name} missing MODEL_ID"
        assert seeder.address.startswith("0x")
        assert len(seeder.address) == 42
# --------------------------------------------------------------------------- #
# collect_bids_inline #
# --------------------------------------------------------------------------- #
@pytest.mark.asyncio
async def test_collect_bids_inline_returns_three_distinct_bids(
    sample_event: dict[str, Any],
) -> None:
    """Every seeder bids, and the bids are not all the same amount.

    Also verifies that each bid is positive and carries the full set of
    keys this test treats as required.
    """
    bids = await dispatch.collect_bids_inline(sample_event, window_seconds=10.0)
    assert len(bids) == 3

    names = {bid["agent_name"] for bid in bids}
    assert names == {"gemini-v2", "deepseek-v2", "qwen-v2"}, (
        f"unexpected agent_name values: {names}"
    )

    bid_amounts = [bid["bid_amount"] for bid in bids]

    # No seeder may submit a zero or negative bid.
    assert all(amount > 0 for amount in bid_amounts)

    # Per-seeder bid windows differ (BID_MIN/MAX), so after rounding there
    # must be at least two distinct amounts.
    distinct_amounts = {round(amount, 4) for amount in bid_amounts}
    assert len(distinct_amounts) >= 2, (
        f"expected bid spread, got {bid_amounts}"
    )

    # Each bid must expose all of the fields listed below.
    required_keys = {
        "agent_address",
        "agent_name",
        "bid_amount",
        "candidate_hash",
        "reputation",
        "confidence",
        "expected_cost_usdc",
        "llm_model",
    }
    for bid in bids:
        assert not (required_keys - bid.keys()), (
            f"missing keys in bid: {required_keys - bid.keys()}"
        )
@pytest.mark.asyncio
async def test_collect_bids_inline_drops_failed_agents(
    sample_event: dict[str, Any],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A seeder whose evaluation raises contributes no bid at all.

    Earlier behaviour: a failing agent produced a hardcoded 1.0 USDC
    fallback bid with ``candidate_hash="0x0"`` and an ``_error`` key, and
    that placeholder went on-chain as if it were a real auction vote.

    Current behaviour: the failing agent is dropped, so only agents that
    produced a valid evaluation appear in the result.
    """
    from polyglot_alpha.agents.base import BaseTranslatorAgent

    real_evaluate = BaseTranslatorAgent.evaluate_event
    calls_seen = 0

    async def _flaky_evaluate(self, event_dict):
        nonlocal calls_seen
        calls_seen += 1
        # Only the very first evaluation fails; later ones run unmodified.
        if calls_seen != 1:
            return await real_evaluate(self, event_dict)
        raise RuntimeError("simulated LLM quota error")

    monkeypatch.setattr(BaseTranslatorAgent, "evaluate_event", _flaky_evaluate)

    bids = await dispatch.collect_bids_inline(sample_event, window_seconds=10.0)

    # Two seeders succeed; the failed one leaves no trace in the result, so
    # no synthetic placeholder can enter the auction.
    assert len(bids) == 2
    for surviving_bid in bids:
        assert "_error" not in surviving_bid
        assert surviving_bid["candidate_hash"] != "0x0"
@pytest.mark.asyncio
async def test_collect_bids_inline_zero_window_returns_empty() -> None:
    """A zero-second window must still return a well-formed list of bids.

    Despite the test name, an empty result is not required: bid tasks may
    or may not finish before a zero-length window closes. The test only
    requires that the call returns a list and that any bid which did make
    it through carries an ``agent_name``.
    """
    minimal_event = {"event_id": "e1", "title": "t"}
    bids = await dispatch.collect_bids_inline(minimal_event, window_seconds=0.0)

    # Either outcome (empty or partially filled) is acceptable at window=0.
    assert isinstance(bids, list)
    assert all("agent_name" in bid for bid in bids)
# --------------------------------------------------------------------------- #
# run_pipeline #
# --------------------------------------------------------------------------- #
@pytest.mark.asyncio
async def test_run_pipeline_returns_polymarket_question(
    sample_event: dict[str, Any],
    mock_llm_factory,
) -> None:
    """``run_pipeline`` yields a populated ``polymarket.types.Question``."""
    question = await dispatch.run_pipeline(
        sample_event,
        winner_agent_name="gemini-v2",
        llm_factory=mock_llm_factory,
    )

    assert isinstance(question, PolymarketQuestion)
    # The identifier must be non-empty.
    assert question.question_id

    # The question text should reflect the canned synthesizer output.
    lowered_text = question.text.lower()
    assert any(keyword in lowered_text for keyword in ("tariff", "announce"))

    assert question.category == "geopolitics"
    # The end date is expected to come from the synthesizer output.
    assert question.end_date_iso
@pytest.mark.asyncio
async def test_run_pipeline_layer_trace_populated(
    sample_event: dict[str, Any],
    mock_llm_factory,
) -> None:
    """The returned question carries a layer trace naming the winning agent.

    The trace must also contain a ``synthesized`` entry and non-negative
    ``quality_score`` and ``confidence`` values.
    """
    winner = "deepseek-v2"
    question = await dispatch.run_pipeline(
        sample_event,
        winner_agent_name=winner,
        llm_factory=mock_llm_factory,
    )

    # Use getattr so a missing attribute produces the explicit message below.
    trace = getattr(question, "layer_trace", None)
    assert trace is not None, "layer_trace attribute missing"

    assert trace["winner_agent"] == "deepseek-v2"
    assert "synthesized" in trace
    assert trace["quality_score"] >= 0.0
    assert trace["confidence"] >= 0.0
@pytest.mark.asyncio
async def test_run_pipeline_unknown_agent_falls_back_to_gemini(
    sample_event: dict[str, Any],
    mock_llm_factory,
) -> None:
    """An unrecognised winner name must not crash the pipeline.

    The intended behaviour is a fallback to gemini; this test only checks
    that a question with non-empty text is still produced.
    """
    unknown_name = "not-a-real-agent"
    question = await dispatch.run_pipeline(
        sample_event,
        winner_agent_name=unknown_name,
        llm_factory=mock_llm_factory,
    )

    assert isinstance(question, PolymarketQuestion)
    assert question.text
# --------------------------------------------------------------------------- #
# run_for_winner (orchestrator entry point) #
# --------------------------------------------------------------------------- #
@pytest.mark.asyncio
async def test_run_for_winner_returns_pipeline_result(
    sample_event: dict[str, Any],
    mock_llm_factory,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The orchestrator-facing entry point returns a ``PipelineResult``.

    Dispatch no longer has a fallback path that hides LLM failures, so
    ``make_llm`` is patched to hand back a ``MockLLM``. Relying on missing
    API keys is not safe: the test environment may load ``.env`` and pick
    up a stale or credit-exhausted key.
    """

    def _fake_make_llm(model_id):
        # The requested model id is ignored; always return the mock.
        return mock_llm_factory()

    monkeypatch.setattr("polyglot_alpha.agents.dispatch.make_llm", _fake_make_llm)

    result = await dispatch.run_for_winner(sample_event, winner_address="0xdead")
    assert isinstance(result, dispatch.PipelineResult)

    # The candidate hash must be present and 64 characters long.
    assert result.candidate_hash
    assert len(result.candidate_hash) == 64

    final_question = result.final_question
    assert final_question["title"].lower().startswith("will ")
    assert final_question["outcomes"] == ["Yes", "No"]

    # The pipeline trace must be present and use the expected URI prefix.
    assert result.pipeline_trace_ipfs
    assert result.pipeline_trace_ipfs.startswith("ipfs://pipeline/")
|