"""Unit tests for scenarios.arbitration.

These tests pin the three bands of the arbitration ruling (merchant-wins,
issuer-wins, deterministic coin flip) and the $250-per-side fee accounting so
a regression in the terminal-round math shows up before the end-to-end env
tests do.
"""
from __future__ import annotations
from dataclasses import replace
from scenarios.arbitration import (
ARB_FEE_PER_SIDE,
ARB_ISSUER_WIN_THRESHOLD,
ARB_MERCHANT_WIN_THRESHOLD,
ArbitrationOutcome,
_coin_flip_merchant_wins,
arbitration_ruling,
)
from scenarios.simulation import CaseProgress, get_task
# Shared fixture: the "goods_not_received_easy" task is looked up once at
# import time, and its first case is the one the ruling tests in this module
# pass to arbitration_ruling().
_TASK = get_task("goods_not_received_easy")
_CASE = _TASK.cases[0]
def _progress(attached: list[str]) -> CaseProgress:
    """Return a fresh ``CaseProgress`` with *attached* as its evidence ids.

    The ids are copied into a new list, so the returned object does not share
    the caller's list.
    """
    state = CaseProgress()
    state.attached_evidence_ids = [*attached]
    return state
def test_merchant_wins_on_strong_packet():
    """A four-item evidence packet must clear the merchant-win threshold.

    The original author's note puts this packet (required evidence plus two
    helpful items) at a score of 0.8 against a 0.65 bar. The test only relies
    on the score being at or above ``ARB_MERCHANT_WIN_THRESHOLD``; it then
    checks the outcome, the per-side fee, and that the merchant nets the case
    amount minus that fee.
    """
    strong_packet = [
        "E1-ORDER-CONF",
        "E1-DELIVERY-SCAN",
        "E1-SIGNATURE",
        "E1-SUPPORT-ACK",
    ]
    verdict = arbitration_ruling(_CASE, _progress(strong_packet))

    assert verdict.evidence_strength_score >= ARB_MERCHANT_WIN_THRESHOLD
    assert verdict.outcome == ArbitrationOutcome.MERCHANT_WINS
    assert verdict.arb_fee_per_side == ARB_FEE_PER_SIDE
    # Winning recovers the disputed amount, but the merchant's fee is still owed.
    assert verdict.merchant_net_pnl == _CASE.amount - ARB_FEE_PER_SIDE
def test_issuer_wins_on_empty_packet():
    """With no evidence attached the issuer must win.

    The original author's note describes this as score 0 against a 0.35
    floor. The test asserts the score is at or below
    ``ARB_ISSUER_WIN_THRESHOLD``, that the outcome is ``ISSUER_WINS``, and
    that the merchant loses both the case amount and the per-side fee.
    """
    no_evidence = _progress([])
    verdict = arbitration_ruling(_CASE, no_evidence)

    assert verdict.evidence_strength_score <= ARB_ISSUER_WIN_THRESHOLD
    assert verdict.outcome == ArbitrationOutcome.ISSUER_WINS
    # Losing costs the merchant the disputed amount plus its own fee.
    assert verdict.merchant_net_pnl == -_CASE.amount - ARB_FEE_PER_SIDE
def test_ambiguity_band_uses_deterministic_coin_flip():
    """A score strictly between the two thresholds resolves by coin flip.

    The ruling is computed twice to show the outcome is reproducible, and the
    outcome is then compared with what ``_coin_flip_merchant_wins`` returns
    for this case's ``case_id``.
    """
    # Per the original note: two helpful-only evidence ids, no required
    # subset, expected to land at roughly 0.4 -- inside the ambiguity band.
    helpful_only = _progress(["E1-SIGNATURE", "E1-SUPPORT-ACK"])

    first = arbitration_ruling(_CASE, helpful_only)
    second = arbitration_ruling(_CASE, helpful_only)
    assert first.outcome == second.outcome

    assert (
        ARB_ISSUER_WIN_THRESHOLD
        < first.evidence_strength_score
        < ARB_MERCHANT_WIN_THRESHOLD
    )

    if _coin_flip_merchant_wins(_CASE.case_id):
        expected = ArbitrationOutcome.MERCHANT_WINS
    else:
        expected = ArbitrationOutcome.ISSUER_WINS
    assert first.outcome == expected
def test_coin_flip_varies_across_case_ids():
    """The coin flip must yield both answers across a range of case ids.

    Twenty synthetic ids (``CB-TEST-0`` .. ``CB-TEST-19``) are flipped and the
    set of results must be exactly ``{True, False}``. If every id produced the
    same answer, the ambiguity band would not actually be split across the
    benchmark -- this test guards against that.
    """
    case_ids = (f"CB-TEST-{i}" for i in range(20))
    seen = set(map(_coin_flip_merchant_wins, case_ids))
    assert seen == {True, False}
def test_ruling_is_pure():
    """Identical inputs must give identical rulings.

    Reproducible benchmarks depend on this. The test also checks that a copy
    of the case differing only in ``case_id`` gets the same outcome and the
    same merchant P&L for the same evidence packet.
    """
    packet = _progress(
        ["E1-ORDER-CONF", "E1-DELIVERY-SCAN", "E1-SIGNATURE", "E1-SUPPORT-ACK"]
    )

    baseline = arbitration_ruling(_CASE, packet)
    repeat = arbitration_ruling(_CASE, packet)
    assert baseline == repeat

    # Per the original note this packet scores above the merchant-win bar, so
    # the case_id-keyed coin flip should not be involved and renaming the case
    # must not change the result.
    renamed_case = replace(_CASE, case_id="CB-CLONE-1")
    clone_ruling = arbitration_ruling(renamed_case, packet)
    assert clone_ruling.outcome == baseline.outcome
    assert clone_ruling.merchant_net_pnl == baseline.merchant_net_pnl
|