File size: 3,531 Bytes
b7aa1f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e32a33b
 
 
 
b7aa1f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e32a33b
b7aa1f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e32a33b
 
 
b7aa1f0
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Unit tests for scenarios.arbitration 

These tests pin the three bands of the arbitration ruling (merchant-wins,
issuer-wins, deterministic coin flip) and the $250-per-side fee accounting so
a regression in the terminal-round math shows up before the end-to-end env
tests do.
"""

from __future__ import annotations

from dataclasses import replace

from scenarios.arbitration import (
    ARB_FEE_PER_SIDE,
    ARB_ISSUER_WIN_THRESHOLD,
    ARB_MERCHANT_WIN_THRESHOLD,
    ArbitrationOutcome,
    _coin_flip_merchant_wins,
    arbitration_ruling,
)
from scenarios.simulation import CaseProgress, get_task


# Module-level fixtures shared by the ruling tests below: the task looked up
# under the name "goods_not_received_easy" and the first case it carries.
# NOTE(review): the evidence ids used in the tests (E1-...) presumably belong
# to this first case -- confirm against the task definition.
_TASK = get_task("goods_not_received_easy")
_CASE = _TASK.cases[0]


def _progress(attached: list[str]) -> CaseProgress:
    """Return a fresh ``CaseProgress`` carrying the given evidence ids.

    The ids are copied into a new list, so the returned object does not
    share the caller's list.
    """
    progress = CaseProgress()
    progress.attached_evidence_ids = [*attached]
    return progress


def test_merchant_wins_on_strong_packet():
    """A four-item evidence packet must produce a MERCHANT_WINS ruling.

    The original note describes the packet as "required + 2 helpful" with a
    score of 0.8 against a 0.65 bar; those figures are defined in
    scenarios.arbitration (not visible here), so the checks go through the
    imported constants instead of literals.
    """
    strong_packet = [
        "E1-ORDER-CONF",
        "E1-DELIVERY-SCAN",
        "E1-SIGNATURE",
        "E1-SUPPORT-ACK",
    ]
    verdict = arbitration_ruling(_CASE, _progress(strong_packet))

    # The score must reach the merchant-win threshold and the outcome follow.
    assert verdict.evidence_strength_score >= ARB_MERCHANT_WIN_THRESHOLD
    assert verdict.outcome == ArbitrationOutcome.MERCHANT_WINS

    # Fee accounting: the merchant recovers the amount minus its own fee.
    assert verdict.arb_fee_per_side == ARB_FEE_PER_SIDE
    assert verdict.merchant_net_pnl == _CASE.amount - ARB_FEE_PER_SIDE


def test_issuer_wins_on_empty_packet():
    """With no evidence attached the ruling must be ISSUER_WINS.

    The score has to sit at or below ``ARB_ISSUER_WIN_THRESHOLD`` (0.35 per
    the original note), and the merchant's net P&L is the disputed amount
    plus the arbitration fee, both lost.
    """
    empty_packet = _progress([])
    verdict = arbitration_ruling(_CASE, empty_packet)

    assert verdict.evidence_strength_score <= ARB_ISSUER_WIN_THRESHOLD
    assert verdict.outcome == ArbitrationOutcome.ISSUER_WINS
    # Merchant loses the amount and also pays its side of the fee.
    assert verdict.merchant_net_pnl == -_CASE.amount - ARB_FEE_PER_SIDE


def test_ambiguity_band_uses_deterministic_coin_flip():
    """A score strictly between the two thresholds is settled by the coin flip.

    The flip is keyed on the case id, so ruling twice on the same inputs must
    give the same outcome, and that outcome must match what
    ``_coin_flip_merchant_wins`` reports for this case id.
    """
    # Two evidence ids only; the original note calls these "helpful-only"
    # and quotes a 0.4 score -- the band assertion below is what enforces it.
    middling_packet = _progress(["E1-SIGNATURE", "E1-SUPPORT-ACK"])
    first = arbitration_ruling(_CASE, middling_packet)
    second = arbitration_ruling(_CASE, middling_packet)

    assert first.outcome == second.outcome
    assert ARB_ISSUER_WIN_THRESHOLD < first.evidence_strength_score < ARB_MERCHANT_WIN_THRESHOLD

    if _coin_flip_merchant_wins(_CASE.case_id):
        expected = ArbitrationOutcome.MERCHANT_WINS
    else:
        expected = ArbitrationOutcome.ISSUER_WINS
    assert first.outcome == expected


def test_coin_flip_varies_across_case_ids():
    """The coin flip must not return the same answer for every case id.

    Twenty synthetic ids (``CB-TEST-0`` .. ``CB-TEST-19``) are flipped; both
    ``True`` and ``False`` have to appear. If all ids landed on one side, the
    ambiguity band would not split outcomes across the benchmark.
    """
    seen = set()
    for index in range(20):
        seen.add(_coin_flip_merchant_wins(f"CB-TEST-{index}"))
    assert seen == {True, False}


def test_ruling_is_pure():
    """Ruling twice on identical inputs must give equal results.

    Also checks that a copy of the case differing only in ``case_id`` gets
    the same outcome and net P&L for the same evidence packet.
    """
    packet = _progress(
        ["E1-ORDER-CONF", "E1-DELIVERY-SCAN", "E1-SIGNATURE", "E1-SUPPORT-ACK"]
    )
    baseline = arbitration_ruling(_CASE, packet)
    repeat = arbitration_ruling(_CASE, packet)
    assert baseline == repeat

    # Same evidence under a different case id. The original note says this
    # packet scores above the merchant threshold, so the case-id-keyed coin
    # flip should not come into play and the outcome should carry over.
    twin_case = replace(_CASE, case_id="CB-CLONE-1")
    twin_ruling = arbitration_ruling(twin_case, packet)
    assert twin_ruling.outcome == baseline.outcome
    assert twin_ruling.merchant_net_pnl == baseline.merchant_net_pnl