# Spaces: mitudrudutta / ChargeBackOps (Sleeping) - File size: 3,531 Bytes

"""Unit tests for scenarios.arbitration.

These tests pin the three bands of the arbitration ruling (merchant-wins,
issuer-wins, deterministic coin flip) and the $250-per-side fee accounting so
a regression in the terminal-round math shows up before the end-to-end env
tests do.
"""

from __future__ import annotations

from dataclasses import replace

from scenarios.arbitration import (
    ARB_FEE_PER_SIDE,
    ARB_ISSUER_WIN_THRESHOLD,
    ARB_MERCHANT_WIN_THRESHOLD,
    ArbitrationOutcome,
    _coin_flip_merchant_wins,
    arbitration_ruling,
)
from scenarios.simulation import CaseProgress, get_task


# Shared fixtures: look up the "goods_not_received_easy" task once at import
# time and reuse its first case as the dispute passed to every ruling below.
_TASK = get_task("goods_not_received_easy")
_CASE = _TASK.cases[0]


def _progress(attached: list[str]) -> CaseProgress:
    """Return a fresh ``CaseProgress`` carrying a copy of *attached*.

    The evidence ids are copied into a new list so the caller's list and the
    progress object never share the same list instance.
    """
    state = CaseProgress()
    evidence_ids = [*attached]
    state.attached_evidence_ids = evidence_ids
    return state


def test_merchant_wins_on_strong_packet():
    """Strong packet (required + 2 helpful, score 0.8) → MERCHANT_WINS.

    Checks that the score reaches the merchant-win threshold, that the ruling
    reports the per-side fee, and that the merchant nets the disputed amount
    minus that fee.
    """
    strong_packet = [
        "E1-ORDER-CONF",
        "E1-DELIVERY-SCAN",
        "E1-SIGNATURE",
        "E1-SUPPORT-ACK",
    ]
    verdict = arbitration_ruling(_CASE, _progress(strong_packet))

    assert verdict.evidence_strength_score >= ARB_MERCHANT_WIN_THRESHOLD
    assert verdict.outcome == ArbitrationOutcome.MERCHANT_WINS
    assert verdict.arb_fee_per_side == ARB_FEE_PER_SIDE

    # Winning recovers the amount, but the merchant still pays its own fee.
    expected_pnl = _CASE.amount - ARB_FEE_PER_SIDE
    assert verdict.merchant_net_pnl == expected_pnl


def test_issuer_wins_on_empty_packet():
    """Empty packet (score 0, below the 0.35 floor) → ISSUER_WINS.

    The merchant's net P&L must equal the negated amount minus the fee.
    """
    verdict = arbitration_ruling(_CASE, _progress([]))

    assert verdict.evidence_strength_score <= ARB_ISSUER_WIN_THRESHOLD
    assert verdict.outcome == ArbitrationOutcome.ISSUER_WINS

    # Losing costs the merchant the disputed amount and its own fee.
    expected_pnl = -_CASE.amount - ARB_FEE_PER_SIDE
    assert verdict.merchant_net_pnl == expected_pnl


def test_ambiguity_band_uses_deterministic_coin_flip():
    """Mid-band scores resolve via a reproducible, case_id-keyed coin flip.

    Rules twice on the same inputs, checks the outcomes agree, checks the
    score lies strictly between the two thresholds, and checks the outcome
    matches what ``_coin_flip_merchant_wins`` returns for the case id.
    """
    # Two helpful-only evidence ids → 0.4 band score, no required subset.
    helpful_only = _progress(["E1-SIGNATURE", "E1-SUPPORT-ACK"])
    first = arbitration_ruling(_CASE, helpful_only)
    second = arbitration_ruling(_CASE, helpful_only)
    assert first.outcome == second.outcome

    score = first.evidence_strength_score
    assert ARB_ISSUER_WIN_THRESHOLD < score < ARB_MERCHANT_WIN_THRESHOLD

    if _coin_flip_merchant_wins(_CASE.case_id):
        expected = ArbitrationOutcome.MERCHANT_WINS
    else:
        expected = ArbitrationOutcome.ISSUER_WINS
    assert first.outcome == expected


def test_coin_flip_varies_across_case_ids():
    """The coin flip must land on both sides across a spread of case ids.

    Twenty synthetic ids are flipped; seeing only one result would mean the
    ambiguity band is not really split across the benchmark.
    """
    case_ids = [f"CB-TEST-{i}" for i in range(20)]
    observed = set(map(_coin_flip_merchant_wins, case_ids))
    assert observed == {True, False}


def test_ruling_is_pure():
    """Identical inputs give identical rulings, as reproducible runs require.

    Also checks that a copy of the case differing only in ``case_id`` gets the
    same outcome and merchant P&L when given the same strong packet.
    """
    strong_packet = _progress(
        [
            "E1-ORDER-CONF",
            "E1-DELIVERY-SCAN",
            "E1-SIGNATURE",
            "E1-SUPPORT-ACK",
        ]
    )
    first = arbitration_ruling(_CASE, strong_packet)
    repeat = arbitration_ruling(_CASE, strong_packet)
    assert first == repeat

    # The packet scores above 0.65, so no coin flip is involved and the
    # case_id should not influence the result.
    sibling_case = replace(_CASE, case_id="CB-CLONE-1")
    sibling = arbitration_ruling(sibling_case, strong_packet)
    assert sibling.outcome == first.outcome
    assert sibling.merchant_net_pnl == first.merchant_net_pnl