ChargeBackOps / tests /test_env.py
mitudrudutta's picture
feat: Implement wait_for_updates action for handling delayed cases and evidence
2dedffd
from scenarios.arbitration import ARB_FEE_PER_SIDE, ArbitrationOutcome
from scenarios.case_generator import generate_task
from scenarios.issuer_model import IssuerDecision, IssuerReview
from core.models import ChargebackOpsAction
from server.chargeback_ops_environment import ChargebackOpsEnvironment
class _ScriptedIssuer:
    """Issuer test double that replays a fixed script of decisions.

    Every ``decide_review`` call returns the next planned decision; once the
    script is exhausted the final decision is repeated. This lets the
    multi-round environment tests walk the dispute lifecycle without relying
    on the exact thresholds in IssuerAgent (the scoring math is already
    pinned in tests/test_issuer.py).
    """

    def __init__(self, decisions: list[IssuerDecision]):
        # Copy so later mutation of the caller's list cannot alter the script.
        self._decisions = list(decisions)
        # Round numbers received so far, in call order.
        self.calls: list[int] = []

    def decide_review(self, case, progress, round_number):
        """Record ``round_number`` and return the scripted review for this call.

        ``case`` and ``progress`` are accepted but not consulted.
        """
        self.calls.append(round_number)
        call_index = len(self.calls) - 1
        last_index = len(self._decisions) - 1
        # Clamp so calls past the end of the script reuse the last decision.
        planned = self._decisions[min(call_index, last_index)]
        return IssuerReview(
            decision=planned,
            evidence_strength_score=0.5,
            rationale=f"stub decision {planned.value} at r{round_number}",
        )
def _drive_case_into_round_2(env: ChargebackOpsEnvironment) -> None:
    """Play the round-1 script for CB-E1 up to and including submission.

    Selects the case, queries the orders and shipping systems, attaches the
    order confirmation and delivery scan, sets the "contest" strategy and
    submits the representment. Callers are expected to have installed an
    Issuer stub that requests more evidence on round 1, so that the
    environment is left in its round-2 state.
    """
    round_one_script = (
        {"action_type": "select_case"},
        {"action_type": "query_system", "system_name": "orders"},
        {"action_type": "query_system", "system_name": "shipping"},
        {
            "action_type": "add_evidence",
            "evidence_ids": ["E1-ORDER-CONF", "E1-DELIVERY-SCAN"],
        },
        {"action_type": "set_strategy", "strategy": "contest"},
        {"action_type": "submit_representment"},
    )
    # Build each action right before stepping, one env.step per entry.
    for fields in round_one_script:
        env.step(ChargebackOpsAction(case_id="CB-E1", **fields))
def test_reset_returns_task_observation():
    """Resetting to the easy task yields one well-formed queue entry."""
    environment = ChargebackOpsEnvironment()
    observation = environment.reset(task_id="goods_not_received_easy")

    assert observation.task_id == "goods_not_received_easy"
    assert observation.steps_remaining == 10
    assert len(observation.queue) == 1

    # Spot-check the formatting of the single queued case.
    entry = observation.queue[0]
    assert entry.transaction_id.startswith("txn_")
    assert entry.merchant_mcc.isdigit()
    assert entry.masked_card.startswith("****")
def test_reset_accepts_curriculum_difficulty():
    """Reset with a difficulty and seed produces a generated task of that tier."""
    environment = ChargebackOpsEnvironment()
    observation = environment.reset(difficulty="hard", seed=7)

    assert observation.task_id == "generated_hard_s7"
    assert observation.difficulty == "hard"
    queue_size = len(observation.queue)
    assert queue_size >= 2
def test_easy_case_can_be_won():
    """A fully evidenced, contested CB-E1 ends the episode with a high score."""
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")

    # Everything that precedes the final submission, in execution order.
    preparation = (
        {"action_type": "select_case"},
        {"action_type": "inspect_case"},
        {"action_type": "query_system", "system_name": "orders"},
        {"action_type": "query_system", "system_name": "shipping"},
        {"action_type": "query_system", "system_name": "support"},
        {
            "action_type": "add_evidence",
            "evidence_ids": [
                "E1-ORDER-CONF",
                "E1-DELIVERY-SCAN",
                "E1-SIGNATURE",
                "E1-SUPPORT-ACK",
            ],
        },
        {"action_type": "set_strategy", "strategy": "contest"},
    )
    for fields in preparation:
        env.step(ChargebackOpsAction(case_id="CB-E1", **fields))

    submission = ChargebackOpsAction(
        action_type="submit_representment",
        case_id="CB-E1",
    )
    final_obs = env.step(submission)

    assert final_obs.done is True
    assert final_obs.grader_report is not None
    assert final_obs.grader_report.normalized_score > 0.8
def test_generated_task_reproducibility():
    """Generating twice with the same seed and difficulty gives identical cases."""
    first = generate_task(99, difficulty="medium")
    second = generate_task(99, difficulty="medium")

    assert first.task_id == second.task_id
    assert len(first.cases) == len(second.cases)

    compared_fields = ("case_id", "amount", "optimal_strategy")
    for left, right in zip(first.cases, second.cases):
        for field in compared_fields:
            assert getattr(left, field) == getattr(right, field)
def test_generated_task_runs_in_environment():
    """A generated task can be reset by id and accepts a first select_case step."""
    env = ChargebackOpsEnvironment()
    initial = env.reset(task_id="generated_easy_s7")
    assert initial.task_id == "generated_easy_s7"
    assert len(initial.queue) >= 1

    # Select whichever case heads the queue.
    first_case_id = initial.queue[0].case_id
    select = ChargebackOpsAction(action_type="select_case", case_id=first_case_id)
    after_select = env.step(select)

    assert after_select.selected_case_id == first_case_id
    assert after_select.visible_case is not None
    assert after_select.visible_case.transaction_timestamp.endswith("Z")
    assert "episode_metrics" in after_select.info
def test_marathon_task_has_wave_arrivals_and_wait_action():
    """Marathon reset shows the first wave of cases and counts future arrivals."""
    environment = ChargebackOpsEnvironment()
    first_obs = environment.reset(task_id="monthly_dispute_backlog_marathon")

    assert first_obs.task_id == "monthly_dispute_backlog_marathon"
    assert first_obs.steps_remaining == 60
    assert len(first_obs.queue) == 4

    metrics = first_obs.info["episode_metrics"]
    assert metrics["future_case_count"] == 8.0

    # Immediately after reset the wait action is not offered.
    assert "wait_for_updates" not in first_obs.available_actions
def test_marathon_delayed_evidence_and_issuer_review():
    """Waiting delivers delayed shipping evidence and later the Issuer review."""
    env = ChargebackOpsEnvironment()
    env.reset(task_id="monthly_dispute_backlog_marathon")
    case_id = "CB-L02"

    def act(action_type, **fields):
        # One env.step per call, building the action just before stepping.
        return env.step(ChargebackOpsAction(action_type=action_type, **fields))

    act("select_case", case_id=case_id)
    act("query_system", case_id=case_id, system_name="orders")
    obs = act("query_system", case_id=case_id, system_name="shipping")
    assert "delayed shipping evidence" in obs.last_action_result.lower()
    assert obs.info["pending_evidence_systems"] == ["shipping"]

    # Two waits are issued before the delayed evidence is asserted present.
    for _ in range(2):
        obs = act("wait_for_updates")
    assert "Delayed shipping evidence arrived" in obs.last_action_result
    assert obs.visible_case is not None

    retrieved_ids = {
        evidence.evidence_id for evidence in obs.visible_case.retrieved_evidence
    }
    assert any(eid.endswith("E1-DELIVERY-SCAN") for eid in retrieved_ids)

    # Match by suffix rather than by exact id.
    wanted_suffixes = ("E1-ORDER-CONF", "E1-DELIVERY-SCAN")
    delivery_ids = sorted(
        eid for eid in retrieved_ids if eid.endswith(wanted_suffixes)
    )

    act("add_evidence", case_id=case_id, evidence_ids=delivery_ids)
    act("set_strategy", case_id=case_id, strategy="contest")
    obs = act("submit_representment", case_id=case_id)
    assert obs.visible_case is not None
    assert obs.visible_case.status == "pending_issuer_review"
    assert obs.info["episode_metrics"]["pending_issuer_reviews"] == 1.0

    # Three waits are issued before the Issuer result is asserted.
    for _ in range(3):
        obs = act("wait_for_updates")
    assert "Issuer" in obs.last_action_result
    assert obs.info["episode_metrics"]["pending_issuer_reviews"] == 0.0
def test_generated_task_covers_all_reason_codes():
    """Across 50 seeds and three difficulties, all 6 reason codes must appear."""
    difficulties = ("easy", "medium", "hard")
    observed = {
        case.reason_code
        for seed in range(50)
        for level in difficulties
        for case in generate_task(seed, difficulty=level).cases
    }
    required = {
        "goods_not_received",
        "fraud_cnp",
        "credit_not_processed",
        "duplicate_processing",
        "product_not_as_described",
        "service_not_provided",
    }
    missing = required - observed
    assert not missing, f"Missing: {missing}"
# ---------------------------------------------------------------------------
# multi-round dispute lifecycle
# ---------------------------------------------------------------------------
def test_pre_arb_available_actions_exclude_submit_representment():
    """In round 2 the pre-arb actions replace submit_representment."""
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")
    # Scripted Issuer asks for more evidence on the first review.
    env._issuer_agent = _ScriptedIssuer([IssuerDecision.REQUEST_MORE_EVIDENCE])
    _drive_case_into_round_2(env)

    offered = env._build_available_actions()

    assert "submit_representment" not in offered
    for pre_arb_action in (
        "respond_to_pre_arb",
        "escalate_to_arbitration",
        "accept_arbitration_loss",
    ):
        assert pre_arb_action in offered
def test_full_three_round_cycle_ending_in_arbitration():
    """Round 1 -> pre-arb -> arbitration, ending with a merchant win.

    The scripted Issuer requests more evidence on its first review and
    escalates on its second, so responding to pre-arb sends the case to
    arbitration (round 3).
    """
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")
    env._issuer_agent = _ScriptedIssuer(
        [
            IssuerDecision.REQUEST_MORE_EVIDENCE,
            IssuerDecision.ESCALATE_TO_ARBITRATION,
        ]
    )
    _drive_case_into_round_2(env)

    # Query support before citing the signature / support-ack evidence below.
    env.step(
        ChargebackOpsAction(
            action_type="query_system", case_id="CB-E1", system_name="support"
        )
    )
    obs = env.step(
        ChargebackOpsAction(
            action_type="respond_to_pre_arb",
            case_id="CB-E1",
            compelling_evidence_ids=["E1-SIGNATURE", "E1-SUPPORT-ACK"],
            note="Added signature delivery proof and support ack for pre-arb.",
        )
    )

    progress = env._progress_by_case["CB-E1"]
    case = env._lookup_case("CB-E1")
    assert progress.round_number == 3
    assert progress.arbitration_outcome == ArbitrationOutcome.MERCHANT_WINS.value
    assert progress.arb_fees_paid == ARB_FEE_PER_SIDE
    assert progress.final_economic_outcome is not None
    # Previously this compared the value with itself, which can never fail.
    # Expect the merchant-win figure used by the voluntary-escalation test:
    # the disputed amount minus the merchant's arbitration fee.
    assert progress.final_economic_outcome == case.amount - ARB_FEE_PER_SIDE
    assert progress.resolution_status == "won_arbitration"
    assert obs.done is True
    assert "arbitration" in obs.last_action_result.lower()
def test_respond_to_pre_arb_accepted_skips_arbitration():
    """An Issuer accept in round 2 ends the case with no arbitration.

    No arbitration fee is charged and the merchant keeps the full dispute
    amount.
    """
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")
    issuer_script = [IssuerDecision.REQUEST_MORE_EVIDENCE, IssuerDecision.ACCEPT]
    env._issuer_agent = _ScriptedIssuer(issuer_script)
    _drive_case_into_round_2(env)

    pre_arb_response = ChargebackOpsAction(
        action_type="respond_to_pre_arb",
        case_id="CB-E1",
        compelling_evidence_ids=["E1-SIGNATURE"],
    )
    env.step(pre_arb_response)

    outcome = env._progress_by_case["CB-E1"]
    assert outcome.resolution_status == "won_pre_arb"
    assert outcome.arb_fees_paid == 0.0
    assert outcome.arbitration_outcome is None

    disputed_case = env._lookup_case("CB-E1")
    assert outcome.final_economic_outcome == disputed_case.amount
def test_accept_arbitration_loss_skips_fees():
    """Conceding at pre-arb forfeits the dispute amount but pays no arbitration fee."""
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")
    env._issuer_agent = _ScriptedIssuer([IssuerDecision.REQUEST_MORE_EVIDENCE])
    _drive_case_into_round_2(env)

    concede = ChargebackOpsAction(
        action_type="accept_arbitration_loss",
        case_id="CB-E1",
    )
    env.step(concede)

    outcome = env._progress_by_case["CB-E1"]
    disputed_case = env._lookup_case("CB-E1")
    assert outcome.resolution_status == "conceded_pre_arb"
    assert outcome.arb_fees_paid == 0.0
    assert outcome.arbitration_outcome is None
    assert outcome.final_economic_outcome == -disputed_case.amount
def test_escalate_to_arbitration_from_round_2():
    """The merchant may file for arbitration directly from round 2.

    Either arbitration outcome is accepted; the economic result must equal
    the signed dispute amount minus the per-side arbitration fee.
    """
    env = ChargebackOpsEnvironment()
    env.reset(task_id="goods_not_received_easy")
    env._issuer_agent = _ScriptedIssuer([IssuerDecision.REQUEST_MORE_EVIDENCE])
    _drive_case_into_round_2(env)

    escalate = ChargebackOpsAction(
        action_type="escalate_to_arbitration",
        case_id="CB-E1",
    )
    env.step(escalate)

    outcome = env._progress_by_case["CB-E1"]
    disputed_case = env._lookup_case("CB-E1")
    merchant_wins = ArbitrationOutcome.MERCHANT_WINS.value
    issuer_wins = ArbitrationOutcome.ISSUER_WINS.value

    assert outcome.round_number == 3
    assert outcome.arb_fees_paid == ARB_FEE_PER_SIDE
    assert outcome.arbitration_outcome in {merchant_wins, issuer_wins}

    # A win recovers the amount and a loss forfeits it; the fee applies to both.
    expected_outcome = (
        disputed_case.amount - ARB_FEE_PER_SIDE
        if outcome.arbitration_outcome == merchant_wins
        else -disputed_case.amount - ARB_FEE_PER_SIDE
    )
    assert outcome.final_economic_outcome == expected_outcome