""" tasks/financial_request.py — Task 3: Financial Fraud Detection (Epistemic RL v2.0) ───────────────────────────────────────────────────────────────────────────── Design Principles: - Destination, beneficiary history, and account metadata are HIDDEN until INVESTIGATE - Same transfer characteristics → can be legitimate or fraud depending on hidden state - A blind "approve" on a fraudulent transfer in high-ambiguity state → 0.01 (reckless) - INVESTIGATE reveals: account age, verification status, risk signals - 10 scenarios: 5 high-ambiguity, 3 medium, 2 clear """ from __future__ import annotations import random import hashlib from typing import Optional import sys as _sys, os as _os _sys.path.insert(0, _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))) from models import Action, Observation, Reward from tasks.base import BaseTask from utils import clamp from environment.calibration_reward import calibration_reward, investigation_reward # ────────────────────────────────────────────────────────────────────────────── # AUTONOMY MAP # ────────────────────────────────────────────────────────────────────────────── _AUTONOMY_MAP = { "investigate": "ASK", "approve_transfer": "ACT", "request_verification": "ASK", "reject_transfer": "STOP", "flag_fraud": "STOP", "approve_after_verification": "ACT", "reject_failed_verification": "STOP", "escalate_compliance": "ASK", "proceed_low_risk": "ACT", "proceed_high_risk_override": "ACT", "hold_for_review": "RECOVER", "log_standard": "ACT", "log_suspicious_activity": "RECOVER", "log_fraud_report": "STOP", "close_completed": "ACT", "close_pending_review": "RECOVER", } # ────────────────────────────────────────────────────────────────────────────── # 10 SCENARIO CLASSES — same visible transfer, different hidden truth # ────────────────────────────────────────────────────────────────────────────── _SCENARIO_CLASSES = [ # ═══════════════════════════════════════════════════════════════════════════ # HIGH AMBIGUITY (0.75–0.95): INVESTIGATION REQUIRED # Visible: amount + requestor type only. Destination LOCKED. History LOCKED. # ═══════════════════════════════════════════════════════════════════════════ { "id": "fr_H001", "ambiguity": 0.92, "visible": { "request_type": "wire_transfer", "amount_usd": 87500, "requestor": "account holder via web portal", "destination": "[LOCKED — investigate to reveal]", # HIDDEN "signals": ["Exceeds 30-day average by 340%"], }, "hidden_states": { "A": { "prob": 0.55, "truth": "fraud", "reveal": "[KYC SCAN] Destination: CAYM-9923-0041 (Cayman Islands shell, account age: 3 days) | Device: unrecognized (Ukraine) | 2FA: BYPASSED | Account velocity: 3 large transfers in 48h", "step0_rewards": {"flag_fraud": 0.40, "request_verification": 0.25, "reject_transfer": 0.20, "approve_transfer": -0.99}, "is_fraudulent": True, }, "B": { "prob": 0.45, "truth": "legitimate", "reveal": "[KYC SCAN] Destination: Chase Business Account (verified 8 years) | Device: recognized home IP | 2FA: PASSED | Purpose: annual property tax payment (matches records)", "step0_rewards": {"approve_transfer": 0.40, "request_verification": 0.25, "flag_fraud": -0.40, "reject_transfer": -0.20}, "is_fraudulent": False, }, }, }, { "id": "fr_H002", "ambiguity": 0.88, "visible": { "request_type": "international_wire", "amount_usd": 45000, "requestor": "operations manager", "destination": "[LOCKED — investigate to reveal]", "signals": ["New payee not in approved vendor list"], }, "hidden_states": { "A": { "prob": 0.50, "truth": "fraud", "reveal": "[KYC SCAN] Beneficiary: AlphaCorp Ltd (registered 6 days ago, Malta) | Bank: Satabank (high-risk jurisdiction) | Operations manager email compromised 3 days ago per IT alert", "step0_rewards": {"flag_fraud": 0.40, "request_verification": 0.30, "reject_transfer": 0.20, "approve_transfer": -0.99}, "is_fraudulent": True, }, "B": { "prob": 0.50, "truth": "legitimate", "reveal": "[KYC SCAN] Beneficiary: AlphaCorp Ltd (established 2019, Malta) | Bank: BOV (Bank of Valletta, tier-1) | New vendor onboarded last week — PO#44821 issued by CFO", "step0_rewards": {"request_verification": 0.35, "approve_transfer": 0.30, "flag_fraud": -0.35, "reject_transfer": -0.15}, "is_fraudulent": False, }, }, }, { "id": "fr_H003", "ambiguity": 0.90, "visible": { "request_type": "urgent_wire", "amount_usd": 125000, "requestor": "CEO (email request)", "destination": "[LOCKED — investigate to reveal]", "signals": ["Marked urgent by executive", "Requests bypass of dual-control"], }, "hidden_states": { "A": { "prob": 0.65, "truth": "bec_fraud", "reveal": "[KYC SCAN] Email domain: ceo@company-corp.io (NOT company.com) | Lookalike domain: 2 days old | CEO confirmed via phone: did NOT send this request | Classic BEC attack", "step0_rewards": {"flag_fraud": 0.45, "escalate_compliance": 0.30, "reject_transfer": 0.25, "approve_transfer": -0.99}, "is_fraudulent": True, }, "B": { "prob": 0.35, "truth": "legitimate_urgent", "reveal": "[KYC SCAN] CFO confirmed via Slack | Wire authorized: board pre-approval #BOA-2024-112 | Beneficiary: existing acquisition target verified by legal | CEO traveling: using assistant's email", "step0_rewards": {"escalate_compliance": 0.35, "request_verification": 0.30, "approve_transfer": 0.20, "flag_fraud": -0.35, "reject_transfer": -0.20}, "is_fraudulent": False, }, }, }, { "id": "fr_H004", "ambiguity": 0.82, "visible": { "request_type": "vendor_payment", "amount_usd": 32000, "requestor": "AP clerk (3 weeks tenure)", "destination": "[LOCKED — investigate to reveal]", "signals": ["Requestor payment authority limit: $5,000"], }, "hidden_states": { "A": { "prob": 0.60, "truth": "fraud", "reveal": "[KYC SCAN] Vendor bank account IBAN changed 48 hours ago | Acme Supplies Ltd confirmed: did NOT change banking details | Clerk's login IP: proxy (mismatch from usual) | Social engineering suspected", "step0_rewards": {"flag_fraud": 0.40, "request_verification": 0.30, "reject_transfer": 0.20, "approve_transfer": -0.99}, "is_fraudulent": True, }, "B": { "prob": 0.40, "truth": "legitimate", "reveal": "[KYC SCAN] IBAN change confirmed by Acme CFO via signed letter (on file) | Clerk processed with manager pre-approval email chain | PO#83021 matches ERP | Amount within contract", "step0_rewards": {"request_verification": 0.35, "approve_transfer": 0.30, "flag_fraud": -0.30, "reject_transfer": -0.10}, "is_fraudulent": False, }, }, }, { "id": "fr_H005", "ambiguity": 0.78, "visible": { "request_type": "expense_reimbursement", "amount_usd": 12400, "requestor": "senior sales director", "destination": "[LOCKED — investigate to reveal]", "signals": ["3.1x above department average reimbursement"], }, "hidden_states": { "A": { "prob": 0.45, "truth": "inflated_fraud", "reveal": "[KYC SCAN] Receipts: 47% missing or duplicated | $4,200 'client dinner' receipt: restaurant closed that day | Director on PIP for expense policy violations | HR flagged 2 prior incidents", "step0_rewards": {"flag_fraud": 0.40, "hold_for_review": 0.30, "escalate_compliance": 0.25, "approve_transfer": -0.70}, "is_fraudulent": True, }, "B": { "prob": 0.55, "truth": "legitimate", "reveal": "[KYC SCAN] Receipts: all verified | Quarter-end client entertainment | Director closed $2.1M deal this quarter | VP pre-approved via email | Amount matches policy for deal size", "step0_rewards": {"approve_transfer": 0.40, "request_verification": 0.20, "hold_for_review": -0.10, "flag_fraud": -0.40}, "is_fraudulent": False, }, }, }, # ═══════════════════════════════════════════════════════════════════════════ # MEDIUM AMBIGUITY (0.40–0.65): INVESTIGATION HELPFUL # ═══════════════════════════════════════════════════════════════════════════ { "id": "fr_M001", "ambiguity": 0.60, "visible": { "request_type": "payroll_disbursement", "amount_usd": 258000, "requestor": "CFO via enterprise portal", "destination": "[LOCKED — investigate to reveal]", "signals": ["6.2% higher than previous cycle"], }, "hidden_states": { "A": { "prob": 0.75, "truth": "legitimate", "reveal": "[KYC SCAN] CFO 2FA: PASSED (biometric) | Amount variance: 2 new hires + annual raise cycle | Destination: verified payroll account (8 years) | Matches HR headcount report", "step0_rewards": {"approve_transfer": 0.40, "request_verification": 0.25, "flag_fraud": -0.35, "reject_transfer": -0.20}, "is_fraudulent": False, }, "B": { "prob": 0.25, "truth": "manipulation", "reveal": "[KYC SCAN] CFO account: 2FA device replaced 2 days ago | Destination account: 18% of funds routing to NEW split account added last week | HR headcount: unchanged", "step0_rewards": {"flag_fraud": 0.40, "request_verification": 0.30, "hold_for_review": 0.20, "approve_transfer": -0.80}, "is_fraudulent": True, }, }, }, { "id": "fr_M002", "ambiguity": 0.55, "visible": { "request_type": "supplier_payment", "amount_usd": 18750, "requestor": "procurement department", "destination": "[LOCKED — investigate to reveal]", "signals": ["Existing supplier relationship"], }, "hidden_states": { "A": { "prob": 0.70, "truth": "legitimate", "reveal": "[KYC SCAN] Supplier: Bostwick Logistics (5-year relationship) | Bank details: unchanged from last 23 payments | PO#91204 matches | CFO counter-signed", "step0_rewards": {"approve_transfer": 0.40, "request_verification": 0.15, "flag_fraud": -0.25, "reject_transfer": -0.15}, "is_fraudulent": False, }, "B": { "prob": 0.30, "truth": "fraud", "reveal": "[KYC SCAN] Supplier bank account: IBAN changed 5 days ago | Supplier contacted: denies changing account | Change request came via spoofed email from supplier domain | Classic supplier fraud", "step0_rewards": {"flag_fraud": 0.40, "request_verification": 0.30, "hold_for_review": 0.20, "approve_transfer": -0.90}, "is_fraudulent": True, }, }, }, { "id": "fr_M003", "ambiguity": 0.48, "visible": { "request_type": "consulting_invoice", "amount_usd": 9500, "requestor": "marketing department", "destination": "[LOCKED — investigate to reveal]", "signals": ["First payment to this consultant"], }, "hidden_states": { "A": { "prob": 0.65, "truth": "legitimate", "reveal": "[KYC SCAN] Freelancer: Sarah Chen (LinkedIn verified, 847 reviews) | Contract signed via DocuSign | SOW matches deliverables | Tax form: W-9 on file", "step0_rewards": {"approve_transfer": 0.40, "request_verification": 0.20, "flag_fraud": -0.20, "reject_transfer": -0.10}, "is_fraudulent": False, }, "B": { "prob": 0.35, "truth": "ghost_vendor", "reveal": "[KYC SCAN] Freelancer profile: created 14 days ago | No prior work history | SSN matches deceased individual | Marketing manager is connected on LinkedIn | Ghost vendor fraud", "step0_rewards": {"flag_fraud": 0.40, "escalate_compliance": 0.30, "request_verification": 0.20, "approve_transfer": -0.75}, "is_fraudulent": True, }, }, }, # ═══════════════════════════════════════════════════════════════════════════ # LOW AMBIGUITY (0.05–0.20): SIGNALS ARE CLEAR — ACT WITHOUT INVESTIGATING # ═══════════════════════════════════════════════════════════════════════════ { "id": "fr_L001", "ambiguity": 0.10, "visible": { "request_type": "wire_transfer", "amount_usd": 247000, "requestor": "CFO via enterprise portal", "destination": "US domestic payroll account #XXXX-4821 (verified 6 years)", # VISIBLE "signals": ["Matches bi-weekly payroll exactly", "CFO 2FA: PASSED", "Amount matches HR headcount report"], }, "hidden_states": { "A": { "prob": 1.0, "truth": "legitimate", "reveal": "[KYC SCAN] All signals verified | Payroll account: 6 years stable | 2FA biometric confirmed | Amount matches to the cent | Matches automated payroll run #PR-2024-089", "step0_rewards": {"approve_transfer": 0.45, "request_verification": 0.10, "flag_fraud": -0.50, "reject_transfer": -0.40}, "is_fraudulent": False, }, }, }, { "id": "fr_L002", "ambiguity": 0.08, "visible": { "request_type": "wire_transfer", "amount_usd": 500000, "requestor": "unknown (no authentication)", "destination": "Account in sanctioned country (OFAC list)", # VISIBLE "signals": ["No 2FA", "Requestor not in employee directory", "OFAC match: sanctioned entity", "Submitted via public API without auth"], }, "hidden_states": { "A": { "prob": 1.0, "truth": "fraud", "reveal": "[KYC SCAN] OFAC positive match: sanctioned entity | Submission IP: known fraud ring (Interpol DB) | Zero authentication | Regulatory violation if processed — instant license revocation", "step0_rewards": {"flag_fraud": 0.50, "reject_transfer": 0.40, "escalate_compliance": 0.35, "approve_transfer": -0.99}, "is_fraudulent": True, }, }, }, ] # ────────────────────────────────────────────────────────────────────────────── # ACTION SETS # ────────────────────────────────────────────────────────────────────────────── _S0_BASE = ["request_verification", "flag_fraud", "approve_transfer", "reject_transfer", "hold_for_review", "escalate_compliance"] _S0_WITH_INVEST = ["investigate"] + _S0_BASE _S1 = ["approve_after_verification", "reject_failed_verification", "escalate_compliance"] _S2 = ["proceed_low_risk", "proceed_high_risk_override", "hold_for_review"] _S3 = ["log_standard", "log_suspicious_activity", "log_fraud_report"] _S4 = ["close_completed", "close_pending_review"] def _pick_hidden_state(scenario: dict, seed: Optional[int], ep: int) -> str: states = scenario["hidden_states"] if len(states) == 1: return list(states.keys())[0] key = f"{scenario['id']}_ep{ep}_seed{seed if seed is not None else 'none'}" h = int(hashlib.md5(key.encode()).hexdigest(), 16) r = (h % 10_000) / 10_000.0 cumulative = 0.0 for k, v in states.items(): cumulative += v["prob"] if r < cumulative: return k return list(states.keys())[-1] class FinancialRequestTask(BaseTask): task_id = "financial_request" max_steps = 5 # assess → secondary → classify → log → close (INVESTIGATE does not consume a step) def __init__(self): self._ep = -1 self._seed: Optional[int] = None self._scenario: dict = {} self._active_state_key: str = "A" self._active_state: dict = {} self._step = 0 self._api_calls = 0 self._history: list = [] self._done = False self._investigated = False self._decisions: list = [] def reset(self, seed: Optional[int] = None): self._ep += 1 self._seed = seed if seed is not None: random.seed(seed) self._scenario = _SCENARIO_CLASSES[self._ep % len(_SCENARIO_CLASSES)] self._active_state_key = _pick_hidden_state(self._scenario, seed, self._ep) self._active_state = self._scenario["hidden_states"][self._active_state_key] self._step = 0 self._api_calls = 0 self._history = [] self._done = False self._investigated = False self._decisions = [] return self._obs() def step(self, action: Action): if self._done: raise RuntimeError("Episode done. Call reset().") t = action.type self._api_calls += 1 # ── INVESTIGATE: reveals KYC/AML data, does NOT advance _step ──────── if t == "investigate": if self._step != 0: return self._obs(), Reward(value=0.01, breakdown={"error": "investigate_invalid_step"}, raw=0.01), False, {} self._investigated = True r = investigation_reward(self._scenario["ambiguity"]) self._history.append({ "api_call": self._api_calls, "step": self._step, "action": "investigate", "reward": {"value": r, "breakdown": {"investigation": r}}, "reveal": self._active_state["reveal"], }) return self._obs(), Reward(value=r, breakdown={"investigation": r}, raw=r), False, { "info": "KYC/AML scan complete. Make your initial assessment.", "reveal": self._active_state["reveal"], } # ── STEP 0: Initial Assessment ─────────────────────────────────────── if self._step == 0: if t not in _S0_BASE: t = "request_verification" base_r = self._active_state["step0_rewards"].get(t, -0.10) correct = ( (t in ["flag_fraud", "reject_transfer", "escalate_compliance", "hold_for_review"] and self._active_state["is_fraudulent"]) or (t in ["approve_transfer", "request_verification"] and not self._active_state["is_fraudulent"]) ) # RECKLESS PENALTY: blind approve on fraudulent high-ambiguity scenario if (t == "approve_transfer" and self._active_state["is_fraudulent"] and not self._investigated and self._scenario["ambiguity"] >= 0.60): base_r = min(base_r, -0.80) cal_r = calibration_reward(correct, self._scenario["ambiguity"], self._investigated) rval = max(0.01, min(0.99, (base_r * 0.30) + (cal_r * 0.70))) breakdown = { "decision_base": base_r, "calibration_reward": cal_r, "investigated": self._investigated, "ambiguity": self._scenario["ambiguity"], "is_fraudulent": self._active_state["is_fraudulent"], } self._decisions.append(t) self._history.append({ "api_call": self._api_calls, "step": self._step, "action": t, "correct": correct, "investigated": self._investigated, "reward": {"value": round(rval, 4), "breakdown": breakdown}, "autonomy": _AUTONOMY_MAP.get(t), }) self._step += 1 if self._step >= self.max_steps: self._done = True reward = Reward(value=round(rval, 4), breakdown=breakdown, raw=rval) return self._obs(), reward, self._done, { "step": 0, "action": t, "correct": correct, "investigated": self._investigated, "hidden_truth": self._active_state["truth"] if self._investigated else "LOCKED", "autonomy": _AUTONOMY_MAP.get(t), "episode_score": self.grade_episode(self._history) if self._done else None, } # ── STEPS 1–4: Secondary decisions (simpler, ambiguity fades) ──────── step_configs = { 1: (_S1, [("approve_after_verification", 0.25), ("reject_failed_verification", 0.20), ("escalate_compliance", 0.15)], 0.40), 2: (_S2, [("hold_for_review", 0.20), ("proceed_low_risk", 0.15), ("proceed_high_risk_override", 0.10)], 0.30), 3: (_S3, [("log_fraud_report", 0.15), ("log_suspicious_activity", 0.12), ("log_standard", 0.10)], 0.35), 4: (_S4, [("close_pending_review", 0.15), ("close_completed", 0.12)], 0.25), } if self._step in step_configs: valid_actions, reward_map, default_r = step_configs[self._step] if t not in valid_actions: t = valid_actions[0] rval = next((r for a, r in reward_map if a == t), default_r * 0.5) breakdown = {f"step{self._step}": rval} if self._step == 4: self._done = True self._decisions.append(t) self._history.append({ "api_call": self._api_calls, "step": self._step, "action": t, "reward": {"value": rval, "breakdown": breakdown}, "autonomy": _AUTONOMY_MAP.get(t), }) self._step += 1 if self._step >= self.max_steps: self._done = True reward = Reward(value=round(rval, 4), breakdown=breakdown, raw=rval) return self._obs(), reward, self._done, { "step": self._step - 1, "action": t, "episode_score": self.grade_episode(self._history) if self._done else None, } return self._obs(), Reward(value=0.01, breakdown={}, raw=0.01), True, {} def state(self): return { "task_id": self.task_id, "step": self._step, "done": self._done, "scenario_id": self._scenario.get("id", ""), "ambiguity": self._scenario.get("ambiguity", 0.0), "investigated": self._investigated, "hidden_truth": self._active_state.get("truth", "LOCKED") if self._investigated else "LOCKED", "decisions": list(self._decisions), } def grade_episode(self, history): total = sum(e.get("reward", {}).get("value", 0.0) for e in history) return clamp(total / 1.0) def _obs(self) -> Observation: s = self._scenario v = s["visible"] step_actions = { 0: _S0_WITH_INVEST if not self._investigated else _S0_BASE, 1: _S1, 2: _S2, 3: _S3, 4: _S4, } avail = step_actions.get(self._step, []) if not self._done else [] if self._step == 0: # Show destination only if investigated OR naturally visible (low ambiguity) if self._investigated: dest_display = self._active_state["reveal"] kyc_info = f"\n[KYC UNLOCKED]: {self._active_state['reveal']}" elif s["ambiguity"] < 0.30: # Destination always visible on obvious cases dest_display = v["destination"] kyc_info = "\n[SIGNALS CLEAR] — You may act directly based on visible signals" else: dest_display = v["destination"] # shows "[LOCKED]" kyc_info = "\n[KYC LOCKED] — Use 'investigate' to run KYC/AML scan and reveal beneficiary details" ambiguity = s["ambiguity"] if ambiguity >= 0.70: confidence = "⚠️ LOW — Beneficiary not verified. Investigation strongly recommended." elif ambiguity >= 0.40: confidence = "⚡ MEDIUM — Partial signals present. Investigation recommended." else: confidence = "✅ HIGH — Signals are clear. You may act directly." prompt = ( f"Request Type: {v['request_type']} | Amount: ${v['amount_usd']:,}\n" f"Requestor: {v['requestor']}\n" f"Destination: {dest_display}\n" f"Risk Signals: {v['signals']}\n" f"Signal Confidence: {confidence}" f"{kyc_info}\n\n" f"Initial assessment actions: {avail}" ) state_dict = { "request": { "type": v["request_type"], "amount_usd": v["amount_usd"], "requestor": v["requestor"], "destination": dest_display, "signals": v["signals"], }, "kyc_status": "UNLOCKED" if self._investigated else "LOCKED", "ambiguity": s["ambiguity"], "confidence_hint": confidence, "decisions_so_far": list(self._decisions), "investigated": self._investigated, } else: ctx = ( f"Request: {v['request_type']} ${v['amount_usd']:,} | " f"Decisions so far: {self._decisions}" ) prompts = { 1: f"{ctx}\n\nStep 1 — Secondary review. Available: {avail}", 2: f"{ctx}\n\nStep 2 — Risk classification. Available: {avail}", 3: f"{ctx}\n\nStep 3 — Activity logging. Available: {avail}", 4: f"{ctx}\n\nStep 4 — Case closure. Available: {avail}", } prompt = prompts.get(self._step, f"{ctx}\n\nAvailable: {avail}") state_dict = { "request": {"type": v["request_type"], "amount_usd": v["amount_usd"]}, "decisions_so_far": list(self._decisions), "step": self._step, } return Observation( task_id=self.task_id, step=self._step, state=state_dict, history=list(self._history), available_actions=avail, done=self._done, prompt=prompt, context=prompt, task=self.task_id, action_to_evaluate="Evaluating agent response...", )