from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Action space
# ---------------------------------------------------------------------------
class PayOpsAction(BaseModel):
    """
    One agent decision targeting a single transaction.

    ``action_type`` is declared as a plain ``str``; the values advertised by
    its field description are:

    approve         – mark the transaction as legitimate and let it through
    reject          – block the transaction outright
    flag            – soft hold, marked for manual review
    escalate        – route to a senior compliance officer / fraud team
    inspect         – pull additional signals (logs, KYC data, velocity)
    hold            – temporary hold pending more information
    request_docs    – ask the sender for supporting documents (e.g. invoice, contract)
    verify_kyc      – trigger an active KYC re-verification check
    contact_sender  – contact the sender directly to confirm intent
    file_sar        – file a Suspicious Activity Report to the regulator

    Only ``action_type`` and ``transaction_id`` are required; every other
    field defaults to ``None``.
    """

    # --- required: what to do, and to which transaction ---
    action_type: str = Field(
        ...,
        description=(
            "One of: approve | reject | flag | escalate | inspect | hold "
            "| request_docs | verify_kyc | contact_sender | file_sar"
        ),
    )
    transaction_id: str = Field(
        ...,
        description="ID of the transaction being acted on",
    )

    # --- optional extras (None when omitted) ---
    reason: Optional[str] = Field(None, description="Free-text rationale from the agent")
    confidence: Optional[float] = Field(
        None,
        ge=0.0,  # lower/upper bounds enforce the advertised [0, 1] range
        le=1.0,
        description="Agent self-reported confidence [0, 1]. Used in reward shaping.",
    )
    metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional pass-through metadata (openenv.core.Action compatibility).",
    )
# ---------------------------------------------------------------------------
# Observation space
# ---------------------------------------------------------------------------
class PayOpsObservation(BaseModel):
    """
    Snapshot of the transaction currently presented to the agent.

    The docstring this replaces states that an instance is returned after
    every step and on reset. Required fields are ``transaction_id``,
    ``amount``, ``currency``, ``sender``, ``receiver`` and ``risk_score``;
    all remaining fields carry defaults, so a minimal observation can be
    built from those six values alone.
    """

    # ------------------------------------------------------------------
    # Transaction identity
    # ------------------------------------------------------------------
    transaction_id: str
    amount: float = Field(
        ...,
        description="Transaction amount in the stated currency",
    )
    currency: str = Field(
        ...,
        description="ISO-4217 currency code, e.g. USD, EUR",
    )
    sender: str = Field(
        ...,
        description="Sender identifier (email / account / alias)",
    )
    receiver: str = Field(..., description="Receiver identifier")
    transaction_type: str = Field(
        "transfer",
        description="Type: transfer | payment | withdrawal | refund | internal | loan_repayment | payroll",
    )

    # ------------------------------------------------------------------
    # Risk signals
    # ------------------------------------------------------------------
    status: str = Field(
        "pending",
        description=(
            "Current status: pending | approved | rejected | flagged | escalated "
            "| held | inspected | docs_requested | kyc_triggered | sender_contacted | sar_filed"
        ),
    )
    risk_score: float = Field(
        ...,
        ge=0.0,  # both scores below are validated to lie within [0, 1]
        le=1.0,
        description="Composite ML risk score [0=low, 1=high]",
    )
    ml_confidence: float = Field(
        0.9,
        ge=0.0,
        le=1.0,
        description="Model's self-reported confidence in its own risk_score. Low = possibly poisoned.",
    )
    flags: List[str] = Field(
        default_factory=list,  # fresh list per instance
        description="Active risk flags e.g. high_value, unknown_sender, velocity_breach",
    )

    # ------------------------------------------------------------------
    # Sender behaviour signals (None when not supplied)
    # ------------------------------------------------------------------
    velocity_1h: Optional[int] = Field(
        None,
        description="Number of transactions from this sender in the past hour",
    )
    velocity_24h: Optional[int] = Field(
        None,
        description="Number of transactions from this sender in the past 24 hours",
    )
    avg_transaction_amount: Optional[float] = Field(
        None,
        description="Sender's historical average transaction amount",
    )
    account_age_days: Optional[int] = Field(
        None,
        description="Age of the sender account in days",
    )

    # ------------------------------------------------------------------
    # Counterparty / geography (None when not supplied)
    # ------------------------------------------------------------------
    country_risk: Optional[str] = Field(
        None,
        description="Receiver country risk tier: low | medium | high | sanctioned",
    )
    kyc_status: Optional[str] = Field(
        None,
        description="KYC verification status: verified | pending | failed | none | expired",
    )
    kyc_expiry_days: Optional[int] = Field(
        None,
        description="Days until KYC expires (negative = already expired)",
    )
    previous_violations: Optional[int] = Field(
        None,
        description="Number of prior compliance violations for this sender",
    )
    previous_sars: Optional[int] = Field(
        None,
        description="Number of Suspicious Activity Reports previously filed for this sender",
    )
    counterparty_risk: Optional[str] = Field(
        None,
        description="Known risk profile of the receiver: clean | unknown | watchlist | blacklist",
    )

    # ------------------------------------------------------------------
    # Chain context (multi-hop investigation)
    # ------------------------------------------------------------------
    chain_step: int = Field(
        1,
        description="Which step within a multi-hop investigation chain (1=initial presentation)",
    )
    chain_total: int = Field(
        1,
        description="Total number of chained investigation steps for this task",
    )
    chain_context: Optional[str] = Field(
        None,
        description="Summary of findings from earlier chain steps",
    )

    # ------------------------------------------------------------------
    # Resource tracking
    # ------------------------------------------------------------------
    steps_remaining: Optional[int] = Field(
        None,
        description="How many investigation sub-steps remain before a terminal decision is required",
    )
    action_cost: float = Field(
        0.0,
        description="Operational cost penalty incurred by the last action",
    )
    budget_remaining: float = Field(
        5.0,
        description="Remaining investigation budget (starts at 5.0; each investigation action deducts its cost)",
    )

    # ------------------------------------------------------------------
    # Notes produced by prior investigation actions (None until populated)
    # ------------------------------------------------------------------
    inspection_notes: Optional[str] = Field(
        None,
        description="Additional details revealed after an 'inspect' action",
    )
    docs_notes: Optional[str] = Field(
        None,
        description="Document review findings after a 'request_docs' action",
    )
    kyc_notes: Optional[str] = Field(
        None,
        description="KYC re-verification outcome after a 'verify_kyc' action",
    )
    contact_notes: Optional[str] = Field(
        None,
        description="Outcome of contacting the sender via 'contact_sender' action",
    )

    # ------------------------------------------------------------------
    # Recommended investigation sub-actions for this task
    # ------------------------------------------------------------------
    investigation_hints: List[str] = Field(
        default_factory=list,
        description=(
            "Sub-actions recommended for this task (non-exhaustive). "
            "Using them before the terminal decision earns bonus reward and may reveal "
            "decisive evidence. Empty list = no specific investigation required."
        ),
    )

    # ------------------------------------------------------------------
    # Recent decision context (last 3 decisions in this episode)
    # ------------------------------------------------------------------
    recent_decisions: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Last up to 3 completed decisions in this episode for pattern context",
    )

    # ------------------------------------------------------------------
    # Episode bookkeeping
    # ------------------------------------------------------------------
    task_id: str = Field("", description="Identifier of the active task")
    task_difficulty: str = Field(
        "easy",
        description="Difficulty tier: easy | medium | hard | critical",
    )
    step_in_episode: int = Field(
        0,
        description="How many steps have elapsed in this episode",
    )
    reward: float = Field(0.0, description="Reward from the last action")
    reward_breakdown: Dict[str, float] = Field(
        default_factory=dict,
        description="Itemised reward components: base, time_penalty, confidence_bonus, cost_penalty",
    )
    cumulative_reward: float = Field(
        0.0,
        description="Total reward accumulated so far in this episode",
    )
    done: bool = Field(False, description="Whether the episode has ended")
    network_graph: Optional[Dict[str, Any]] = Field(
        None,
        description="Mule-chain / correspondent-bank relationship graph for tasks where present",
    )
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Extra diagnostic information (action taken, correct action, etc.)",
    )
    metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional pass-through metadata (openenv.core.Observation compatibility).",
    )
# ---------------------------------------------------------------------------
# Reward breakdown (typed model for openenv.core spec compliance)
# ---------------------------------------------------------------------------
class PayOpsReward(BaseModel):
    """
    Typed reward record that accompanies each observation.

    ``value`` holds the normalised reward for the episode so far and is
    validated to lie within [0.0, 1.0]. ``breakdown`` maps component names
    to their contribution to the raw score; ``raw_total`` and
    ``max_possible`` carry the un-normalised figures. Every field has a
    default, so ``PayOpsReward()`` is a valid zero reward.
    """

    value: float = Field(
        0.0,
        ge=0.0,  # bounds keep the normalised value inside [0, 1]
        le=1.0,
        description="Normalised episode reward \u2208 [0.0, 1.0]",
    )
    breakdown: Dict[str, float] = Field(
        default_factory=dict,  # fresh dict per instance
        description=(
            "Per-component reward breakdown: terminal_correct, investigation_bonus, "
            "flag_identification_bonus, confidence_bonus, duplicate_penalty, budget_penalty"
        ),
    )
    raw_total: float = Field(
        0.0,
        description="Raw (un-normalised) sum of reward components before clamping",
    )
    max_possible: float = Field(
        1.0,
        description="Maximum achievable raw reward for this episode",
    )
# ---------------------------------------------------------------------------
# Internal state (used by the server's /state endpoint)
# ---------------------------------------------------------------------------
class PayOpsState(BaseModel):
    """
    Episode-level bookkeeping for the environment.

    The section banner above this class describes it as the internal state
    used by the server's ``/state`` endpoint. Every field has a default, so
    ``PayOpsState()`` yields an empty, not-yet-done episode record.
    """

    # --- identity and progress counters (no schema descriptions attached) ---
    episode_id: Optional[str] = None
    step_count: int = 0
    current_task_id: str = ""
    transactions_processed: int = 0
    total_tasks: int = 0
    cumulative_reward: float = 0.0

    # --- investigation budget ---
    budget_spent: float = Field(0.0, description="Total action costs accumulated")
    budget_limit: float = Field(5.0, description="Max investigation budget per episode")

    # --- action history ---
    actions_taken: List[str] = Field(default_factory=list)
    last_action: Optional[str] = None
    investigation_actions_used: List[str] = Field(
        default_factory=list,
        description="All investigation sub-actions used this episode (inspect, request_docs, etc.)",
    )

    # --- outcome tallies ---
    correct_decisions: int = Field(
        0,
        description="Terminal decisions that matched ground truth",
    )
    wrong_high_cost: int = Field(
        0,
        description="Count of approve-on-fraud type mistakes",
    )
    recent_decisions: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Recent completed task outcomes for analytics",
    )

    # --- termination and reproducibility ---
    done: bool = False
    episode_seed: Optional[int] = Field(
        None,
        description="Random seed used to jitter task parameters this episode (for reproducibility)",
    )
|