payops_env

Paused

File size: 11,583 Bytes

from __future__ import annotations

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# Action space
# ---------------------------------------------------------------------------

class PayOpsAction(BaseModel):
    """
    Action submitted by the agent for a single transaction.

    action_type choices
    -------------------
    approve        – mark transaction as legitimate and allow it through
    reject         – block the transaction outright
    flag           – mark for manual review with a soft hold
    escalate       – route to senior compliance officer / fraud team
    inspect        – pull additional signals (logs, KYC data, velocity)
    hold           – temporary hold pending more information
    request_docs   – ask sender for supporting documents (e.g. invoice, contract)
    verify_kyc     – trigger an active KYC re-verification check
    contact_sender – contact the sender directly to confirm intent
    file_sar       – file a Suspicious Activity Report to regulator
    """

    action_type: str = Field(
        ...,
        description=(
            "One of: approve | reject | flag | escalate | inspect | hold "
            "| request_docs | verify_kyc | contact_sender | file_sar"
        ),
    )
    transaction_id: str = Field(..., description="ID of the transaction being acted on")
    reason: Optional[str] = Field(
        default=None, description="Free-text rationale from the agent"
    )
    confidence: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Agent self-reported confidence [0, 1]. Used in reward shaping.",
    )
    metadata: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional pass-through metadata (openenv.core.Action compatibility).",
    )


# ---------------------------------------------------------------------------
# Observation space
# ---------------------------------------------------------------------------

class PayOpsObservation(BaseModel):
    """
    Structured observation returned after each step (and on reset).
    Describes the current transaction visible to the agent.
    """

    # --- transaction identity ---
    transaction_id: str
    amount: float = Field(..., description="Transaction amount in the stated currency")
    currency: str = Field(..., description="ISO-4217 currency code, e.g. USD, EUR")
    sender: str = Field(..., description="Sender identifier (email / account / alias)")
    receiver: str = Field(..., description="Receiver identifier")
    transaction_type: str = Field(
        default="transfer",
        description="Type: transfer | payment | withdrawal | refund | internal | loan_repayment | payroll",
    )

    # --- risk signals ---
    status: str = Field(
        default="pending",
        description=(
            "Current status: pending | approved | rejected | flagged | escalated "
            "| held | inspected | docs_requested | kyc_triggered | sender_contacted | sar_filed"
        ),
    )
    risk_score: float = Field(
        ..., ge=0.0, le=1.0, description="Composite ML risk score [0=low, 1=high]"
    )
    ml_confidence: float = Field(
        default=0.9,
        ge=0.0,
        le=1.0,
        description="Model's self-reported confidence in its own risk_score. Low = possibly poisoned.",
    )
    flags: List[str] = Field(
        default_factory=list,
        description="Active risk flags e.g. high_value, unknown_sender, velocity_breach",
    )

    # --- sender behaviour signals ---
    velocity_1h: Optional[int] = Field(
        default=None,
        description="Number of transactions from this sender in the past hour",
    )
    velocity_24h: Optional[int] = Field(
        default=None,
        description="Number of transactions from this sender in the past 24 hours",
    )
    avg_transaction_amount: Optional[float] = Field(
        default=None,
        description="Sender's historical average transaction amount",
    )
    account_age_days: Optional[int] = Field(
        default=None,
        description="Age of the sender account in days",
    )

    # --- counterparty / geography ---
    country_risk: Optional[str] = Field(
        default=None,
        description="Receiver country risk tier: low | medium | high | sanctioned",
    )
    kyc_status: Optional[str] = Field(
        default=None,
        description="KYC verification status: verified | pending | failed | none | expired",
    )
    kyc_expiry_days: Optional[int] = Field(
        default=None,
        description="Days until KYC expires (negative = already expired)",
    )
    previous_violations: Optional[int] = Field(
        default=None,
        description="Number of prior compliance violations for this sender",
    )
    previous_sars: Optional[int] = Field(
        default=None,
        description="Number of Suspicious Activity Reports previously filed for this sender",
    )
    counterparty_risk: Optional[str] = Field(
        default=None,
        description="Known risk profile of the receiver: clean | unknown | watchlist | blacklist",
    )

    # --- chain context (multi-hop investigation) ---
    chain_step: int = Field(
        default=1,
        description="Which step within a multi-hop investigation chain (1=initial presentation)",
    )
    chain_total: int = Field(
        default=1,
        description="Total number of chained investigation steps for this task",
    )
    chain_context: Optional[str] = Field(
        default=None,
        description="Summary of findings from earlier chain steps",
    )

    # --- resource tracking ---
    steps_remaining: Optional[int] = Field(
        default=None,
        description="How many investigation sub-steps remain before a terminal decision is required",
    )
    action_cost: float = Field(
        default=0.0,
        description="Operational cost penalty incurred by the last action",
    )
    budget_remaining: float = Field(
        default=5.0,
        description="Remaining investigation budget (starts at 5.0; each investigation action deducts its cost)",
    )

    # --- context from prior investigation actions ---
    inspection_notes: Optional[str] = Field(
        default=None,
        description="Additional details revealed after an 'inspect' action",
    )
    docs_notes: Optional[str] = Field(
        default=None,
        description="Document review findings after a 'request_docs' action",
    )
    kyc_notes: Optional[str] = Field(
        default=None,
        description="KYC re-verification outcome after a 'verify_kyc' action",
    )
    contact_notes: Optional[str] = Field(
        default=None,
        description="Outcome of contacting the sender via 'contact_sender' action",
    )

    # --- recommended investigation sub-actions for this task ---
    investigation_hints: List[str] = Field(
        default_factory=list,
        description=(
            "Sub-actions recommended for this task (non-exhaustive). "
            "Using them before the terminal decision earns bonus reward and may reveal "
            "decisive evidence. Empty list = no specific investigation required."
        ),
    )

    # --- recent decision context (last 3 decisions in this episode) ---
    recent_decisions: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Last up to 3 completed decisions in this episode for pattern context",
    )

    # --- episode bookkeeping ---
    task_id: str = Field(default="", description="Identifier of the active task")
    task_difficulty: str = Field(
        default="easy", description="Difficulty tier: easy | medium | hard | critical"
    )
    step_in_episode: int = Field(
        default=0, description="How many steps have elapsed in this episode"
    )
    reward: float = Field(default=0.0, description="Reward from the last action")
    reward_breakdown: Dict[str, float] = Field(
        default_factory=dict,
        description="Itemised reward components: base, time_penalty, confidence_bonus, cost_penalty",
    )
    cumulative_reward: float = Field(
        default=0.0, description="Total reward accumulated so far in this episode"
    )
    done: bool = Field(default=False, description="Whether the episode has ended")
    network_graph: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Mule-chain / correspondent-bank relationship graph for tasks where present",
    )
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Extra diagnostic information (action taken, correct action, etc.)",
    )
    metadata: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Optional pass-through metadata (openenv.core.Observation compatibility).",
    )


# ---------------------------------------------------------------------------
# Reward breakdown (typed model for openenv.core spec compliance)
# ---------------------------------------------------------------------------

class PayOpsReward(BaseModel):
    """
    Typed reward model returned alongside each observation.

    ``value`` is the normalised reward \u2208 [0.0, 1.0] for the episode so far.
    ``breakdown`` itemises the components that contributed to the raw score.
    """

    value: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Normalised episode reward \u2208 [0.0, 1.0]",
    )
    breakdown: Dict[str, float] = Field(
        default_factory=dict,
        description=(
            "Per-component reward breakdown: terminal_correct, investigation_bonus, "
            "flag_identification_bonus, confidence_bonus, duplicate_penalty, budget_penalty"
        ),
    )
    raw_total: float = Field(
        default=0.0,
        description="Raw (un-normalised) sum of reward components before clamping",
    )
    max_possible: float = Field(
        default=1.0,
        description="Maximum achievable raw reward for this episode",
    )


# ---------------------------------------------------------------------------
# Internal state (used by the server's /state endpoint)
# ---------------------------------------------------------------------------

class PayOpsState(BaseModel):
    episode_id: Optional[str] = None
    step_count: int = 0
    current_task_id: str = ""
    transactions_processed: int = 0
    total_tasks: int = 0
    cumulative_reward: float = 0.0
    budget_spent: float = Field(default=0.0, description="Total action costs accumulated")
    budget_limit: float = Field(default=5.0, description="Max investigation budget per episode")
    actions_taken: List[str] = Field(default_factory=list)
    last_action: Optional[str] = None
    investigation_actions_used: List[str] = Field(
        default_factory=list,
        description="All investigation sub-actions used this episode (inspect, request_docs, etc.)",
    )
    correct_decisions: int = Field(default=0, description="Terminal decisions that matched ground truth")
    wrong_high_cost: int = Field(
        default=0, description="Count of approve-on-fraud type mistakes"
    )
    recent_decisions: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Recent completed task outcomes for analytics",
    )
    done: bool = False
    episode_seed: Optional[int] = Field(
        default=None,
        description="Random seed used to jitter task parameters this episode (for reproducibility)",
    )