# claims-env / models.py
# Last commit a7386cd (pramodmisra):
#   Fix reward serialization - add reward field to ClaimsObservation
"""
Insurance Claims Processing Environment - Data Models
This module defines the Action, Observation, and State models for the
insurance claims processing environment. Designed for OpenEnv hackathon
Statement 3.1: Professional Tasks + Scaler AI Labs sub-theme.
Uses Pydantic models to match OpenEnv base classes.
"""
from typing import List, Dict, Optional, Any
from pydantic import Field
from openenv.core import Action, Observation, State
class ClaimsAction(Action):
    """
    One step taken by the agent while processing an insurance claim.

    An action either gathers information about the claim or records a
    decision on it. Each action simulates a real enterprise workflow step.
    """

    # Valid action types (action_type is typed as a plain str in this model):
    #   - "query_policy":        Look up policy details (coverage, limits, status)
    #   - "query_claim_history": Check claimant's past claims
    #   - "check_fraud":         Run fraud detection analysis
    #   - "request_documents":   Request supporting documents (photos, reports)
    #   - "verify_coverage":     Check if damage type is covered
    #   - "calculate_payout":    Calculate the payout amount
    #   - "approve":             Approve the claim with specified amount
    #   - "deny":                Deny the claim with reason
    #   - "escalate":            Escalate to senior adjuster
    action_type: str = Field(description="One of the valid action types")

    # Claim the action applies to; defaults to an empty string.
    claim_id: str = Field(default="", description="Claim ID being processed")

    # Free-form, per-action arguments; a fresh dict is created per instance.
    parameters: Dict[str, Any] = Field(
        default_factory=dict,
        description="Action-specific parameters",
    )
class ClaimsObservation(Observation):
    """
    The agent's view of the environment after each action.

    Observability is partial: the agent does not see everything at once and
    must query systems so that information is revealed progressively.
    """

    # --- Current claim summary -------------------------------------------
    claim_id: str = Field(default="", description="Claim identifier")
    claim_type: str = Field(default="", description="Type of claim")
    claim_amount_requested: float = Field(
        default=0.0,
        description="Amount claimed",
    )
    claimant_name: str = Field(default="", description="Name of claimant")
    incident_date: str = Field(default="", description="Date of incident")
    description: str = Field(default="", description="Claim description")

    # --- Outcome of the most recent action -------------------------------
    system_response: str = Field(
        default="",
        description="Response from last action",
    )
    action_success: bool = Field(
        default=True,
        description="Whether last action succeeded",
    )

    # --- Information revealed so far (grows as the agent queries) --------
    revealed_info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Information revealed so far",
    )

    # --- Actions available from the current state ------------------------
    available_actions: List[str] = Field(
        default_factory=list,
        description="Valid next actions",
    )

    # --- Processing metrics ----------------------------------------------
    time_elapsed_minutes: int = Field(
        default=0,
        description="Simulated processing time",
    )
    queries_made: int = Field(default=0, description="Number of queries made")

    # --- Terminal state info ---------------------------------------------
    is_terminal: bool = Field(
        default=False,
        description="Whether episode is done",
    )
    terminal_reason: str = Field(default="", description="Why episode ended")

    # --- Reward (required for OpenEnv serialization) ---------------------
    reward: float = Field(default=0.0, description="Reward from this step")
class ClaimsState(State):
    """
    Complete episode state, including the hidden ground truth.

    This state is used server-side for reward computation. The agent only
    sees ClaimsObservation.
    """

    # --- Visible state ---------------------------------------------------
    claim_id: str = Field(default="", description="Claim identifier")
    claim_type: str = Field(default="", description="Type of claim")
    claim_amount_requested: float = Field(
        default=0.0,
        description="Amount claimed",
    )

    # --- Hidden ground truth (agent must discover through queries) -------
    true_verdict: str = Field(default="", description="Correct decision")
    correct_payout: float = Field(
        default=0.0,
        description="Correct payout amount",
    )
    is_fraud: bool = Field(
        default=False,
        description="Whether claim is fraudulent",
    )
    fraud_type: Optional[str] = Field(
        default=None,
        description="Type of fraud if applicable",
    )

    # --- Policy details (hidden until queried) ---------------------------
    policy_coverage_limit: float = Field(
        default=0.0,
        description="Max coverage amount",
    )
    policy_deductible: float = Field(
        default=0.0,
        description="Deductible amount",
    )
    policy_status: str = Field(default="", description="Policy status")
    coverage_exclusions: List[str] = Field(
        default_factory=list,
        description="Excluded coverage types",
    )

    # --- Claim complexity ------------------------------------------------
    complexity: str = Field(
        default="standard",
        description="Claim complexity level",
    )
    requires_documents: List[str] = Field(
        default_factory=list,
        description="Required documents",
    )
    requires_escalation: bool = Field(
        default=False,
        description="Whether escalation is needed",
    )

    # --- Episode tracking ------------------------------------------------
    actions_taken: int = Field(
        default=0,
        description="Number of actions taken",
    )
    queries_made: int = Field(default=0, description="Number of queries made")
    time_elapsed_minutes: int = Field(
        default=0,
        description="Simulated time elapsed",
    )

    # --- Revealed-information tracker (one flag per query kind) ----------
    policy_queried: bool = Field(
        default=False,
        description="Policy info queried",
    )
    history_queried: bool = Field(
        default=False,
        description="Claim history queried",
    )
    fraud_checked: bool = Field(default=False, description="Fraud check done")
    documents_requested: bool = Field(
        default=False,
        description="Documents requested",
    )
    coverage_verified: bool = Field(
        default=False,
        description="Coverage verified",
    )
    payout_calculated: bool = Field(
        default=False,
        description="Payout calculated",
    )

    # --- Final decision tracking -----------------------------------------
    agent_decision: str = Field(default="", description="Agent's decision")
    agent_payout: float = Field(
        default=0.0,
        description="Agent's payout amount",
    )
    decision_reason: str = Field(
        default="",
        description="Reason for decision",
    )

    # --- Reward components (for analysis) --------------------------------
    correctness_reward: float = Field(
        default=0.0,
        description="Reward for correct decision",
    )
    efficiency_reward: float = Field(
        default=0.0,
        description="Reward for efficiency",
    )
    fraud_detection_reward: float = Field(
        default=0.0,
        description="Reward for fraud detection",
    )
    total_reward: float = Field(
        default=0.0,
        description="Total episode reward",
    )