# disaster-relief-env / models.py
# Page-header residue from the hosting site (avatar caption: "rajx77's picture").
# Latest commit: Hackathon Finale: Command Center UI, Groq Print Export, and Security Pass
# Commit hash: af37c1f
"""
models.py — Pydantic contracts for the Disaster Relief OpenEnv environment.
Owner: Krish Potanwar
FROZEN after first push. Do not rename fields without team sync.
"""
from __future__ import annotations
from typing import Optional, Literal
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Zone-level models
# ---------------------------------------------------------------------------
class ZoneState(BaseModel):
    """Full internal state of one zone.

    Returned by state() only — NOT part of the agent's observation. Fields
    marked HIDDEN below are not exposed to the agent.
    """

    zone_id: str
    casualties_total: int
    casualties_rescued: int = 0
    # HIDDEN from agent — only severity is exposed.
    casualties_critical: int
    # Step by which critical casualties expire.
    critical_deadline: int
    supply_needed: int
    supply_received: int = 0
    supply_wasted: int = 0
    road_blocked: bool = False
    # Computed each step; validated to lie in [0.0, 1.0].
    severity: float = Field(ge=0.0, le=1.0)
    teams_present: int = 0
    # HIDDEN from agent.
    is_false_sos: bool = False
    completed: bool = False

    @property
    def casualties_remaining(self) -> int:
        """Return the number of casualties not yet rescued, never below zero.

        Clamped the same way as ``supply_gap`` so that an over-counted
        ``casualties_rescued`` cannot surface as a negative head-count.
        """
        return max(0, self.casualties_total - self.casualties_rescued)

    @property
    def supply_gap(self) -> int:
        """Return the supply still outstanding, never below zero."""
        return max(0, self.supply_needed - self.supply_received)
class ZoneObs(BaseModel):
    """Filtered, agent-facing view of a zone. Carries no hidden fields."""

    zone_id: str
    casualties_remaining: int
    supply_gap: int
    # Required; validated to lie in [0.0, 1.0].
    severity: float = Field(..., ge=0.0, le=1.0)
    road_blocked: bool
    teams_present: int
    # True for real AND false SOS zones — the agent cannot tell them apart.
    sos_active: bool
# ---------------------------------------------------------------------------
# Observation model — what the agent sees each step
# ---------------------------------------------------------------------------
class ResourcesObs(BaseModel):
    """Resource counters included in each observation."""

    teams_available: int
    supply_stock: int
    airlifts_remaining: int
    # Mapping of {"zone_id": count} for returning teams that are not yet at HQ.
    teams_in_transit: dict[str, int]
class ObservationModel(BaseModel):
    """Complete per-step observation: zone views, resources, and step context."""

    zones: list[ZoneObs]
    resources: ResourcesObs
    step_number: int
    steps_remaining: int
    weather: Literal["clear", "storm", "flood"]
    # Outcome label for the previous action; restricted to these five values.
    last_action_result: Literal[
        "success",
        "invalid",
        "blocked",
        "insufficient_resources",
        "none",
    ]
# ---------------------------------------------------------------------------
# Action model — what the agent sends
# ---------------------------------------------------------------------------
class ActionModel(BaseModel):
    """A single action submitted by the agent.

    Only ``action`` is required; every other field defaults to ``None``.
    """

    action: Literal[
        "deploy_team",
        "send_supplies",
        "airlift",
        "recall_team",
        "wait",
    ]
    to_zone: Optional[str] = None
    from_zone: Optional[str] = None
    # When supplied, must be at least 1.
    units: Optional[int] = Field(None, ge=1)
    # For airlift only. The name shadows the builtin, but field names in this
    # module are frozen, so it is kept as-is.
    type: Optional[Literal["rescue", "supply"]] = None
# ---------------------------------------------------------------------------
# Reward model — structured breakdown returned in info dict
# ---------------------------------------------------------------------------
class RewardBreakdown(BaseModel):
    """Per-component breakdown of a step reward; every component defaults to 0.0.

    NOTE(review): the ``r_`` / ``p_`` prefixes presumably mark reward and
    penalty terms respectively — confirm against the reward computation.
    """

    # r_* components
    r_rescue: float = 0.0
    r_supply: float = 0.0
    r_zone_complete: float = 0.0
    r_critical_rescue: float = 0.0
    r_airlift_precision: float = 0.0

    # p_* components
    p_critical_deaths: float = 0.0
    p_urgency_decay: float = 0.0
    p_overcommitment: float = 0.0
    p_supply_waste: float = 0.0
    p_false_sos: float = 0.0
    p_wait: float = 0.0

    # Clamped to [-1.0, 1.0].
    total: float = 0.0
# ---------------------------------------------------------------------------
# Step result — returned from environment.step()
# ---------------------------------------------------------------------------
class StepResult(BaseModel):
    """Bundle of values produced by one environment step."""

    observation: ObservationModel
    # Clamped step reward.
    reward: float
    done: bool
    # Includes reward_breakdown and the event_log tail.
    info: dict