"""Pydantic models for delivery requests, observations, actions, rewards and step results."""

from pydantic import BaseModel
from typing import List, Dict, Optional


class DeliveryRequest(BaseModel):
    """A single delivery to be completed before a deadline."""

    # Identifier of the request.
    id: int
    # Where the delivery must be made; presumably a node index
    # (cf. ``Action.next_node``) -- confirm against the environment.
    destination: int
    # Time units by which the delivery must complete.
    deadline: float
    # Value associated with this delivery; defaults to 10.0 when omitted.
    reward_value: float = 10.0


class Observation(BaseModel):
    """Snapshot of the state exposed to the caller."""

    current_location: int
    # List of DeliveryRequest dicts (plain dicts, not model instances).
    pending_deliveries: List[Dict]
    time: float
    fuel: float
    # Edge -> multiplier, e.g. {(0, 1): 1.5}.
    # NOTE(review): tuple keys are not valid JSON object keys; confirm how
    # this field is expected to be serialized.
    traffic: Dict


class Action(BaseModel):
    """An action consisting of the node to move to next."""

    next_node: int


class Reward(BaseModel):
    """A scalar reward wrapped in a model."""

    value: float


class StepResult(BaseModel):
    """Bundle of observation, reward, termination flag and extra info."""

    observation: Observation
    reward: Reward
    done: bool
    # Free-form auxiliary data; its schema is not defined in this file.
    info: Dict