"""Delivery decision environment.

Defines three fixed delivery scenarios (``TASKS``), the set of actions an
agent may take (``VALID_ACTIONS``), and ``DeliveryDecisionEnvironment``, a
small reset/step/grade environment that resolves those actions against the
active scenario.
"""

import random
import uuid
from typing import Optional

from models import DeliveryObservation, DeliveryState

# Scenario catalogue keyed by task id. Each "scenario" dict holds the numeric
# features that are copied into the environment on reset() and echoed back in
# every observation. All shipped values lie in [0, 1].
TASKS = {
    "task_1": {
        "name": "Standard Delivery",
        "difficulty": "easy",
        "description": "Low-value package. Locker nearby, good weather.",
        "scenario": {
            "otp_attempts": 0.3,
            "recipient_available": 0.0,
            "alternate_available": 0.8,
            "package_value": 0.2,
            "package_fragile": 0.0,
            "time_remaining": 0.9,
            "weather_risk": 0.1,
            "location_safety": 0.9,
            "locker_nearby": 1.0,
        },
    },
    "task_2": {
        "name": "Fragile Package in Bad Weather",
        "difficulty": "medium",
        "description": "Fragile item, bad weather, no locker.",
        "scenario": {
            "otp_attempts": 0.5,
            "recipient_available": 0.0,
            "alternate_available": 0.6,
            "package_value": 0.5,
            "package_fragile": 1.0,
            "time_remaining": 0.5,
            "weather_risk": 0.8,
            "location_safety": 0.4,
            "locker_nearby": 0.0,
        },
    },
    "task_3": {
        "name": "High-Value Critical Delivery",
        "difficulty": "hard",
        "description": "High-value, unsafe area, time running out.",
        "scenario": {
            "otp_attempts": 0.8,
            "recipient_available": 0.0,
            "alternate_available": 0.2,
            "package_value": 0.95,
            "package_fragile": 1.0,
            "time_remaining": 0.1,
            "weather_risk": 0.7,
            "location_safety": 0.2,
            "locker_nearby": 0.0,
        },
    },
}

# Actions accepted by DeliveryDecisionEnvironment.step() (after lower/strip).
VALID_ACTIONS = [
    "retry",
    "safe_drop",
    "locker_drop",
    "neighbor_handoff",
    "contact_support",
    "return",
]

# An episode that has not terminated by this many step() calls is failed.
MAX_STEPS = 10


class DeliveryDecisionEnvironment:
    """Single-package delivery episode driven by discrete agent actions.

    Typical use: construct with a task id from ``TASKS``, call ``reset()``,
    then call ``step(action)`` until the returned observation has
    ``done=True``; ``grade()`` then returns the episode score.
    """

    def __init__(self, task_id="task_1"):
        """Create an environment bound to ``task_id`` (a key of ``TASKS``).

        The scenario itself is only loaded by ``reset()``.
        """
        self.task_id = task_id
        self._state = DeliveryState(task_id=task_id)
        self._scene = {}
        self._score = 0.5

    def reset(self):
        """Start a new episode and return the initial observation.

        Creates a fresh ``DeliveryState`` with a new UUID episode id, copies
        the task's scenario (so the catalogue in ``TASKS`` is never mutated),
        and resets the score to its neutral value of 0.5.

        Raises:
            KeyError: if ``self.task_id`` is not a key of ``TASKS``.
        """
        self._state = DeliveryState(
            episode_id=str(uuid.uuid4()),
            step_count=0,
            task_id=self.task_id,
            outcome="in_progress",
        )
        self._scene = dict(TASKS[self.task_id]["scenario"])
        self._score = 0.5
        return self._obs(False, None, "Package arrived. Recipient unavailable.")

    def step(self, action_type):
        """Apply one action and return the resulting observation.

        Args:
            action_type: action name; matched case-insensitively and with
                surrounding whitespace ignored against ``VALID_ACTIONS``.
                Anything else (including non-strings) is an invalid action,
                which costs a reward of -1.0 and leaves the scenario as is.

        Returns:
            A ``DeliveryObservation`` carrying ``done``, ``reward``, a
            message, and the current scenario features.
        """
        # Stepping a fresh environment would otherwise fail with a bare
        # KeyError on the empty scenario; start an episode implicitly.
        if not self._scene:
            self.reset()

        self._state.step_count += 1

        # Non-string input is treated as an invalid action instead of
        # raising AttributeError on .lower().
        act = action_type.lower().strip() if isinstance(action_type, str) else ""

        if act in VALID_ACTIONS:
            outcome, terminal, reward, msg = self._resolve(act)
            self._state.outcome = outcome

            # Every valid action consumes time, whatever its result.
            self._scene["time_remaining"] = max(
                0.0, self._scene["time_remaining"] - 0.15
            )
            if act == "retry":
                self._scene["otp_attempts"] = min(
                    1.0, self._scene["otp_attempts"] + 0.25
                )
            elif act == "contact_support":
                self._scene["recipient_available"] = min(
                    1.0, self._scene["recipient_available"] + 0.3
                )
        else:
            # Invalid actions leave the recorded outcome and scenario alone.
            outcome = self._state.outcome
            terminal, reward, msg = False, -1.0, "Invalid action."

        # The step cap applies to every step, valid or not, so an agent
        # cannot keep an episode alive forever with invalid actions.
        if self._state.step_count >= MAX_STEPS and not terminal:
            # Update the local outcome too: it is what gets scored below, and
            # it must agree with the "failed" outcome recorded on the state.
            outcome, terminal, reward, msg = "failed", True, -15.0, "Max steps."
            self._state.outcome = outcome

        if terminal:
            self._score = self._calc(outcome)
        return self._obs(terminal, reward, msg)

    def grade(self):
        """Return the episode score.

        The value is always strictly between 0 and 1: 0.5 until an episode
        terminates, then whatever ``_calc`` assigned.
        """
        return self._score

    @property
    def state(self):
        """The current ``DeliveryState`` object."""
        return self._state

    def _calc(self, outcome):
        """Map a final outcome to a score strictly between 0 and 1.

        Successes score higher the fewer steps they took (0.95 down to 0.65);
        a failure scores 0.05; any other outcome scores 0.45.
        """
        steps = self._state.step_count
        if outcome == "success":
            if steps <= 2:
                return 0.95
            if steps <= 4:
                return 0.85
            if steps <= 6:
                return 0.75
            return 0.65
        if outcome == "failed":
            return 0.05
        return 0.45

    def _resolve(self, action):
        """Resolve a validated action against the current scenario.

        Returns:
            ``(outcome, terminal, reward, message)`` where ``outcome`` is one
            of ``"success"``, ``"failed"`` or ``"in_progress"``.
        """
        s = self._scene
        # Successes within the first three steps earn a speed bonus.
        bon = 2.0 if self._state.step_count <= 3 else 0.0

        if action == "retry":
            if s["otp_attempts"] >= 0.9:
                return "in_progress", False, -3.0, "OTP maxed."
            # Success chance shrinks from 0.7 towards 0.3 as attempts pile up.
            if random.random() < 0.3 + (1 - s["otp_attempts"]) * 0.4:
                return "success", True, 10.0 + bon, "Delivered!"
            return "in_progress", False, -0.5, "No answer."

        if action == "safe_drop":
            ok = (
                s["location_safety"] > 0.5
                and s["weather_risk"] < 0.6
                and s["package_fragile"] < 0.5
                and s["package_value"] < 0.7
            )
            # Extra penalties stack for each hazard the drop ignored.
            pen = (
                (-8 if s["location_safety"] < 0.4 else 0)
                + (-6 if s["package_fragile"] > 0.5 else 0)
                + (-5 if s["weather_risk"] > 0.6 else 0)
            )
            if ok:
                return "success", True, 10.0 + bon, "Safe drop done!"
            return "in_progress", False, -0.5 + pen, "Unsafe."

        if action == "locker_drop":
            if s["locker_nearby"] > 0.5:
                return "success", True, 6.0 + bon, "Locker!"
            return "in_progress", False, -0.5, "No locker."

        if action == "neighbor_handoff":
            if s["alternate_available"] > 0.5:
                return "success", True, 7.0 + bon, "Neighbor accepted!"
            return "in_progress", False, -0.5, "No neighbor."

        if action == "contact_support":
            return "in_progress", False, -0.5, "Support contacted."

        if action == "return":
            return "failed", True, -15.0, "Returned."

        return "in_progress", False, -0.5, "Unknown."

    def _obs(self, done, reward: Optional[float], message):
        """Build a ``DeliveryObservation`` from the current scenario.

        Missing scenario keys (e.g. before the first ``reset()``) fall back
        to the defaults given below.
        """
        s = self._scene
        return DeliveryObservation(
            done=done,
            reward=reward,
            message=message,
            otp_attempts=s.get("otp_attempts", 0),
            recipient_available=s.get("recipient_available", 0),
            alternate_available=s.get("alternate_available", 0),
            package_value=s.get("package_value", 0),
            package_fragile=s.get("package_fragile", 0),
            time_remaining=s.get("time_remaining", 1),
            weather_risk=s.get("weather_risk", 0),
            location_safety=s.get("location_safety", 1),
            locker_nearby=s.get("locker_nearby", 0),
        )