Spaces:
Sleeping
Sleeping
| import random, uuid | |
| from models import DeliveryObservation, DeliveryState | |
# Catalogue of delivery scenarios, keyed by task id.
# Each entry carries display metadata (name, difficulty, description) and a
# "scenario" mapping of feature name -> float that seeds an episode.
TASKS = {
    "task_1": dict(
        name="Standard Delivery",
        difficulty="easy",
        description="Low-value package. Locker nearby, good weather.",
        scenario=dict(
            otp_attempts=0.3,
            recipient_available=0.0,
            alternate_available=0.8,
            package_value=0.2,
            package_fragile=0.0,
            time_remaining=0.9,
            weather_risk=0.1,
            location_safety=0.9,
            locker_nearby=1.0,
        ),
    ),
    "task_2": dict(
        name="Fragile Package in Bad Weather",
        difficulty="medium",
        description="Fragile item, bad weather, no locker.",
        scenario=dict(
            otp_attempts=0.5,
            recipient_available=0.0,
            alternate_available=0.6,
            package_value=0.5,
            package_fragile=1.0,
            time_remaining=0.5,
            weather_risk=0.8,
            location_safety=0.4,
            locker_nearby=0.0,
        ),
    ),
    "task_3": dict(
        name="High-Value Critical Delivery",
        difficulty="hard",
        description="High-value, unsafe area, time running out.",
        scenario=dict(
            otp_attempts=0.8,
            recipient_available=0.0,
            alternate_available=0.2,
            package_value=0.95,
            package_fragile=1.0,
            time_remaining=0.1,
            weather_risk=0.7,
            location_safety=0.2,
            locker_nearby=0.0,
        ),
    ),
}
# Action names accepted by the environment; anything else is rejected as an
# invalid action.
VALID_ACTIONS = [
    "retry",             # attempt the hand-over to the recipient again
    "safe_drop",         # leave the package at the delivery location
    "locker_drop",       # deposit the package in a locker
    "neighbor_handoff",  # hand the package to an alternate recipient
    "contact_support",   # ask support for help
    "return",            # give up and send the package back
]
class DeliveryDecisionEnvironment:
    """Episodic environment for deciding how to complete a package delivery.

    An episode is started with :meth:`reset`, which copies the scenario of the
    configured task out of ``TASKS``.  Each :meth:`step` applies one of
    ``VALID_ACTIONS`` and returns a ``DeliveryObservation`` carrying the
    current scenario features plus ``done``, ``reward`` and ``message``.
    :meth:`grade` reports a score for the most recently finished episode.

    ``reset()`` must be called before ``step()``: the scenario mapping is
    empty until then and resolving an action indexes into it.
    """

    # An episode that has not ended by this many steps is terminated as failed.
    MAX_STEPS = 10

    def __init__(self, task_id="task_1"):
        """Create an environment bound to ``task_id`` (a key of ``TASKS``)."""
        self.task_id = task_id
        self._state = DeliveryState(task_id=task_id)
        self._scene = {}
        # Neutral score reported by grade() until an episode terminates.
        self._score = 0.5

    def reset(self):
        """Start a new episode and return its initial observation.

        Creates a fresh ``DeliveryState`` with a new random episode id, loads
        a private copy of the task scenario (so steps never mutate ``TASKS``)
        and restores the neutral score.

        Raises:
            KeyError: if ``task_id`` is not a key of ``TASKS``.
        """
        self._state = DeliveryState(
            episode_id=str(uuid.uuid4()),
            step_count=0,
            task_id=self.task_id,
            outcome="in_progress",
        )
        self._scene = dict(TASKS[self.task_id]["scenario"])
        self._score = 0.5
        return self._obs(False, None, "Package arrived. Recipient unavailable.")

    def step(self, action_type):
        """Apply one action and return the resulting observation.

        Args:
            action_type: action name; matched case-insensitively and with
                surrounding whitespace ignored.  Anything that is not a
                string, or not in ``VALID_ACTIONS``, counts as an invalid
                action: it consumes a step and yields a reward of -1.0 but
                leaves the scenario untouched.

        Returns:
            The ``DeliveryObservation`` after the action.  Once the step
            counter reaches ``MAX_STEPS`` without a terminal outcome the
            episode ends as failed with reward -15.0, whether the last action
            was valid or not.
        """
        self._state.step_count += 1
        # Non-string input is treated like any other unrecognised action
        # instead of raising AttributeError on .lower().
        act = action_type.lower().strip() if isinstance(action_type, str) else ""

        if act not in VALID_ACTIONS:
            # Invalid actions still count towards the step cap; without this
            # check a stream of invalid actions would never end the episode.
            if self._state.step_count >= self.MAX_STEPS:
                return self._time_out()
            return self._obs(False, -1.0, "Invalid action.")

        outcome, terminal, reward, msg = self._resolve(act)
        self._state.outcome = outcome

        # Every valid action costs time; some actions also shift a feature.
        scene = self._scene
        scene["time_remaining"] = max(0.0, scene["time_remaining"] - 0.15)
        if act == "retry":
            scene["otp_attempts"] = min(1.0, scene["otp_attempts"] + 0.25)
        elif act == "contact_support":
            scene["recipient_available"] = min(
                1.0, scene["recipient_available"] + 0.3
            )

        if not terminal and self._state.step_count >= self.MAX_STEPS:
            return self._time_out()
        if terminal:
            self._score = self._calc(outcome)
        return self._obs(terminal, reward, msg)

    def grade(self):
        """Return the episode score: 0.5 until an episode ends, then _calc()'s value."""
        # ALWAYS strictly between 0 and 1 — never 0.0 or 1.0
        return self._score

    def state(self):
        """Return the current ``DeliveryState`` object."""
        return self._state

    def _time_out(self):
        """End the episode as failed because the step cap was reached.

        The score is computed from the "failed" outcome explicitly, so a
        timed-out episode is graded as a failure rather than as in-progress.
        """
        self._state.outcome = "failed"
        self._score = self._calc("failed")
        return self._obs(True, -15.0, "Max steps.")

    def _calc(self, outcome):
        """Score strictly between 0 and 1 — never exactly 0.0 or 1.0.

        Success is worth more the fewer steps it took; "failed" scores 0.05
        and any other outcome scores 0.45.
        """
        steps = self._state.step_count
        if outcome == "success":
            if steps <= 2:
                return 0.95
            if steps <= 4:
                return 0.85
            if steps <= 6:
                return 0.75
            return 0.65
        if outcome == "failed":
            return 0.05
        return 0.45

    def _resolve(self, action):
        """Decide the result of ``action`` against the current scenario.

        Returns:
            ``(outcome, terminal, reward, message)``.  Successful outcomes
            earn a +2.0 bonus when reached within the first three steps.

        NOTE(review): ``recipient_available`` is raised by "contact_support"
        in step() but is not consulted here — confirm whether "retry" was
        meant to depend on it.
        """
        s = self._scene
        bon = 2.0 if self._state.step_count <= 3 else 0.0

        if action == "retry":
            if s["otp_attempts"] >= 0.9:
                return "in_progress", False, -3.0, "OTP maxed."
            # Success chance shrinks as otp_attempts grows: 0.3 + 0.4 * (1 - attempts).
            if random.random() < 0.3 + (1 - s["otp_attempts"]) * 0.4:
                return "success", True, 10.0 + bon, "Delivered!"
            return "in_progress", False, -0.5, "No answer."

        if action == "safe_drop":
            ok = (
                s["location_safety"] > 0.5
                and s["weather_risk"] < 0.6
                and s["package_fragile"] < 0.5
                and s["package_value"] < 0.7
            )
            # Extra penalties stack for each aggravating condition.
            pen = (
                (-8 if s["location_safety"] < 0.4 else 0)
                + (-6 if s["package_fragile"] > 0.5 else 0)
                + (-5 if s["weather_risk"] > 0.6 else 0)
            )
            if ok:
                return "success", True, 10.0 + bon, "Safe drop done!"
            return "in_progress", False, -0.5 + pen, "Unsafe."

        if action == "locker_drop":
            if s["locker_nearby"] > 0.5:
                return "success", True, 6.0 + bon, "Locker!"
            return "in_progress", False, -0.5, "No locker."

        if action == "neighbor_handoff":
            if s["alternate_available"] > 0.5:
                return "success", True, 7.0 + bon, "Neighbor accepted!"
            return "in_progress", False, -0.5, "No neighbor."

        if action == "contact_support":
            return "in_progress", False, -0.5, "Support contacted."

        if action == "return":
            return "failed", True, -15.0, "Returned."

        return "in_progress", False, -0.5, "Unknown."

    def _obs(self, done, reward, message):
        """Build a ``DeliveryObservation`` from the current scenario features.

        Features missing from the scenario (e.g. before reset()) fall back to
        the defaults given below.
        """
        s = self._scene
        return DeliveryObservation(
            done=done,
            reward=reward,
            message=message,
            otp_attempts=s.get("otp_attempts", 0),
            recipient_available=s.get("recipient_available", 0),
            alternate_available=s.get("alternate_available", 0),
            package_value=s.get("package_value", 0),
            package_fragile=s.get("package_fragile", 0),
            time_remaining=s.get("time_remaining", 1),
            weather_risk=s.get("weather_risk", 0),
            location_safety=s.get("location_safety", 1),
            locker_nearby=s.get("locker_nearby", 0),
        )