File size: 5,341 Bytes
23c53d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import random, uuid
from models import DeliveryObservation, DeliveryState

# Catalogue of delivery scenarios, keyed by task id. Each entry carries
# display metadata plus a "scenario" dict of starting feature values that the
# environment copies on reset. All feature values listed here lie in [0, 1].
TASKS = {
    "task_1": {
        "name": "Standard Delivery",
        "difficulty": "easy",
        "description": "Low-value package. Locker nearby, good weather.",
        "scenario": {
            "otp_attempts": 0.3,
            "recipient_available": 0.0,
            "alternate_available": 0.8,
            "package_value": 0.2,
            "package_fragile": 0.0,
            "time_remaining": 0.9,
            "weather_risk": 0.1,
            "location_safety": 0.9,
            "locker_nearby": 1.0,
        },
    },
    "task_2": {
        "name": "Fragile Package in Bad Weather",
        "difficulty": "medium",
        "description": "Fragile item, bad weather, no locker.",
        "scenario": {
            "otp_attempts": 0.5,
            "recipient_available": 0.0,
            "alternate_available": 0.6,
            "package_value": 0.5,
            "package_fragile": 1.0,
            "time_remaining": 0.5,
            "weather_risk": 0.8,
            "location_safety": 0.4,
            "locker_nearby": 0.0,
        },
    },
    "task_3": {
        "name": "High-Value Critical Delivery",
        "difficulty": "hard",
        "description": "High-value, unsafe area, time running out.",
        "scenario": {
            "otp_attempts": 0.8,
            "recipient_available": 0.0,
            "alternate_available": 0.2,
            "package_value": 0.95,
            "package_fragile": 1.0,
            "time_remaining": 0.1,
            "weather_risk": 0.7,
            "location_safety": 0.2,
            "locker_nearby": 0.0,
        },
    },
}

# Action names accepted by the environment's step(); anything else is
# reported as an invalid action.
VALID_ACTIONS = [
    "retry",
    "safe_drop",
    "locker_drop",
    "neighbor_handoff",
    "contact_support",
    "return",
]

class DeliveryDecisionEnvironment:
    """Episodic simulation of a courier deciding what to do with a package.

    An episode starts with ``reset()``, which loads the scenario for
    ``task_id`` from ``TASKS``. Each ``step()`` applies one action from
    ``VALID_ACTIONS`` and returns a ``DeliveryObservation`` carrying the
    current scenario features, a reward and a ``done`` flag. ``grade()``
    exposes a final score for the episode.
    """

    # Step budget: an episode that has not ended by this many steps is failed.
    MAX_STEPS = 10

    def __init__(self, task_id="task_1"):
        """Create an environment bound to ``task_id`` (a key of ``TASKS``)."""
        self.task_id = task_id
        self._state = DeliveryState(task_id=task_id)
        # Pre-load the scenario so that step() called before reset() does not
        # crash on an empty scene. Unknown task ids keep an empty scene here
        # and still raise KeyError from reset().
        self._scene = dict(TASKS.get(task_id, {}).get("scenario", {}))
        self._score = 0.5
        self._done = False

    def reset(self):
        """Start a fresh episode and return the initial observation.

        Raises:
            KeyError: if ``self.task_id`` is not a key of ``TASKS``.
        """
        self._state = DeliveryState(
            episode_id=str(uuid.uuid4()),
            step_count=0,
            task_id=self.task_id,
            outcome="in_progress",
        )
        # Copy so per-episode mutations never leak into the TASKS template.
        self._scene = dict(TASKS[self.task_id]["scenario"])
        self._score = 0.5
        self._done = False
        return self._obs(False, None, "Package arrived. Recipient unavailable.")

    def step(self, action_type):
        """Apply one action and return the resulting observation.

        Args:
            action_type: action name; matched case-insensitively after
                stripping whitespace. Non-string values are treated as an
                invalid action.

        Returns:
            A ``DeliveryObservation``. Invalid actions cost -1.0 and leave
            the scenario untouched, but still consume a step. Once the
            episode has ended, further calls return a terminal observation
            with reward 0.0 and change nothing.
        """
        if self._done:
            # Guard the final score: stepping after termination must not be
            # able to overwrite a finished episode's outcome or grade.
            return self._obs(True, 0.0, "Episode already finished.")

        self._state.step_count += 1
        act = action_type.lower().strip() if isinstance(action_type, str) else ""

        if act in VALID_ACTIONS:
            # Resolve against the scene as it was *before* this step's
            # time/OTP/support updates are applied.
            outcome, terminal, reward, msg = self._resolve(act)
            self._advance_scene(act)
        else:
            outcome, terminal = self._state.outcome, False
            reward, msg = -1.0, "Invalid action."

        # The step budget applies to every step, including invalid actions,
        # so an episode always terminates.
        outcome, terminal, reward, msg = self._cap_steps(
            outcome, terminal, reward, msg
        )
        self._state.outcome = outcome
        if terminal:
            self._done = True
            # Score from the final outcome (after the step cap), so a
            # timed-out episode is graded as failed.
            self._score = self._calc(outcome)
        return self._obs(terminal, reward, msg)

    def grade(self):
        """Return the episode score.

        The value is 0.5 until an episode terminates, then one of the
        ``_calc`` tiers; it is never exactly 0.0 or 1.0.
        """
        return self._score

    @property
    def state(self):
        """The current ``DeliveryState`` object."""
        return self._state

    def _advance_scene(self, act):
        """Apply the per-step scenario changes that follow a valid action."""
        scene = self._scene
        # Every valid action costs time, floored at 0.
        scene["time_remaining"] = max(0.0, scene["time_remaining"] - 0.15)
        if act == "retry":
            scene["otp_attempts"] = min(1.0, scene["otp_attempts"] + 0.25)
        elif act == "contact_support":
            scene["recipient_available"] = min(
                1.0, scene["recipient_available"] + 0.3
            )

    def _cap_steps(self, outcome, terminal, reward, msg):
        """Enforce the step budget on a step result.

        Returns the ``(outcome, terminal, reward, msg)`` tuple unchanged
        unless ``MAX_STEPS`` has been reached without a terminal result, in
        which case the episode is failed with a -15.0 reward.
        """
        if self._state.step_count >= self.MAX_STEPS and not terminal:
            return "failed", True, -15.0, "Max steps."
        return outcome, terminal, reward, msg

    def _calc(self, outcome):
        """Map a final outcome to a score strictly between 0 and 1.

        Successes score higher the fewer steps they took; ``"failed"``
        scores 0.05; any other outcome scores 0.45.
        """
        steps = self._state.step_count
        if outcome == "success":
            if steps <= 2:
                return 0.95
            if steps <= 4:
                return 0.85
            if steps <= 6:
                return 0.75
            return 0.65
        if outcome == "failed":
            return 0.05
        return 0.45

    def _resolve(self, action):
        """Decide the result of ``action`` against the current scene.

        Returns:
            ``(outcome, terminal, reward, message)``. Successful deliveries
            within the first three steps earn a +2.0 bonus.
        """
        s = self._scene
        bon = 2.0 if self._state.step_count <= 3 else 0.0

        if action == "retry":
            if s["otp_attempts"] >= 0.9:
                return "in_progress", False, -3.0, "OTP maxed."
            # Success chance shrinks as OTP attempts accumulate.
            if random.random() < 0.3 + (1 - s["otp_attempts"]) * 0.4:
                return "success", True, 10.0 + bon, "Delivered!"
            return "in_progress", False, -0.5, "No answer."

        if action == "safe_drop":
            ok = (
                s["location_safety"] > 0.5
                and s["weather_risk"] < 0.6
                and s["package_fragile"] < 0.5
                and s["package_value"] < 0.7
            )
            if ok:
                return "success", True, 10.0 + bon, "Safe drop done!"
            # Extra penalties stack for each risk factor that was ignored.
            pen = (
                (-8 if s["location_safety"] < 0.4 else 0)
                + (-6 if s["package_fragile"] > 0.5 else 0)
                + (-5 if s["weather_risk"] > 0.6 else 0)
            )
            return "in_progress", False, -0.5 + pen, "Unsafe."

        if action == "locker_drop":
            if s["locker_nearby"] > 0.5:
                return "success", True, 6.0 + bon, "Locker!"
            return "in_progress", False, -0.5, "No locker."

        if action == "neighbor_handoff":
            if s["alternate_available"] > 0.5:
                return "success", True, 7.0 + bon, "Neighbor accepted!"
            return "in_progress", False, -0.5, "No neighbor."

        if action == "contact_support":
            return "in_progress", False, -0.5, "Support contacted."

        if action == "return":
            return "failed", True, -15.0, "Returned."

        return "in_progress", False, -0.5, "Unknown."

    def _obs(self, done, reward, message):
        """Build a ``DeliveryObservation`` from the current scene.

        Missing scene keys fall back to defaults (1 for ``time_remaining``
        and ``location_safety``, 0 for everything else).
        """
        s = self._scene
        return DeliveryObservation(
            done=done,
            reward=reward,
            message=message,
            otp_attempts=s.get("otp_attempts", 0),
            recipient_available=s.get("recipient_available", 0),
            alternate_available=s.get("alternate_available", 0),
            package_value=s.get("package_value", 0),
            package_fragile=s.get("package_fragile", 0),
            time_remaining=s.get("time_remaining", 1),
            weather_risk=s.get("weather_risk", 0),
            location_safety=s.get("location_safety", 1),
            locker_nearby=s.get("locker_nearby", 0),
        )