File size: 5,139 Bytes
13ac338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import re
from typing import List, Dict, Optional

import gymnasium as gym
import numpy as np
from gymnasium import spaces

from models import Email, CalendarEvent, Observation, Action

class EmailEnv(gym.Env):
    """
    Email Triage & Scheduling Assistant: A real-world human-task environment.
    Simulates inbox management, spam filtering, and meeting coordination.

    The task is selected through ``options={"level": N}`` on ``reset``:

    * level 1 -- move the spam email (id 1) to the "Spam" folder;
    * level 2 -- file email 2 under "Work" and email 4 under "Archive";
    * level 3 -- schedule the meeting at 2 PM and avoid the 10 AM slot.

    Any other level leaves every step with zero reward until truncation.
    """

    # Expected destination folder for each email graded in level 2.
    _LEVEL2_TARGETS = {2: "Work", 4: "Archive"}

    # A bare substring test would treat "12 PM" as "2 PM" (and "110 AM" as
    # "10 AM"); the look-behind rejects a match that is preceded by a digit.
    _FREE_SLOT = re.compile(r"(?<!\d)2 PM")
    _BUSY_SLOT = re.compile(r"(?<!\d)10 AM")

    def __init__(self):
        """Build the sample data and put the environment in its start state."""
        super().__init__()
        self.action_space = spaces.Discrete(10) # Placeholder for discrete actions if needed
        self._setup_inbox()
        self.max_steps = 30
        self.reset()

    def _setup_inbox(self):
        """Create the template emails and calendar events copied on each reset."""
        # Sample structured data for tasks
        self.sample_emails = [
            Email(id=1, sender="spam@bott.io", subject="CASH NOW!!", body="Claim your 1M dollars", folder="Inbox", priority=3),
            Email(id=2, sender="boss@corp.com", subject="Urgent: Project Update", body="Send the report by 5 PM.", folder="Inbox", priority=1),
            Email(id=3, sender="calendar@corp.com", subject="Meeting Request: Sync", body="Let's sync at 2 PM.", folder="Inbox", priority=2),
            Email(id=4, sender="news@daily.com", subject="Daily Briefing", body="Top stories of the day.", folder="Inbox", priority=3),
            Email(id=5, sender="friend@web.com", subject="Coffee?", body="Are you free tomorrow at 10 AM?", folder="Inbox", priority=3)
        ]
        self.sample_calendar = [
            CalendarEvent(title="Sprint Review", start_time="10:00", end_time="11:00"),
            CalendarEvent(title="Lunch", start_time="12:00", end_time="13:00")
        ]

    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
        """
        Restore the initial inbox/calendar and select the task level.

        Args:
            seed: forwarded to ``gym.Env.reset``.
            options: optional mapping; its ``"level"`` entry picks the task
                (defaults to 1 when missing or when ``options`` is empty).

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_level = options.get("level", 1) if options else 1
        # Work on copies so an episode never mutates the templates.
        self.inbox = [e.model_copy() for e in self.sample_emails]
        self.calendar = [c.model_copy() for c in self.sample_calendar]
        self.steps = 0
        self.completed_tasks = 0
        return self._get_observation(), {}

    def _get_observation(self) -> Dict:
        """
        Return the current observation as a plain dict.

        ``inbox_count`` and ``current_email`` are both derived from the emails
        whose folder is still "Inbox", so an already-filed email is never
        reported as the current one.
        """
        pending = [e for e in self.inbox if e.folder == "Inbox"]
        # Pydantic conversion to dict for Gym-compatible step/reset returns
        obs = Observation(
            inbox_count=len(pending),
            current_email=pending[0] if pending else None,
            calendar=self.calendar
        )
        return obs.model_dump()

    def _find_email(self, email_id):
        """Return the email with the given id, or None when there is none."""
        return next((e for e in self.inbox if e.id == email_id), None)

    def _step_spam(self, action):
        """Level 1 (easy): grade moving email 1 to "Spam". Returns (reward, terminated, success)."""
        if action.type == "MOVE" and action.email_id == 1 and action.target_folder == "Spam":
            # Look the email up by id instead of relying on its list position.
            spam = self._find_email(1)
            if spam is not None:
                spam.folder = "Spam"
            return 1.0, True, True # Solved Task 1
        return -0.1, False, False # Logical error penalty

    def _step_categorize(self, action):
        """Level 2 (medium): grade filing emails 2 and 4. Returns (reward, terminated, success)."""
        if action.type != "MOVE":
            return 0.0, False, False

        reward = 0.0
        email = self._find_email(action.email_id)
        if email is not None:
            expected = self._LEVEL2_TARGETS.get(email.id)
            # Pay the filing reward only once per email: re-moving an email
            # that already left the inbox is treated as a wrong move.
            if (expected is not None
                    and action.target_folder == expected
                    and email.folder == "Inbox"):
                reward += 0.4
                email.folder = expected
            else:
                reward -= 0.1

        # Check for completion
        done = all(
            e.folder != "Inbox" for e in self.inbox if e.id in self._LEVEL2_TARGETS
        )
        if done:
            reward += 0.2 # Completion bonus
        return reward, done, done

    def _step_schedule(self, action):
        """Level 3 (hard): grade the proposed meeting time. Returns (reward, terminated, success)."""
        # Task: Schedule a meeting at 2 PM (No conflict) vs Avoiding 10 AM (Conflict)
        if action.type != "SCHEDULE":
            return 0.0, False, False

        reply = action.reply_text or ""
        if self._FREE_SLOT.search(reply):
            return 1.0, True, True
        if self._BUSY_SLOT.search(reply):
            return -0.5, True, False # Fail: Calendar conflict!
        return -0.1, False, False

    def step(self, action_dict: Dict):
        """
        Receives an Action model mapping (dict) and applies it to the state.
        An already-built ``Action`` instance is accepted as well.

        Returns: observation, reward, terminated, truncated, info
        (``info["is_success"]`` is True only on the step that solves the task;
        ``truncated`` becomes True once ``max_steps`` steps have been taken).
        """
        self.steps += 1
        if isinstance(action_dict, Action):
            action = action_dict
        else:
            action = Action(**action_dict)

        graders = {
            1: self._step_spam,
            2: self._step_categorize,
            3: self._step_schedule,
        }
        grader = graders.get(self.current_level)
        if grader is None:
            # Unknown level: nothing is graded, the episode only ends by truncation.
            reward, terminated, success = 0.0, False, False
        else:
            reward, terminated, success = grader(action)

        info = {"is_success": success}
        truncated = self.steps >= self.max_steps
        return self._get_observation(), reward, terminated, truncated, info

    def state(self) -> Dict:
        """Required by OpenEnv for full state snapshot."""
        return {
            "inbox_snapshot": [e.model_dump() for e in self.inbox],
            "calendar_snapshot": [c.model_dump() for c in self.calendar],
            "level": self.current_level
        }