Spaces:
Sleeping
Sleeping
File size: 5,139 Bytes
# 85768b6
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from typing import List, Dict, Optional
from models import Email, CalendarEvent, Observation, Action
class EmailEnv(gym.Env):
    """
    Email Triage & Scheduling Assistant: A real-world human-task environment.
    Simulates inbox management, spam filtering, and meeting coordination.

    The task is selected per episode via ``reset(options={"level": n})``:

    * level 1 -- move the spam email (id 1) to the "Spam" folder;
    * level 2 -- file email 2 under "Work" and email 4 under "Archive";
    * level 3 -- send a SCHEDULE action whose reply text proposes "2 PM"
      (proposing "10 AM" collides with the 10:00-11:00 calendar entry).

    For any other level value ``step`` applies no task logic: the reward
    stays 0.0 and the episode can only end through truncation.
    """

    # Id of the email that level 1 expects to be moved to "Spam".
    _SPAM_EMAIL_ID = 1
    # Level-2 filing targets: email id -> folder it has to end up in.
    _CATEGORY_TARGETS = {2: "Work", 4: "Archive"}

    def __init__(self):
        """Build the sample data and put the environment into its initial state."""
        super().__init__()
        # NOTE(review): observation_space is never set and step() consumes a
        # dict rather than a Discrete sample -- confirm whether Gym API
        # checkers are expected to pass on this environment.
        self.action_space = spaces.Discrete(10)  # Placeholder for discrete actions if needed
        self._setup_inbox()
        self.max_steps = 30
        self.reset()

    def _setup_inbox(self):
        """Create the template emails and calendar events that reset() copies."""
        # Sample structured data for tasks
        self.sample_emails = [
            Email(id=1, sender="spam@bott.io", subject="CASH NOW!!", body="Claim your 1M dollars", folder="Inbox", priority=3),
            Email(id=2, sender="boss@corp.com", subject="Urgent: Project Update", body="Send the report by 5 PM.", folder="Inbox", priority=1),
            Email(id=3, sender="calendar@corp.com", subject="Meeting Request: Sync", body="Let's sync at 2 PM.", folder="Inbox", priority=2),
            Email(id=4, sender="news@daily.com", subject="Daily Briefing", body="Top stories of the day.", folder="Inbox", priority=3),
            Email(id=5, sender="friend@web.com", subject="Coffee?", body="Are you free tomorrow at 10 AM?", folder="Inbox", priority=3),
        ]
        self.sample_calendar = [
            CalendarEvent(title="Sprint Review", start_time="10:00", end_time="11:00"),
            CalendarEvent(title="Lunch", start_time="12:00", end_time="13:00"),
        ]

    def reset(self, seed=None, options=None):
        """
        Start a new episode.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Optional mapping; its ``"level"`` entry (default 1)
                selects the task for the episode.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_level = options.get("level", 1) if options else 1
        # Work on copies so the templates are not touched by step().
        self.inbox = [e.model_copy() for e in self.sample_emails]
        self.calendar = [c.model_copy() for c in self.sample_calendar]
        self.steps = 0
        self.completed_tasks = 0
        return self._get_observation(), {}

    def _get_observation(self) -> Dict:
        """
        Return the current observation as a plain dict.

        ``inbox_count`` and ``current_email`` are both derived from the emails
        whose folder is still "Inbox", so an email that has already been
        filed is no longer presented as the current one. ``current_email`` is
        ``None`` once nothing is left in "Inbox".
        """
        pending = [e for e in self.inbox if e.folder == "Inbox"]
        # Pydantic conversion to dict for Gym-compatible step/reset returns
        obs = Observation(
            inbox_count=len(pending),
            current_email=pending[0] if pending else None,
            calendar=self.calendar,
        )
        return obs.model_dump()

    def _find_email(self, email_id):
        """Return the email with the given id, or None when there is none."""
        return next((e for e in self.inbox if e.id == email_id), None)

    def step(self, action_dict: Dict):
        """
        Receives an Action model mapping (dict) and applies it to the state.

        Args:
            action_dict: Keyword arguments for the ``Action`` model. Whatever
                ``Action(**action_dict)`` raises on bad input propagates to
                the caller; the step counter has already been advanced then.

        Returns: observation, reward, terminated, truncated, info
            ``info`` carries ``"is_success"``; ``truncated`` becomes True
            once ``max_steps`` steps have been taken.
        """
        self.steps += 1
        action = Action(**action_dict)

        reward, terminated, success = 0.0, False, False
        if self.current_level == 1:
            reward, terminated, success = self._step_spam(action)
        elif self.current_level == 2:
            reward, terminated, success = self._step_categorize(action)
        elif self.current_level == 3:
            reward, terminated, success = self._step_schedule(action)

        truncated = self.steps >= self.max_steps
        info = {"is_success": success}
        return self._get_observation(), reward, terminated, truncated, info

    def _step_spam(self, action):
        """Task 1 (Easy): reward moving the spam email to "Spam"; returns (reward, terminated, success)."""
        is_spam_move = (
            action.type == "MOVE"
            and action.email_id == self._SPAM_EMAIL_ID
            and action.target_folder == "Spam"
        )
        if not is_spam_move:
            return -0.1, False, False  # Logical error penalty

        # Look the email up by id instead of relying on its list position.
        spam = self._find_email(self._SPAM_EMAIL_ID)
        if spam is not None:
            spam.folder = "Spam"
        return 1.0, True, True  # Solved Task 1

    def _step_categorize(self, action):
        """Task 2 (Medium): reward filing emails 2 and 4 correctly; returns (reward, terminated, success)."""
        reward = 0.0
        if action.type == "MOVE":
            email = self._find_email(action.email_id)
            # NOTE(review): non-MOVE actions and unknown email ids are left
            # unpenalised here, unlike level 1 -- confirm that is intended.
            if email is not None:
                wanted = self._CATEGORY_TARGETS.get(email.id)
                correct = wanted is not None and action.target_folder == wanted
                # Pay only for the first correct filing; repeating a move on
                # an already-filed email must not be a source of reward.
                if correct and email.folder == "Inbox":
                    reward += 0.4
                    email.folder = wanted
                else:
                    reward -= 0.1

        # Check for completion
        done = all(
            e.folder != "Inbox"
            for e in self.inbox
            if e.id in self._CATEGORY_TARGETS
        )
        if done:
            reward += 0.2  # Completion bonus
        return reward, done, done

    def _step_schedule(self, action):
        """Task 3 (Hard): reward scheduling at 2 PM, fail on 10 AM; returns (reward, terminated, success)."""
        # Task: Schedule a meeting at 2 PM (No conflict) vs Avoiding 10 AM (Conflict)
        if action.type != "SCHEDULE":
            # NOTE(review): non-SCHEDULE actions are left unpenalised here,
            # unlike level 1 -- confirm that is intended.
            return 0.0, False, False

        reply = action.reply_text or ""
        if "2 PM" in reply:
            return 1.0, True, True
        if "10 AM" in reply:
            return -0.5, True, False  # Fail: Calendar conflict!
        return -0.1, False, False

    def state(self) -> Dict:
        """Required by OpenEnv for full state snapshot.

        Returns a dict with the dumped inbox emails, the dumped calendar
        events and the current level.
        """
        return {
            "inbox_snapshot": [e.model_dump() for e in self.inbox],
            "calendar_snapshot": [c.model_dump() for c in self.calendar],
            "level": self.current_level,
        }
|