# SushCodex's picture
# Upload 14 files
# 85768b6 verified
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from typing import List, Dict, Optional
from models import Email, CalendarEvent, Observation, Action
class EmailEnv(gym.Env):
    """
    Email Triage & Scheduling Assistant: A real-world human-task environment.
    Simulates inbox management, spam filtering, and meeting coordination.

    The task is chosen per episode via ``reset(options={"level": n})``:

    * level 1 (easy): move the spam email (id 1) to the "Spam" folder.
    * level 2 (medium): file email 2 under "Work" and email 4 under "Archive".
    * level 3 (hard): send a SCHEDULE action whose reply text proposes
      "2 PM"; proposing "10 AM" ends the episode as a failure.

    For any other level value ``step`` applies no task logic, so the episode
    can only end through truncation after ``max_steps`` steps.
    """

    # Id of the email that level 1 expects to be moved to "Spam".
    _SPAM_EMAIL_ID = 1
    # Level-2 goal: email id -> folder that email has to be filed under.
    _CATEGORY_TARGETS = {2: "Work", 4: "Archive"}

    def __init__(self):
        """Build the sample data and start a first episode at the default level."""
        super().__init__()
        # Placeholder for discrete actions if needed; ``step`` itself takes a
        # dict of Action fields rather than a sample from this space.
        self.action_space = spaces.Discrete(10)
        self._setup_inbox()
        self.max_steps = 30
        self.reset()

    def _setup_inbox(self):
        """Create the template emails and calendar events copied on each reset."""
        # Sample structured data for tasks
        self.sample_emails = [
            Email(id=1, sender="spam@bott.io", subject="CASH NOW!!", body="Claim your 1M dollars", folder="Inbox", priority=3),
            Email(id=2, sender="boss@corp.com", subject="Urgent: Project Update", body="Send the report by 5 PM.", folder="Inbox", priority=1),
            Email(id=3, sender="calendar@corp.com", subject="Meeting Request: Sync", body="Let's sync at 2 PM.", folder="Inbox", priority=2),
            Email(id=4, sender="news@daily.com", subject="Daily Briefing", body="Top stories of the day.", folder="Inbox", priority=3),
            Email(id=5, sender="friend@web.com", subject="Coffee?", body="Are you free tomorrow at 10 AM?", folder="Inbox", priority=3),
        ]
        self.sample_calendar = [
            CalendarEvent(title="Sprint Review", start_time="10:00", end_time="11:00"),
            CalendarEvent(title="Lunch", start_time="12:00", end_time="13:00"),
        ]

    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
        """
        Start a new episode.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Optional mapping; its "level" entry selects the task
                (defaults to 1 when the mapping or the key is missing).

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_level = options.get("level", 1) if options else 1
        # Work on copies so the templates stay pristine across episodes.
        self.inbox = [e.model_copy() for e in self.sample_emails]
        self.calendar = [c.model_copy() for c in self.sample_calendar]
        self.steps = 0
        self.completed_tasks = 0
        return self._get_observation(), {}

    def _find_email(self, email_id) -> Optional[Email]:
        """Return the email in ``self.inbox`` with the given id, or None."""
        return next((e for e in self.inbox if e.id == email_id), None)

    def _get_observation(self) -> Dict:
        """
        Build the observation dict for the current state.

        ``inbox_count`` and ``current_email`` both consider only emails whose
        folder is still "Inbox"; ``current_email`` is the first such email, or
        None when nothing is left in the inbox.
        """
        unfiled = [e for e in self.inbox if e.folder == "Inbox"]
        # Pydantic conversion to dict for Gym-compatible step/reset returns
        obs = Observation(
            inbox_count=len(unfiled),
            current_email=unfiled[0] if unfiled else None,
            calendar=self.calendar,
        )
        return obs.model_dump()

    def step(self, action_dict: Dict):
        """
        Receives an Action model mapping (dict) and applies it to the state.

        Args:
            action_dict: Keyword arguments used to construct an ``Action``.

        Returns:
            observation, reward, terminated, truncated, info
            (``info["is_success"]`` is True only on the step that solves
            the task; ``truncated`` is True once ``max_steps`` is reached).
        """
        self.steps += 1
        action = Action(**action_dict)

        reward, terminated, success = 0.0, False, False
        if self.current_level == 1:
            reward, terminated, success = self._step_spam(action)
        elif self.current_level == 2:
            reward, terminated, success = self._step_categorize(action)
        elif self.current_level == 3:
            reward, terminated, success = self._step_schedule(action)

        info = {"is_success": success}
        truncated = self.steps >= self.max_steps
        return self._get_observation(), reward, terminated, truncated, info

    def _step_spam(self, action):
        """Task 1 (easy): return (reward, terminated, success) for spam removal."""
        solved = (
            action.type == "MOVE"
            and action.email_id == self._SPAM_EMAIL_ID
            and action.target_folder == "Spam"
        )
        if not solved:
            return -0.1, False, False  # Logical error penalty
        # Look the email up by id instead of relying on its list position.
        spam = self._find_email(self._SPAM_EMAIL_ID)
        if spam is not None:
            spam.folder = "Spam"
        return 1.0, True, True

    def _step_categorize(self, action):
        """
        Task 2 (medium): return (reward, terminated, success) for categorization.

        A correct filing earns +0.4 and is only paid while the email is still
        in "Inbox", so repeating the same move cannot accumulate reward. Any
        other MOVE of an existing email costs 0.1. Filing both targets adds a
        0.2 completion bonus and ends the episode.
        """
        reward = 0.0
        if action.type == "MOVE":
            email = self._find_email(action.email_id)
            if email is not None:
                wanted = self._CATEGORY_TARGETS.get(email.id)
                if (
                    wanted is not None
                    and email.folder == "Inbox"
                    and action.target_folder == wanted
                ):
                    reward += 0.4
                    email.folder = wanted
                else:
                    reward -= 0.1

        # Check for completion
        finished = all(
            e.folder != "Inbox"
            for e in self.inbox
            if e.id in self._CATEGORY_TARGETS
        )
        if finished:
            reward += 0.2  # Completion bonus
            return reward, True, True
        return reward, False, False

    def _step_schedule(self, action):
        """
        Task 3 (hard): return (reward, terminated, success) for scheduling.

        Only SCHEDULE actions are scored: "2 PM" in the reply text succeeds,
        "10 AM" (which overlaps the 10:00-11:00 Sprint Review) fails and ends
        the episode, anything else costs 0.1.
        """
        if action.type != "SCHEDULE":
            return 0.0, False, False
        text = action.reply_text or ""
        if self._mentions_time(text, "2 PM"):
            return 1.0, True, True
        if self._mentions_time(text, "10 AM"):
            return -0.5, True, False  # Fail: Calendar conflict!
        return -0.1, False, False

    @staticmethod
    def _mentions_time(text: str, label: str) -> bool:
        """
        Return True if *label* occurs in *text* not directly after a digit.

        The digit guard keeps "12 PM" from being read as "2 PM".
        """
        import re  # local import so the class does not depend on new file-level imports

        return re.search(r"(?<!\d)" + re.escape(label), text) is not None

    def state(self) -> Dict:
        """Required by OpenEnv for full state snapshot."""
        return {
            "inbox_snapshot": [e.model_dump() for e in self.inbox],
            "calendar_snapshot": [c.model_dump() for c in self.calendar],
            "level": self.current_level,
        }