import gymnasium as gym
from gymnasium import spaces
import numpy as np
from typing import List, Dict, Optional
from models import Email, CalendarEvent, Observation, Action


class EmailEnv(gym.Env):
    """
    Email Triage & Scheduling Assistant: A real-world human-task environment.
    Simulates inbox management, spam filtering, and meeting coordination.

    The task is selected through ``reset(options={"level": N})``:

    * level 1 -- move the spam email (id 1) to the "Spam" folder;
    * level 2 -- file email 2 under "Work" and email 4 under "Archive";
    * level 3 -- schedule the meeting at "2 PM" and avoid "10 AM".
    """

    # Folder in which every sample email starts.
    INBOX_FOLDER = "Inbox"
    # Reward applied to an action that does not advance the current task.
    STEP_PENALTY = -0.1

    # Level 1: the email to flag and the folder it must be moved to.
    SPAM_EMAIL_ID = 1
    SPAM_FOLDER = "Spam"

    # Level 2: email id -> folder it has to be filed under.
    CATEGORY_TARGETS = {2: "Work", 4: "Archive"}
    CATEGORY_REWARD = 0.4
    COMPLETION_BONUS = 0.2

    # Level 3: substrings looked for in the reply text of a SCHEDULE action.
    FREE_SLOT = "2 PM"
    BUSY_SLOT = "10 AM"
    CONFLICT_PENALTY = -0.5

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(10)  # Placeholder for discrete actions if needed
        self._setup_inbox()
        self.max_steps = 30
        self.reset()

    def _setup_inbox(self):
        """Create the template emails and calendar events copied on every reset."""
        # Sample structured data for tasks
        self.sample_emails = [
            Email(id=1, sender="spam@bott.io", subject="CASH NOW!!",
                  body="Claim your 1M dollars", folder="Inbox", priority=3),
            Email(id=2, sender="boss@corp.com", subject="Urgent: Project Update",
                  body="Send the report by 5 PM.", folder="Inbox", priority=1),
            Email(id=3, sender="calendar@corp.com", subject="Meeting Request: Sync",
                  body="Let's sync at 2 PM.", folder="Inbox", priority=2),
            Email(id=4, sender="news@daily.com", subject="Daily Briefing",
                  body="Top stories of the day.", folder="Inbox", priority=3),
            Email(id=5, sender="friend@web.com", subject="Coffee?",
                  body="Are you free tomorrow at 10 AM?", folder="Inbox", priority=3),
        ]
        self.sample_calendar = [
            CalendarEvent(title="Sprint Review", start_time="10:00", end_time="11:00"),
            CalendarEvent(title="Lunch", start_time="12:00", end_time="13:00"),
        ]

    def reset(self, seed=None, options=None):
        """
        Start a new episode.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Optional mapping; its ``"level"`` entry selects the task
                and defaults to 1 when missing.

        Returns:
            A ``(observation, info)`` pair; ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_level = options.get("level", 1) if options else 1
        # Work on copies so an episode never mutates the templates.
        self.inbox = [e.model_copy() for e in self.sample_emails]
        self.calendar = [c.model_copy() for c in self.sample_calendar]
        self.steps = 0
        self.completed_tasks = 0
        return self._get_observation(), {}

    def _find_email(self, email_id) -> Optional[Email]:
        """Return the email with the given id, or None when there is none."""
        return next((e for e in self.inbox if e.id == email_id), None)

    def _get_observation(self) -> Dict:
        """Build the observation dict describing what is still in the Inbox folder."""
        pending = [e for e in self.inbox if e.folder == self.INBOX_FOLDER]
        # Pydantic conversion to dict for Gym-compatible step/reset returns
        obs = Observation(
            inbox_count=len(pending),
            # Emails are re-foldered rather than removed from self.inbox, so
            # the current email must be picked from the ones not yet filed.
            current_email=pending[0] if pending else None,
            calendar=self.calendar,
        )
        return obs.model_dump()

    def _step_spam(self, action) -> tuple:
        """Level 1: return (reward, terminated, success) for the spam task."""
        solved = (
            action.type == "MOVE"
            and action.email_id == self.SPAM_EMAIL_ID
            and action.target_folder == self.SPAM_FOLDER
        )
        if not solved:
            return self.STEP_PENALTY, False, False  # Logical error penalty

        email = self._find_email(self.SPAM_EMAIL_ID)
        if email is not None:
            email.folder = self.SPAM_FOLDER
        return 1.0, True, True  # Solved Task 1

    def _step_categorize(self, action) -> tuple:
        """Level 2: return (reward, terminated, success) for the filing task."""
        email = self._find_email(action.email_id) if action.type == "MOVE" else None
        files_correctly = (
            email is not None
            # Only emails still in the Inbox earn the reward; otherwise the
            # same correct move could be repeated to collect it every step.
            and email.folder == self.INBOX_FOLDER
            and self.CATEGORY_TARGETS.get(email.id) == action.target_folder
        )
        if not files_correctly:
            return self.STEP_PENALTY, False, False

        email.folder = action.target_folder
        reward = self.CATEGORY_REWARD

        # Check for completion
        done = all(
            e.folder != self.INBOX_FOLDER
            for e in self.inbox
            if e.id in self.CATEGORY_TARGETS
        )
        if done:
            reward += self.COMPLETION_BONUS  # Completion bonus
        return reward, done, done

    def _step_schedule(self, action) -> tuple:
        """Level 3: return (reward, terminated, success) for the scheduling task."""
        # Task: Schedule a meeting at 2 PM (No conflict) vs Avoiding 10 AM (Conflict)
        if action.type == "SCHEDULE":
            reply = action.reply_text or ""
            if self.FREE_SLOT in reply:
                return 1.0, True, True
            if self.BUSY_SLOT in reply:
                # Fail: Calendar conflict! The episode ends without success.
                return self.CONFLICT_PENALTY, True, False
        return self.STEP_PENALTY, False, False

    def step(self, action_dict: Dict):
        """
        Receives an Action model mapping (dict) and applies it to the state.

        Any action that does not advance the active task is charged
        ``STEP_PENALTY``. When ``current_level`` is not 1, 2 or 3 the action
        has no effect and the reward is 0.0.

        Args:
            action_dict: Keyword arguments used to build an ``Action``.

        Returns: observation, reward, terminated, truncated, info
        """
        self.steps += 1
        action = Action(**action_dict)

        if self.current_level == 1:
            # Logic for Task 1: Deleting Spam (Easy)
            reward, terminated, success = self._step_spam(action)
        elif self.current_level == 2:
            # Logic for Task 2: Categorization (Medium)
            reward, terminated, success = self._step_categorize(action)
        elif self.current_level == 3:
            # Logic for Task 3: Scheduling (Hard)
            reward, terminated, success = self._step_schedule(action)
        else:
            reward, terminated, success = 0.0, False, False

        info = {"is_success": success}
        truncated = self.steps >= self.max_steps
        return self._get_observation(), reward, terminated, truncated, info

    def state(self) -> Dict:
        """Required by OpenEnv for full state snapshot."""
        return {
            "inbox_snapshot": [e.model_dump() for e in self.inbox],
            "calendar_snapshot": [c.model_dump() for c in self.calendar],
            "level": self.current_level,
        }