Spaces:

SushCodex
/

OPENSPEC_Hackhathon

Sleeping

File size: 5,139 Bytes

85768b6

import gymnasium as gym
from gymnasium import spaces
import numpy as np
from typing import List, Dict, Optional
from models import Email, CalendarEvent, Observation, Action

class EmailEnv(gym.Env):
    """
    Email Triage & Scheduling Assistant: A real-world human-task environment.
    Simulates inbox management, spam filtering, and meeting coordination.

    The active task is selected by the ``level`` key of the ``options`` dict
    passed to ``reset`` (defaults to 1):

    * Level 1 (easy): move email 1 to the "Spam" folder.
    * Level 2 (medium): move email 2 to "Work" and email 4 to "Archive".
    * Level 3 (hard): send a SCHEDULE action whose reply text proposes "2 PM"
      and not "10 AM".
    """

    # Level 2 goal: email id -> folder that email has to be moved to.
    _LEVEL2_TARGETS = {2: "Work", 4: "Archive"}

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(10)  # Placeholder for discrete actions if needed
        self._setup_inbox()
        self.max_steps = 30
        self.reset()

    def _setup_inbox(self):
        """Build the template emails and calendar events that every episode copies."""
        # Sample structured data for tasks
        self.sample_emails = [
            Email(id=1, sender="spam@bott.io", subject="CASH NOW!!", body="Claim your 1M dollars", folder="Inbox", priority=3),
            Email(id=2, sender="boss@corp.com", subject="Urgent: Project Update", body="Send the report by 5 PM.", folder="Inbox", priority=1),
            Email(id=3, sender="calendar@corp.com", subject="Meeting Request: Sync", body="Let's sync at 2 PM.", folder="Inbox", priority=2),
            Email(id=4, sender="news@daily.com", subject="Daily Briefing", body="Top stories of the day.", folder="Inbox", priority=3),
            Email(id=5, sender="friend@web.com", subject="Coffee?", body="Are you free tomorrow at 10 AM?", folder="Inbox", priority=3)
        ]
        self.sample_calendar = [
            CalendarEvent(title="Sprint Review", start_time="10:00", end_time="11:00"),
            CalendarEvent(title="Lunch", start_time="12:00", end_time="13:00")
        ]

    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
        """
        Start a new episode.

        Args:
            seed: forwarded to ``gym.Env.reset``.
            options: optional dict; its ``"level"`` entry picks the task
                (1 when the dict is missing, empty, or has no such key).

        Returns: (observation, info) where info is an empty dict.
        """
        super().reset(seed=seed)
        self.current_level = options.get("level", 1) if options else 1
        # Copy the templates so that edits made during an episode never
        # touch the sample data reused by the next reset.
        self.inbox = [e.model_copy() for e in self.sample_emails]
        self.calendar = [c.model_copy() for c in self.sample_calendar]
        self.steps = 0
        self.completed_tasks = 0
        return self._get_observation(), {}

    def _get_observation(self) -> Dict:
        """Return the current Observation model dumped to a plain dict."""
        # Pydantic conversion to dict for Gym-compatible step/reset returns
        # NOTE(review): current_email is always the first list entry, even
        # after that email has been moved out of "Inbox" - confirm whether
        # it should instead track the next email still in the Inbox.
        obs = Observation(
            inbox_count=len([e for e in self.inbox if e.folder == "Inbox"]),
            current_email=self.inbox[0] if self.inbox else None,
            calendar=self.calendar
        )
        return obs.model_dump()

    @staticmethod
    def _mentions_time(text: str, token: str) -> bool:
        """
        Return True when ``token`` occurs in ``text`` and is not preceded by a digit.

        A bare substring test would accept "12 PM" as a mention of "2 PM";
        requiring a non-digit (or the start of the text) in front of the
        token rules that out.
        """
        start = text.find(token)
        while start != -1:
            if start == 0 or not text[start - 1].isdigit():
                return True
            start = text.find(token, start + 1)
        return False

    def _step_spam(self, action):
        """Level 1: score a single action; returns (reward, terminated, success)."""
        if action.type == "MOVE" and action.email_id == 1 and action.target_folder == "Spam":
            # Look the email up by id instead of assuming its list position.
            spam = next((e for e in self.inbox if e.id == 1), None)
            if spam is not None:
                spam.folder = "Spam"
            return 1.0, True, True  # Solved Task 1
        return -0.1, False, False  # Logical error penalty

    def _step_categorize(self, action):
        """Level 2: score a single action; returns (reward, terminated, success)."""
        if action.type != "MOVE":
            return 0.0, False, False

        reward = 0.0
        email = next((e for e in self.inbox if e.id == action.email_id), None)
        if email is not None:
            expected = self._LEVEL2_TARGETS.get(email.id)
            # Only an email that is still in the Inbox can earn the reward;
            # otherwise repeating one correct move would pay out every step.
            if (
                expected is not None
                and action.target_folder == expected
                and email.folder == "Inbox"
            ):
                reward += 0.4
                email.folder = expected
            else:
                reward -= 0.1

        # Check for completion
        done = all(
            e.folder != "Inbox" for e in self.inbox if e.id in self._LEVEL2_TARGETS
        )
        if done:
            reward += 0.2  # Completion bonus
        return reward, done, done

    def _step_schedule(self, action):
        """Level 3: score a single action; returns (reward, terminated, success)."""
        # Task: Schedule a meeting at 2 PM (No conflict) vs Avoiding 10 AM (Conflict)
        if action.type != "SCHEDULE":
            return 0.0, False, False

        text = action.reply_text or ""
        if self._mentions_time(text, "2 PM"):
            return 1.0, True, True
        if self._mentions_time(text, "10 AM"):
            return -0.5, True, False  # Fail: Calendar conflict!
        return -0.1, False, False

    def step(self, action_dict: Dict):
        """
        Receives an Action model mapping (dict) and applies it to the state.
        Returns: observation, reward, terminated, truncated, info

        ``info`` carries a single ``"is_success"`` flag. ``truncated`` becomes
        True once ``max_steps`` steps have been taken since the last reset.
        A level other than 1, 2 or 3 yields a reward of 0.0 and never
        terminates.
        """
        self.steps += 1
        action = Action(**action_dict)
        reward = 0.0
        terminated = False
        success = False

        if self.current_level == 1:
            # Logic for Task 1: Deleting Spam (Easy)
            reward, terminated, success = self._step_spam(action)
        elif self.current_level == 2:
            # Logic for Task 2: Categorization (Medium)
            reward, terminated, success = self._step_categorize(action)
        elif self.current_level == 3:
            # Logic for Task 3: Scheduling (Hard)
            reward, terminated, success = self._step_schedule(action)

        info = {"is_success": success}
        truncated = self.steps >= self.max_steps
        return self._get_observation(), reward, terminated, truncated, info

    def state(self) -> Dict:
        """Required by OpenEnv for full state snapshot."""
        return {
            "inbox_snapshot": [e.model_dump() for e in self.inbox],
            "calendar_snapshot": [c.model_dump() for c in self.calendar],
            "level": self.current_level
        }