from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

from openenv.core.env_server.types import Action as OpenEnvAction
from openenv.core.env_server.types import Observation as OpenEnvObservation
from openenv.core.env_server.types import State as OpenEnvState

from app.schemas import EmailMessage, Intent, Tone

# NOTE: the pydantic models in this module are documented with `#` comments
# rather than class docstrings, because pydantic copies a model's docstring
# into the `description` of its generated JSON schema.

# Closed set of values accepted by ``Action.type``.
ActionType = Literal["classify", "prioritize", "reply", "send", "skip"]

# Closed set of values accepted by ``EmailAssistantAction.action_type``.
EmailAssistantActionType = Literal[
    "categorize",
    "respond",
    "schedule",
    "create_folder",
    "move_email",
    "tag_email",
    "archive",
    "delete",
]


# A single agent action: a discriminating ``type`` plus a free-form payload.
# The payload keys expected for each action type are not defined here.
class Action(OpenEnvAction):
    type: ActionType
    payload: Dict[str, Any] = Field(default_factory=dict)


# Compact per-message entry used in ``Observation.inbox_summary``.
class InboxSummaryItem(BaseModel):
    message_id: str
    from_email: str
    subject: str
    deadline_minutes: Optional[int] = Field(
        default=None,
        description="Minutes remaining until SLA deadline, if any",
    )
    # Validated to lie in the closed interval [0.0, 1.0].
    urgency_score: float = Field(default=0.0, ge=0.0, le=1.0)
    predicted_intent: Optional[Intent] = None
    handled: bool = False


# Record of one previously taken action, as listed in
# ``Observation.previous_actions``.
class ActionRecord(BaseModel):
    step: int
    action: Action
    ok: bool = True
    notes: str = ""
    tool_output: Dict[str, Any] = Field(default_factory=dict)


# What the agent sees at each step: the email in focus, a summary of the
# inbox, the actions taken so far, and step/time counters.
class Observation(OpenEnvObservation):
    current_email: EmailMessage
    inbox_summary: List[InboxSummaryItem]
    previous_actions: List[ActionRecord]
    step_count: int
    time_elapsed_minutes: float = Field(
        default=0.0,
        ge=0.0,
        description="Simulated minutes elapsed since reset; used for SLA/deadline rewards.",
    )


# Per-aspect breakdown of a reward; every score defaults to 0.0.
# NOTE(review): ``penalties`` presumably maps a penalty name to its
# magnitude -- confirm against the reward computation.
class RewardComponents(BaseModel):
    classification_score: float = 0.0
    priority_score: float = 0.0
    reply_score: float = 0.0
    penalties: Dict[str, float] = Field(default_factory=dict)


# Scalar reward together with the component breakdown it carries.
class Reward(BaseModel):
    value: float
    components: RewardComponents


# Auxiliary information returned alongside a step result.
class StepInfo(BaseModel):
    task_id: str
    done_reason: Optional[str] = None
    episode_score: Optional[float] = None
    scores: Dict[str, float] = Field(default_factory=dict)
    state: Dict[str, Any] = Field(default_factory=dict)


# Bundle of observation, reward, done flag and info for one environment step.
class StepResult(BaseModel):
    observation: Observation
    reward: Reward
    done: bool
    info: StepInfo


# A drafted reply: subject, body and tone (defaults to "neutral").
class ReplyDraft(BaseModel):
    reply_subject: str
    reply_body: str
    tone: Tone = "neutral"


# Environment state model built on the OpenEnv ``State`` base class.
# NOTE(review): the ``Dict[str, ...]`` fields below are presumably keyed by
# message id -- confirm against the environment implementation.
class EmailEnvState(OpenEnvState):
    task: Dict[str, Any]
    time_elapsed_minutes: float = 0.0
    current_message_id: str = ""
    selected_priority_message_id: Optional[str] = None
    escalated: bool = False
    escalation_reason: str = ""
    priority_analysis: Dict[str, Any] = Field(default_factory=dict)
    handled: Dict[str, bool] = Field(default_factory=dict)
    handled_at_minutes: Dict[str, float] = Field(default_factory=dict)
    predicted_intent: Dict[str, Intent] = Field(default_factory=dict)
    drafts: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    sent: Dict[str, bool] = Field(default_factory=dict)
    sent_at_minutes: Dict[str, float] = Field(default_factory=dict)
    history: List[Dict[str, Any]] = Field(default_factory=list)


@dataclass
class EmailAssistantState:
    """
    Internal state tracker for the email assistant environment.

    This is intentionally a dataclass for lightweight in-memory state updates.
    """

    # Every container uses ``default_factory`` so instances never share
    # mutable defaults.
    current_email_index: int = 0
    processed_emails: List[str] = field(default_factory=list)
    folders_created: List[str] = field(default_factory=list)
    # NOTE(review): presumably email id -> folder name; confirm with callers.
    emails_organized: Dict[str, str] = field(default_factory=dict)
    # NOTE(review): presumably email id -> reply text; confirm with callers.
    drafted_replies: Dict[str, str] = field(default_factory=dict)
    sent_emails: List[str] = field(default_factory=list)
    skipped_emails: List[str] = field(default_factory=list)
    escalated_emails: List[str] = field(default_factory=list)
    tags_applied: Dict[str, List[str]] = field(default_factory=dict)
    notes: Dict[str, str] = field(default_factory=dict)


@dataclass
class EmailAssistantAction:
    """
    Agent-facing action schema for interacting with the email environment.
    """

    # Only ``action_type`` is required; all other fields default to ``None``
    # or an empty container.
    action_type: EmailAssistantActionType
    # NOTE(review): an integer id, unlike the string message ids used by the
    # pydantic models in this module -- presumably an inbox index; confirm.
    email_id: int | None = None
    target_folder: str | None = None
    response_text: str | None = None
    schedule_time: str | None = None
    category: str | None = None
    tags: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)