File size: 3,191 Bytes
dab441f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9ad6f9
dab441f
 
 
 
 
 
 
 
 
 
 
b9ad6f9
dab441f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""Typed models for the Supermail environment."""

from __future__ import annotations

from typing import Any, Dict, List, Literal

from pydantic import BaseModel, Field

try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:  # pragma: no cover - local fallback when OpenEnv is absent
    class Action(BaseModel):
        """Fallback OpenEnv Action model."""

    class Observation(BaseModel):
        """Fallback OpenEnv Observation model."""

        done: bool = False
        reward: float | None = None
        metadata: Dict[str, Any] = Field(default_factory=dict)

    class State(BaseModel):
        """Fallback OpenEnv State model."""

        episode_id: str
        step_count: int = 0


PriorityLabel = Literal["urgent", "normal", "spam"]
CategoryLabel = Literal["billing", "delivery", "technical", "general"]
ResolutionLabel = Literal["respond_immediately", "assign_to_team", "ignore"]


class SupportAction(Action):
    """Action submitted by the agent on each step."""

    priority: PriorityLabel | None = Field(
        default=None,
        description="Priority decision for the email.",
    )
    category: CategoryLabel | None = Field(
        default=None,
        description="Category decision for the email when required.",
    )
    action: ResolutionLabel | None = Field(
        default=None,
        description="Recommended operational action when required.",
    )
    notes: str = Field(
        default="",
        description="Optional short explanation for audit logging.",
    )


class SupportObservation(Observation):
    """Observation returned by the environment."""

    task_id: str = Field(default="", description="Stable task identifier.")
    task_type: str = Field(default="", description="Difficulty level.")
    benchmark: str = Field(default="supermail", description="Benchmark name.")
    objective: str = Field(default="", description="What the agent must decide.")
    email: str = Field(default="", description="Incoming support email body.")
    context: Dict[str, str] = Field(
        default_factory=dict,
        description="Structured metadata about the customer or ticket.",
    )
    required_fields: List[str] = Field(
        default_factory=list,
        description="Decision fields required to finish the task.",
    )
    allowed_values: Dict[str, List[str]] = Field(
        default_factory=dict,
        description="Allowed label values for each decision field.",
    )
    history: List[str] = Field(
        default_factory=list,
        description="Compact summaries of prior attempts in the episode.",
    )
    feedback: str = Field(default="", description="Step-level grader feedback.")
    score: float = Field(default=0.01, description="Current cumulative score.")
    attempts_remaining: int = Field(
        default=0,
        description="How many attempts remain before the episode ends.",
    )


class SupportState(State):
    """Server-side state exposed by the environment."""

    task_id: str | None = None
    difficulty: str | None = None
    score: float = 0.01
    matched_fields: List[str] = Field(default_factory=list)
    attempts_remaining: int = 0