File size: 3,618 Bytes
807d5cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""Typed Pydantic models for the ESC OpenEnv environment.

Defines the Action, Observation, Reward, and result envelopes used across the
HTTP boundary (server.py) and the in-process env (env.py).
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class Action(BaseModel):
    """Agent action: a free-text conversational reply to the seeker."""

    # Docstring wording is kept as-is: pydantic publishes the class docstring
    # as the model's schema description.

    # The reply text is the whole payload; `...` marks the field as required.
    message: str = Field(
        ...,
        description="Agent's reply to the seeker.",
    )


class Observation(BaseModel):
    """What the agent sees each turn.

    The seeker's internal state (distress, trust, openness, true_issue) is
    intentionally hidden — partial observability is what makes this env
    RL-native. Only the seeker's *utterance* and coarse hints are exposed.
    """

    # Docstring wording is kept as-is: pydantic publishes the class docstring
    # as the model's schema description.
    # Every field below is required (`...`); none carries a default.

    # --- what the seeker just said ---
    seeker_utterance: str = Field(
        ...,
        description="The seeker's latest message.",
    )

    # --- turn bookkeeping ---
    turn: int = Field(
        ...,
        description="1-indexed conversation turn.",
    )
    remaining_turns: int = Field(
        ...,
        description="Turns left before forced close.",
    )

    # --- coarse public hint; typed as plain `str`, so the five listed values
    # are documented in the description but not enforced by validation ---
    stage_hint: str = Field(
        ...,
        description=(
            "Coarse public hint about conversational phase: "
            "one of 'opening', 'exploring', 'reflecting', "
            "'planning', 'closing'."
        ),
    )

    # --- task / scenario context ---
    task_id: str = Field(
        ...,
        description="Currently active task id.",
    )
    scenario_brief: str = Field(
        ...,
        description=(
            "One-line scenario framing shown once at reset "
            "(kept in obs for convenience)."
        ),
    )


class Reward(BaseModel):
    """Detailed reward breakdown for a single step.

    The scalar `value` is what the agent sees. The decomposition is exposed
    for transparency and debugging.
    """

    # Docstring wording is kept as-is: pydantic publishes the class docstring
    # as the model's schema description.

    # Headline scalar. The only field with numeric bounds: validation rejects
    # anything outside the closed interval [0, 1].
    value: float = Field(
        ...,
        description="Clipped step reward in [0,1].",
        ge=0.0,
        le=1.0,
    )

    # Unbounded components that make up the decomposition.
    immediate: float = Field(
        ...,
        description="Immediate turn-level component (empathy, stage-fit).",
    )
    future_oriented: float = Field(
        ...,
        description=(
            "Future-oriented component: k-step lookahead over the "
            "deterministic seeker dynamics, comparing this action's "
            "projected resolution progress against the oracle ceiling "
            "(RLFF-ESC style)."
        ),
    )
    penalties: float = Field(
        ...,
        description="Summed penalties (dismissive, premature advice, loops).",
    )

    # Optional finer-grained map; `default_factory` gives each instance its
    # own empty dict when the caller supplies none.
    components: Dict[str, float] = Field(
        description="Sub-component breakdown.",
        default_factory=dict,
    )


class StepResult(BaseModel):
    """Envelope returned by env.step()."""

    # Agent-visible observation carried with this step result.
    observation: Observation
    # Scalar reward for the step.
    # NOTE(review): presumably mirrors reward_detail.value — confirm in env.py.
    reward: float
    # Decomposed view of the step reward (see the Reward model).
    reward_detail: Reward
    # Episode-termination flag.
    # NOTE(review): exact termination conditions live in env.py — not visible here.
    done: bool
    # Free-form auxiliary data; defaults to a fresh empty dict per instance.
    info: Dict[str, Any] = Field(default_factory=dict)


class ResetResult(BaseModel):
    """Envelope returned by env.reset()."""

    # Agent-visible observation produced by the reset.
    observation: Observation
    # Free-form auxiliary data; defaults to a fresh empty dict per instance.
    info: Dict[str, Any] = Field(default_factory=dict)


class EnvState(BaseModel):
    """Public view of environment state returned by env.state().

    Hidden seeker variables are *not* included — only public bookkeeping.
    """

    # Docstring wording is kept as-is: pydantic publishes the class docstring
    # as the model's schema description.

    # Identifier of the task this state belongs to.
    task_id: str

    # Turn counter and its cap.
    # NOTE(review): indexing convention (0- vs 1-based) is not stated here —
    # Observation.turn is documented as 1-indexed; confirm this one in env.py.
    turn: int
    max_turns: int

    # Episode-termination flag.
    done: bool

    # Running reward total.
    # NOTE(review): presumably the sum of per-step scalar rewards — confirm in env.py.
    cumulative_reward: float

    # Conversation log; each instance gets its own empty list by default.
    transcript: List[Dict[str, str]] = Field(
        description=(
            "List of {'role': 'seeker'|'agent', 'text': str} entries."
        ),
        default_factory=list,
    )


# ------- Request schemas for the HTTP server -------


class ResetRequest(BaseModel):
    # Request body for resetting the environment over HTTP. Both fields are
    # optional and default to None, so an empty body validates.
    # (Documented with comments rather than a docstring so the published
    # schema description is left unchanged.)

    # Which task to load; the fallback named in the description is applied by
    # the consumer of this request, not by this model.
    task_id: Optional[str] = Field(
        None,
        description=(
            "Optional task id. "
            "If omitted, defaults to 'work_stress_venting'."
        ),
    )

    # Accepted for forward compatibility; the description marks it as reserved.
    seed: Optional[int] = Field(
        None,
        description="Optional seed (reserved; env is deterministic).",
    )


class StepRequest(BaseModel):
    # Request body for a step call over HTTP: wraps the agent's Action under
    # the required "action" key.
    action: Action