File size: 4,452 Bytes
f4e02da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from __future__ import annotations

import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List

from openai import OpenAI

# Put the parent of this script's directory on sys.path before the project
# imports below run, so the script also works when launched directly by path.
# NOTE(review): presumably that directory is the repository root containing the
# ``openenv_support_triage`` package — confirm against the repo layout.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from openenv_support_triage.environment import SupportTriageEnv
from openenv_support_triage.models import ActionModel, ObservationModel

# Chat model to query; the OPENAI_MODEL environment variable overrides the default.
MODEL_NAME = os.environ.get("OPENAI_MODEL", "gpt-4.1-mini")
# Sampling temperature and completion-token cap sent with every model request.
TEMPERATURE = 0
MAX_TOKENS = 300
# Upper bound on the number of agent steps taken in one episode.
MAX_STEPS = 20
# Task handed to the environment; the TASK_ID environment variable overrides the default.
TASK_ID = os.environ.get("TASK_ID", "easy_refund_and_login")

# System message sent with every request (two sentences joined by one space).
SYSTEM_PROMPT = " ".join(
    (
        "You are a customer support operations agent.",
        "Pick exactly one next action in JSON to maximize triage quality and completion speed.",
    )
)

# Action used whenever the model call fails or its reply cannot be validated:
# a "noop" with every optional field left unset.
FALLBACK_ACTION: Dict[str, Any] = dict(
    action_type="noop",
    ticket_id=None,
    priority=None,
    team=None,
    reply_text=None,
    resolution_note=None,
)


def build_user_prompt(step: int, observation: ObservationModel, history: List[str]) -> str:
    """Serialize the current episode state into the JSON user message for the model.

    Args:
        step: Step counter of the driving loop.
        observation: Latest observation; its ``objective``, ``step_index``,
            ``max_steps`` and ``tickets`` attributes are read.
        history: One summary line per step taken so far; only the last five are sent.

    Returns:
        A JSON string holding the state plus a description of the expected reply fields.
    """
    # Describes the shape of the answer the model is expected to produce.
    reply_schema = {
        "action_type": "classify_ticket|draft_reply|resolve_ticket|noop",
        "ticket_id": "ticket id or null",
        "priority": "low|medium|high|urgent or null",
        "team": "support|billing|technical|risk or null",
        "reply_text": "string or null",
        "resolution_note": "string or null",
    }

    # Keys are added in a fixed order because that order is what gets serialized.
    prompt: Dict[str, Any] = {"step": step}
    prompt["objective"] = observation.objective
    prompt["step_index"] = observation.step_index
    prompt["max_steps"] = observation.max_steps

    ticket_dumps = []
    for item in observation.tickets:
        ticket_dumps.append(item.model_dump())
    prompt["tickets"] = ticket_dumps

    # Only the five most recent history lines are included.
    prompt["history"] = history[-5:]
    prompt["required_output_json"] = reply_schema
    return json.dumps(prompt)


def parse_model_action(response_text: str) -> Dict[str, Any]:
    """Turn the model's raw reply into a validated action dictionary.

    The reply is decoded as JSON and validated with ``ActionModel``. If either
    step fails, the no-op fallback action is returned instead of raising.

    Args:
        response_text: Raw text returned by the model, expected to be a JSON object.

    Returns:
        The validated action as a plain dict, or a fresh copy of
        ``FALLBACK_ACTION`` when the reply cannot be decoded or validated.
    """
    try:
        data = json.loads(response_text)
        return ActionModel.model_validate(data).model_dump()
    except Exception:
        # Deliberate best-effort boundary: any malformed reply degrades to a no-op.
        # Return a copy so a caller mutating the result can never corrupt the
        # shared module-level constant (its values are immutable, so a shallow
        # copy is sufficient).
        return dict(FALLBACK_ACTION)


def _request_model_reply(client: Any, user_prompt: str) -> str:
    """Ask the chat model for the next action and return its raw text reply.

    Args:
        client: OpenAI client used to issue the chat completion request.
        user_prompt: JSON user message describing the current episode state.

    Returns:
        The content of the first completion choice (``""`` when it is empty).
        If the request or reading the reply raises, the failure is printed and
        the JSON-encoded ``FALLBACK_ACTION`` is returned so the episode continues.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            response_format={"type": "json_object"},
            stream=False,
        )
        return completion.choices[0].message.content or ""
    except Exception as exc:
        # Best-effort: a failed request must not abort the whole episode.
        print(f"Model request failed ({exc}). Using fallback action.")
        return json.dumps(FALLBACK_ACTION)


def main() -> None:
    """Run one support-triage episode with actions chosen by an OpenAI chat model.

    Resets the environment for ``TASK_ID``, then for at most ``MAX_STEPS`` steps
    builds a prompt from the latest observation, asks the model for an action,
    validates it and applies it to the environment. Progress is printed after
    every step and the final environment state is printed as JSON at the end.

    Raises:
        EnvironmentError: If the ``OPENAI_API_KEY`` environment variable is unset or empty.
    """
    if not os.getenv("OPENAI_API_KEY"):
        raise EnvironmentError("OPENAI_API_KEY is required")

    client = OpenAI()
    env = SupportTriageEnv(task_id=TASK_ID)
    history: List[str] = []

    observation = env.reset(task_id=TASK_ID)
    print(f"Episode goal: {observation.objective}")

    for step in range(1, MAX_STEPS + 1):
        user_prompt = build_user_prompt(step, observation, history)
        response_text = _request_model_reply(client, user_prompt)

        action_payload = parse_model_action(response_text)
        print(f"Step {step}: model suggested -> {action_payload}")

        observation, reward, done, info = env.step(ActionModel.model_validate(action_payload))
        # Keep a one-line summary per step; the prompt builder feeds the most
        # recent ones back to the model.
        history.append(
            f"Step {step}: {action_payload} -> reward {reward.value:+.2f}, done={done}, "
            f"running_score={info.get('running_score')}"
        )
        print(f"  Reward: {reward.value:+.2f} | Done: {done} | Info: {info}")

        if done:
            print("Episode complete.")
            break
    else:
        # Only reached when the loop ran out of steps without the environment finishing.
        print(f"Reached max steps ({MAX_STEPS}).")

    final_state = env.state()
    print("Final state summary:")
    print(json.dumps(final_state.model_dump(), indent=2))


# Run the episode only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()