Spaces:
Sleeping
Sleeping
File size: 4,452 Bytes
f4e02da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List
from openai import OpenAI
# Directory two levels above this file. It is put at the front of sys.path
# (once) so the project-local `openenv_support_triage` imports that follow
# can be resolved when this file is run directly as a script.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path[:0] = [str(PROJECT_ROOT)]
from openenv_support_triage.environment import SupportTriageEnv
from openenv_support_triage.models import ActionModel, ObservationModel
# Settings that can be overridden through environment variables.
MODEL_NAME = os.environ.get("OPENAI_MODEL", "gpt-4.1-mini")
TASK_ID = os.environ.get("TASK_ID", "easy_refund_and_login")

# Fixed parameters for the chat-completion request and the episode loop.
TEMPERATURE = 0
MAX_TOKENS = 300
MAX_STEPS = 20

# System message sent with every model request.
SYSTEM_PROMPT = " ".join(
    (
        "You are a customer support operations agent.",
        "Pick exactly one next action in JSON to maximize triage quality and completion speed.",
    )
)

# Action used when the model request fails or its reply cannot be validated:
# a "noop" with every optional field left as None.
FALLBACK_ACTION: Dict[str, Any] = {
    "action_type": "noop",
    **dict.fromkeys(
        ("ticket_id", "priority", "team", "reply_text", "resolution_note")
    ),
}
def build_user_prompt(step: int, observation: ObservationModel, history: List[str]) -> str:
    """Render the current episode state as the JSON user message for the model.

    Args:
        step: Step counter of the caller's loop, echoed back under "step".
        observation: Current observation; its ``objective``, ``step_index``,
            ``max_steps`` and ``tickets`` attributes are read, and each ticket
            is serialized through its ``model_dump()`` method.
        history: Earlier step summaries; only the last five are included.

    Returns:
        A JSON string holding the state plus a "required_output_json" entry
        that describes the action fields the model is expected to return.
    """
    # Template describing the expected shape of the model's answer.
    output_schema = dict(
        action_type="classify_ticket|draft_reply|resolve_ticket|noop",
        ticket_id="ticket id or null",
        priority="low|medium|high|urgent or null",
        team="support|billing|technical|risk or null",
        reply_text="string or null",
        resolution_note="string or null",
    )
    prompt_fields = dict(
        step=step,
        objective=observation.objective,
        step_index=observation.step_index,
        max_steps=observation.max_steps,
        tickets=[item.model_dump() for item in observation.tickets],
        history=history[-5:],
        required_output_json=output_schema,
    )
    return json.dumps(prompt_fields)
def parse_model_action(response_text: str) -> Dict[str, Any]:
    """Turn the model's raw reply into a validated action dictionary.

    The text is decoded as JSON and validated with ``ActionModel``; the
    validated model is returned as a plain dict via ``model_dump()``.

    Args:
        response_text: Raw text returned by the model (may be empty).

    Returns:
        The validated action as a dict, or a fresh copy of ``FALLBACK_ACTION``
        when the text is not valid JSON or does not validate.
    """
    try:
        data = json.loads(response_text)
        action = ActionModel.model_validate(data)
        return action.model_dump()
    except Exception:
        # Deliberate best-effort: any decode/validation problem degrades to
        # the fallback action instead of aborting the episode. Return a copy
        # so callers can never mutate the shared module-level constant.
        return dict(FALLBACK_ACTION)
def _request_action_text(client: OpenAI, messages: List[Dict[str, str]]) -> str:
    """Ask the chat model for the next action and return its raw text reply.

    Any failure of the request is reported on stdout and replaced by the JSON
    encoding of ``FALLBACK_ACTION``, so the episode keeps running.
    """
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            response_format={"type": "json_object"},
            stream=False,
        )
        # The message content may be None; normalize it to an empty string.
        return completion.choices[0].message.content or ""
    except Exception as exc:
        print(f"Model request failed ({exc}). Using fallback action.")
        return json.dumps(FALLBACK_ACTION)


def main() -> None:
    """Run one support-triage episode driven by an OpenAI chat model.

    Resets the environment for ``TASK_ID``, then for at most ``MAX_STEPS``
    steps asks the model for an action, applies it with ``env.step`` and
    prints the reward. The loop stops as soon as the environment reports
    ``done``. The final environment state is printed as indented JSON.

    Raises:
        EnvironmentError: If the ``OPENAI_API_KEY`` environment variable is
            unset or empty.
    """
    if not os.getenv("OPENAI_API_KEY"):
        raise EnvironmentError("OPENAI_API_KEY is required")

    client = OpenAI()
    env = SupportTriageEnv(task_id=TASK_ID)
    history: List[str] = []

    # NOTE: the original code stated that this environment has no explicit
    # close(), so no cleanup step is performed after the episode.
    observation = env.reset(task_id=TASK_ID)
    print(f"Episode goal: {observation.objective}")

    for step in range(1, MAX_STEPS + 1):
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": build_user_prompt(step, observation, history)},
        ]
        response_text = _request_action_text(client, messages)

        action_payload = parse_model_action(response_text)
        print(f"Step {step}: model suggested -> {action_payload}")

        observation, reward, done, info = env.step(ActionModel.model_validate(action_payload))

        # Keep a compact per-step summary; it is fed back to the model in
        # later prompts.
        history.append(
            f"Step {step}: {action_payload} -> reward {reward.value:+.2f}, done={done}, "
            f"running_score={info.get('running_score')}"
        )
        print(f"  Reward: {reward.value:+.2f} | Done: {done} | Info: {info}")

        if done:
            print("Episode complete.")
            break
    else:
        # Only reached when the loop ran out of steps without a break.
        print(f"Reached max steps ({MAX_STEPS}).")

    final_state = env.state()
    print("Final state summary:")
    print(json.dumps(final_state.model_dump(), indent=2))


if __name__ == "__main__":
    main()
|