Upload 16 files
Browse files- Dockerfile +14 -0
- README.md +252 -3
- __init__.cpython-313.pyc +0 -0
- __init__.py +0 -0
- app.cpython-313.pyc +0 -0
- app.py +19 -0
- client.cpython-313.pyc +0 -0
- graders.cpython-313.pyc +0 -0
- inference.cpython-313.pyc +0 -0
- inference.py +275 -0
- models.cpython-313.pyc +0 -0
- openenv.yaml +8 -0
- pyproject.toml +23 -0
- requirements.txt +4 -0
- tasks.cpython-313.pyc +0 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lightweight runtime image for the support-triage OpenEnv server.
FROM python:3.11-slim

# Do not write .pyc files and do not buffer stdout/stderr (container logs).
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Port the server listens on; must match openenv.yaml and the Space config.
ENV PORT=8000

WORKDIR /app

# Install dependencies first so Docker layer caching survives code-only edits.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY . /app

# Entrypoint module (server/app.py) starts uvicorn via its main().
CMD ["python", "-m", "server.app"]
|
README.md
CHANGED
|
@@ -1,3 +1,252 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: support-triage-openenv
|
| 3 |
+
sdk: docker
|
| 4 |
+
app_port: 8000
|
| 5 |
+
tags:
|
| 6 |
+
- openenv
|
| 7 |
+
- reinforcement-learning
|
| 8 |
+
- customer-support
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Support Triage OpenEnv
|
| 12 |
+
|
| 13 |
+
A real-world OpenEnv environment where an agent performs customer support triage: prioritization, routing, tagging, information gathering, and response drafting.
|
| 14 |
+
|
| 15 |
+
This project is designed for a Round 1-style hackathon evaluation:
|
| 16 |
+
- Full typed OpenEnv models
|
| 17 |
+
- `reset()` / `step()` / `state()` API
|
| 18 |
+
- 3 deterministic graded tasks (easy/medium/hard)
|
| 19 |
+
- Dense reward shaping with partial progress
|
| 20 |
+
- Baseline `inference.py` using OpenAI client and required env vars
|
| 21 |
+
- Docker + Hugging Face Spaces deployment files
|
| 22 |
+
|
| 23 |
+
## Why This Environment Has Real Utility
|
| 24 |
+
|
| 25 |
+
Teams actually do this workflow in support operations and trust/safety queues. This environment evaluates whether an agent can:
|
| 26 |
+
- classify urgency
|
| 27 |
+
- route to the right team
|
| 28 |
+
- attach relevant operational tags
|
| 29 |
+
- ask for required evidence
|
| 30 |
+
- draft safe and useful customer responses
|
| 31 |
+
- close only when resolution criteria are met
|
| 32 |
+
|
| 33 |
+
## Module-Aligned Build Guide (From Your Course)
|
| 34 |
+
|
| 35 |
+
### Module 1: Why OpenEnv?
|
| 36 |
+
- We treat the environment as a service with typed contracts.
|
| 37 |
+
- Core loop follows RL structure: observe -> act -> reward.
|
| 38 |
+
|
| 39 |
+
### Module 2: Using Existing Environments
|
| 40 |
+
- `support_triage_env/models.py` defines typed `Action`, `Observation`, `State`.
|
| 41 |
+
- `support_triage_env/client.py` gives a reusable typed client.
|
| 42 |
+
|
| 43 |
+
### Module 3: Deploying Environments
|
| 44 |
+
- `server/app.py` is the OpenEnv validator-compatible entrypoint (`main()` + callable script).
|
| 45 |
+
- `server/Dockerfile` provides reproducible container runtime.
|
| 46 |
+
- `openenv.yaml` defines deployment metadata.
|
| 47 |
+
|
| 48 |
+
### Module 4: Building Your Own Environment
|
| 49 |
+
- `support_triage_env/server/environment.py` implements task simulation.
|
| 50 |
+
- `support_triage_env/tasks.py` defines deterministic fixtures.
|
| 51 |
+
- `support_triage_env/graders.py` implements 0.0-1.0 grading.
|
| 52 |
+
|
| 53 |
+
### Module 5: Training with OpenEnv + Reward Signals
|
| 54 |
+
- Reward shaping is dense and trajectory-aware.
|
| 55 |
+
- `inference.py` runs model-based episodes and exports reproducible baseline scores.
|
| 56 |
+
|
| 57 |
+
## Action Space
|
| 58 |
+
|
| 59 |
+
Action model: `SupportTriageAction`
|
| 60 |
+
|
| 61 |
+
```text
|
| 62 |
+
set_priority(value)
|
| 63 |
+
route_team(value)
|
| 64 |
+
add_tag(value)
|
| 65 |
+
draft_reply(value)
|
| 66 |
+
request_info(value)
|
| 67 |
+
close_ticket()
|
| 68 |
+
noop()
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Valid priorities: `low | medium | high | urgent`
|
| 72 |
+
|
| 73 |
+
Valid teams: `billing | technical | account | trust_safety | shipping`
|
| 74 |
+
|
| 75 |
+
## Observation Space
|
| 76 |
+
|
| 77 |
+
Observation model: `SupportTriageObservation`
|
| 78 |
+
|
| 79 |
+
Key fields:
|
| 80 |
+
- `task_id`, `difficulty`, `objective`
|
| 81 |
+
- `title`, `customer_tier`, `customer_message`
|
| 82 |
+
- current working state: `priority`, `routed_team`, `tags`, `draft_reply`, `info_requested`
|
| 83 |
+
- `steps_remaining`, `last_feedback`, `allowed_actions`
|
| 84 |
+
- inherited `reward`, `done`
|
| 85 |
+
|
| 86 |
+
## State Space
|
| 87 |
+
|
| 88 |
+
State model: `SupportTriageState`
|
| 89 |
+
|
| 90 |
+
Contains episode metadata and full workflow state:
|
| 91 |
+
- `episode_id`, `step_count`
|
| 92 |
+
- `task_id`, `difficulty`, `objective`, `max_steps`
|
| 93 |
+
- `priority`, `routed_team`, `tags`
|
| 94 |
+
- `info_requested`, `closed`, `close_valid`
|
| 95 |
+
- `history`
|
| 96 |
+
|
| 97 |
+
## Tasks and Graders
|
| 98 |
+
|
| 99 |
+
### Easy: `easy_password_reset`
|
| 100 |
+
- Scenario: login token failure after password reset
|
| 101 |
+
- Expected routing: `account`
|
| 102 |
+
- Expected priority: `medium`
|
| 103 |
+
- Required tags: `password-reset`, `login`
|
| 104 |
+
|
| 105 |
+
### Medium: `medium_double_charge`
|
| 106 |
+
- Scenario: premium customer charged twice
|
| 107 |
+
- Expected routing: `billing`
|
| 108 |
+
- Expected priority: `high`
|
| 109 |
+
- Required tags: `refund`, `double-charge`, `vip`
|
| 110 |
+
- Needs additional evidence request
|
| 111 |
+
|
| 112 |
+
### Hard: `hard_account_takeover`
|
| 113 |
+
- Scenario: possible account takeover + fraud + abusive content
|
| 114 |
+
- Expected routing: `trust_safety`
|
| 115 |
+
- Expected priority: `urgent`
|
| 116 |
+
- Required tags: `security`, `account-takeover`, `fraud`, `content-abuse`
|
| 117 |
+
- Needs security-safe communication and evidence collection
|
| 118 |
+
|
| 119 |
+
### Grading Design
|
| 120 |
+
|
| 121 |
+
`support_triage_env/graders.py` computes deterministic component scores:
|
| 122 |
+
- priority correctness
|
| 123 |
+
- routing correctness
|
| 124 |
+
- required tags coverage
|
| 125 |
+
- reply quality (required/forbidden phrase logic)
|
| 126 |
+
- process quality (info request + closure quality + efficiency)
|
| 127 |
+
|
| 128 |
+
Final score is normalized to `[0.0, 1.0]`.
|
| 129 |
+
|
| 130 |
+
## Reward Function
|
| 131 |
+
|
| 132 |
+
The environment provides dense rewards at each step:
|
| 133 |
+
- positive reward for correct priority/routing/tagging
|
| 134 |
+
- incremental reward for improving draft response quality
|
| 135 |
+
- positive signal for meaningful information requests when required
|
| 136 |
+
- strong bonus for valid close
|
| 137 |
+
- penalties for invalid actions, repeated loops, no-op behavior, or premature close
|
| 138 |
+
- small per-step cost to discourage inefficient trajectories
|
| 139 |
+
|
| 140 |
+
## Windows Setup
|
| 141 |
+
|
| 142 |
+
```powershell
|
| 143 |
+
py -3.11 -m venv .venv
|
| 144 |
+
.\.venv\Scripts\Activate.ps1
|
| 145 |
+
python -m pip install -U pip
|
| 146 |
+
pip install -r requirements.txt
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
Optional: if `openenv` command is not found, use:
|
| 150 |
+
|
| 151 |
+
```powershell
|
| 152 |
+
& "$env:APPDATA\Python\Python311\Scripts\openenv.exe" --help  # adjust the PythonXXX folder to match your interpreter version
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
## Run Locally
|
| 156 |
+
|
| 157 |
+
### Start API server
|
| 158 |
+
|
| 159 |
+
```powershell
|
| 160 |
+
python -m uvicorn support_triage_env.server.app:app --host 0.0.0.0 --port 8000 --reload
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
### Validate with OpenEnv tooling
|
| 164 |
+
|
| 165 |
+
```powershell
|
| 166 |
+
openenv validate --verbose
|
| 167 |
+
openenv validate --url http://localhost:8000
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## Baseline Inference
|
| 171 |
+
|
| 172 |
+
`inference.py` is at project root as required.
|
| 173 |
+
|
| 174 |
+
Set env vars first:
|
| 175 |
+
|
| 176 |
+
```powershell
|
| 177 |
+
$env:API_BASE_URL = "https://router.huggingface.co/v1"
|
| 178 |
+
$env:MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
|
| 179 |
+
$env:HF_TOKEN = "<your_hf_token>"
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
Run:
|
| 183 |
+
|
| 184 |
+
```powershell
|
| 185 |
+
python .\inference.py
|
| 186 |
+
```
|
| 187 |
+
|
| 188 |
+
Output:
|
| 189 |
+
- per-task scores
|
| 190 |
+
- average score
|
| 191 |
+
- `baseline_scores.json`
|
| 192 |
+
|
| 193 |
+
## Docker
|
| 194 |
+
|
| 195 |
+
Build:
|
| 196 |
+
|
| 197 |
+
```powershell
|
| 198 |
+
docker build -t support-triage-openenv:latest -f server/Dockerfile .
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
Run:
|
| 202 |
+
|
| 203 |
+
```powershell
|
| 204 |
+
docker run --rm -p 8000:8000 support-triage-openenv:latest
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
## Deploy to Hugging Face Spaces
|
| 208 |
+
|
| 209 |
+
```powershell
|
| 210 |
+
openenv push --repo-id <your-username>/support-triage-openenv
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
Then set in Space settings:
|
| 214 |
+
- `API_BASE_URL`
|
| 215 |
+
- `MODEL_NAME`
|
| 216 |
+
- `HF_TOKEN`
|
| 217 |
+
|
| 218 |
+
## Suggested Baseline Reporting Format
|
| 219 |
+
|
| 220 |
+
Include in submission:
|
| 221 |
+
- model name
|
| 222 |
+
- per-task score table
|
| 223 |
+
- average score
|
| 224 |
+
- runtime in minutes
|
| 225 |
+
- commit hash
|
| 226 |
+
|
| 227 |
+
## Project Structure
|
| 228 |
+
|
| 229 |
+
```text
|
| 230 |
+
support-triage-openenv/
|
| 231 |
+
|- server/
|
| 232 |
+
| |- __init__.py
|
| 233 |
+
| |- app.py
|
| 234 |
+
| |- Dockerfile
|
| 235 |
+
|- support_triage_env/
|
| 236 |
+
| |- __init__.py
|
| 237 |
+
| |- models.py
|
| 238 |
+
| |- client.py
|
| 239 |
+
| |- tasks.py
|
| 240 |
+
| |- graders.py
|
| 241 |
+
| |- server/
|
| 242 |
+
| |- __init__.py
|
| 243 |
+
| |- app.py
|
| 244 |
+
| |- environment.py
|
| 245 |
+
| |- Dockerfile
|
| 246 |
+
|- inference.py
|
| 247 |
+
|- openenv.yaml
|
| 248 |
+
|- pyproject.toml
|
| 249 |
+
|- requirements.txt
|
| 250 |
+
|- uv.lock
|
| 251 |
+
|- README.md
|
| 252 |
+
```
|
__init__.cpython-313.pyc
ADDED
|
Binary file (115 Bytes). View file
|
|
|
__init__.py
ADDED
|
File without changes
|
app.cpython-313.pyc
ADDED
|
Binary file (795 Bytes). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Root server entrypoint expected by OpenEnv validator."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
import uvicorn
|
| 8 |
+
|
| 9 |
+
from support_triage_env.server.app import app
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def main() -> None:
|
| 13 |
+
host = os.getenv("HOST", "0.0.0.0")
|
| 14 |
+
port = int(os.getenv("PORT", "8000"))
|
| 15 |
+
uvicorn.run("server.app:app", host=host, port=port)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
if __name__ == "__main__":
|
| 19 |
+
main()
|
client.cpython-313.pyc
ADDED
|
Binary file (4.58 kB). View file
|
|
|
graders.cpython-313.pyc
ADDED
|
Binary file (5.66 kB). View file
|
|
|
inference.cpython-313.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
inference.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Baseline inference for support-triage-openenv.

Required environment variables before submission:
- API_BASE_URL
- MODEL_NAME
- HF_TOKEN
"""

from __future__ import annotations

import json
import os
import re
from dataclasses import asdict, dataclass
from typing import Dict, Optional

from openai import OpenAI

from support_triage_env.models import SupportTriageAction, SupportTriageObservation
from support_triage_env.server.environment import SupportTriageEnvironment
from support_triage_env.tasks import TASK_ORDER

# OpenAI-compatible endpoint; defaults to the Hugging Face router.
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
# HF_TOKEN is the primary credential; OPENAI_API_KEY is accepted as a fallback.
API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")

# Hard cap on agent steps per episode (client-side safety net).
MAX_STEPS = 14
# Low temperature keeps baseline runs near-deterministic.
TEMPERATURE = 0.1
MAX_TOKENS = 220

# Closed set of action types accepted by the environment; used to validate
# parsed model output before constructing a SupportTriageAction.
ACTION_TYPES = {
    "set_priority",
    "route_team",
    "add_tag",
    "draft_reply",
    "request_info",
    "close_ticket",
    "noop",
}

SYSTEM_PROMPT = (
    "You are a customer support triage agent operating in an RL environment. "
    "Return exactly one JSON object with keys action_type and value. "
    "Valid action_type values are: set_priority, route_team, add_tag, "
    "draft_reply, request_info, close_ticket, noop. "
    "Do not include markdown, explanations, or extra text."
)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@dataclass
class EpisodeReport:
    """Summary of one graded episode, produced by run_task()."""

    task_id: str  # task fixture id, e.g. "easy_password_reset"
    steps: int  # number of environment steps the episode took
    score: float  # final grader score, normalized to [0.0, 1.0]
    breakdown: Dict[str, float]  # per-component grader scores
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def build_user_prompt(step: int, obs: SupportTriageObservation) -> str:
    """Render the current observation as a plain-text prompt for the model."""
    lines = [
        f"Step: {step}",
        f"Task: {obs.task_id} ({obs.difficulty})",
        f"Objective: {obs.objective}",
        f"Title: {obs.title}",
        f"Customer tier: {obs.customer_tier}",
        f"Customer message: {obs.customer_message}",
        f"Current priority: {obs.priority}",
        f"Current team: {obs.routed_team}",
        f"Current tags: {obs.tags}",
        f"Info requested: {obs.info_requested}",
        f"Current draft reply: {obs.draft_reply}",
        f"Steps remaining: {obs.steps_remaining}",
        f"Last feedback: {obs.last_feedback}",
        f"Allowed actions: {obs.allowed_actions}",
        "Respond with JSON only.",
    ]
    return "\n".join(lines)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _extract_json(text: str) -> Optional[Dict[str, object]]:
|
| 80 |
+
text = (text or "").strip()
|
| 81 |
+
if not text:
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
try:
|
| 85 |
+
parsed = json.loads(text)
|
| 86 |
+
if isinstance(parsed, dict):
|
| 87 |
+
return parsed
|
| 88 |
+
except json.JSONDecodeError:
|
| 89 |
+
pass
|
| 90 |
+
|
| 91 |
+
match = re.search(r"\{.*\}", text, re.DOTALL)
|
| 92 |
+
if not match:
|
| 93 |
+
return None
|
| 94 |
+
|
| 95 |
+
try:
|
| 96 |
+
parsed = json.loads(match.group(0))
|
| 97 |
+
except json.JSONDecodeError:
|
| 98 |
+
return None
|
| 99 |
+
|
| 100 |
+
return parsed if isinstance(parsed, dict) else None
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Deterministic per-task fixtures used by the fallback policy. Keeping them
# in data tables (instead of the previous copy-pasted per-task if-chains)
# makes adding a task a one-line-per-table change and keeps the action
# ordering logic in a single place.
_FALLBACK_PRIORITIES = {
    "easy_password_reset": "medium",
    "medium_double_charge": "high",
    "hard_account_takeover": "urgent",
}
_FALLBACK_TEAMS = {
    "easy_password_reset": "account",
    "medium_double_charge": "billing",
    "hard_account_takeover": "trust_safety",
}
# Tags are applied in listed order, one per call.
_FALLBACK_TAGS = {
    "easy_password_reset": ["password-reset", "login"],
    "medium_double_charge": ["refund", "double-charge", "vip"],
    "hard_account_takeover": ["security", "account-takeover", "fraud", "content-abuse"],
}
# Tasks that require an evidence request before drafting a reply.
_FALLBACK_INFO_REQUESTS = {
    "medium_double_charge": "Please share the transaction ID and last 4 digits of the charged card.",
    "hard_account_takeover": "Please share screenshot evidence, timestamps, and the suspicious order ID.",
}
_FALLBACK_REPLIES = {
    "easy_password_reset": (
        "Sorry for the login trouble. Please use the reset link again and "
        "enable 2FA after login. If this continues, support can verify your token."
    ),
    "medium_double_charge": (
        "Sorry for this frustration. Our billing team will investigate the "
        "double charge and process any eligible refund after verification."
    ),
    "hard_account_takeover": (
        "We have escalated this security case. Please secure your account, reset "
        "password now, and enable two-factor authentication immediately."
    ),
}


def fallback_action(obs: SupportTriageObservation) -> SupportTriageAction:
    """Deterministic heuristic policy used when model output is malformed.

    Emits exactly one next action per call, in a fixed order:
    set_priority -> route_team -> missing tags (in table order) ->
    request_info (tasks that need evidence) -> draft_reply -> close_ticket.
    Keeps baseline runs reproducible.
    """
    task = obs.task_id

    if not obs.priority:
        return SupportTriageAction(
            action_type="set_priority",
            value=_FALLBACK_PRIORITIES.get(task, "medium"),
        )

    if not obs.routed_team:
        return SupportTriageAction(
            action_type="route_team",
            value=_FALLBACK_TEAMS.get(task, "technical"),
        )

    for tag in _FALLBACK_TAGS.get(task, []):
        if tag not in obs.tags:
            return SupportTriageAction(action_type="add_tag", value=tag)

    # Evidence request comes before the draft for the tasks that need one
    # (the easy task has no entry here, so it drafts immediately).
    info_prompt = _FALLBACK_INFO_REQUESTS.get(task)
    if info_prompt and not obs.info_requested:
        return SupportTriageAction(action_type="request_info", value=info_prompt)

    reply = _FALLBACK_REPLIES.get(task)
    if reply and not obs.draft_reply:
        return SupportTriageAction(action_type="draft_reply", value=reply)

    # Everything the fixtures require is in place: close out the ticket.
    return SupportTriageAction(action_type="close_ticket", value="")
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def parse_action(response_text: str, obs: SupportTriageObservation) -> SupportTriageAction:
    """Turn raw model text into a validated action, falling back heuristically.

    Any payload that cannot be parsed, or whose action_type is not in
    ACTION_TYPES, is replaced by the deterministic fallback policy.
    """
    payload = _extract_json(response_text)
    if not payload:
        return fallback_action(obs)

    kind = str(payload.get("action_type", "noop")).strip()
    if kind not in ACTION_TYPES:
        return fallback_action(obs)

    raw_value = payload.get("value")
    return SupportTriageAction(
        action_type=kind,
        value="" if raw_value is None else str(raw_value),
    )
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def run_task(client: OpenAI, task_id: str) -> EpisodeReport:
    """Play one full episode of *task_id* and return its graded report.

    Each step, the model proposes one JSON action; malformed output or a
    failed API call falls back to the deterministic heuristic, so runs
    stay reproducible end to end.
    """
    env = SupportTriageEnvironment()
    obs = env.reset(task_id=task_id)

    step = 0
    while step < MAX_STEPS and not obs.done:
        step += 1
        prompt = build_user_prompt(step, obs)

        try:
            completion = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS,
                stream=False,
            )
            raw_reply = completion.choices[0].message.content or ""
        except Exception as exc:  # noqa: BLE001
            print(f"Model call failed on {task_id} step {step}: {exc}. Falling back to heuristic.")
            raw_reply = ""

        action = parse_action(raw_reply, obs)
        obs = env.step(action)

        print(
            f"[{task_id}] step={step} action={action.action_type}:{action.value} "
            f"reward={obs.reward:+.3f} done={obs.done}"
        )

    final = env.evaluate()
    return EpisodeReport(
        task_id=task_id,
        steps=int(final["steps"]),
        score=float(final["score"]),
        breakdown=dict(final["breakdown"]),
    )
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def main() -> None:
    """Run the baseline over every task and write baseline_scores.json.

    Raises RuntimeError if the required credentials or model name are not
    configured in the environment.
    """
    if not API_KEY:
        raise RuntimeError("Missing HF_TOKEN (or OPENAI_API_KEY fallback) environment variable.")
    if not MODEL_NAME:
        raise RuntimeError("Missing MODEL_NAME environment variable.")

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    reports = []
    for task_id in TASK_ORDER:
        reports.append(run_task(client, task_id))

    avg_score = sum(r.score for r in reports) / len(reports)
    print("\n=== Baseline Scores ===")
    for r in reports:
        print(f"{r.task_id}: score={r.score:.4f} steps={r.steps}")
    print(f"Average score: {avg_score:.4f}")

    payload = {
        "model": MODEL_NAME,
        "api_base_url": API_BASE_URL,
        "average_score": round(avg_score, 4),
        "tasks": [asdict(r) for r in reports],
    }

    with open("baseline_scores.json", "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)

    print("Saved baseline_scores.json")


if __name__ == "__main__":
    main()
|
models.cpython-313.pyc
ADDED
|
Binary file (2.67 kB). View file
|
|
|
openenv.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenEnv deployment metadata (consumed by `openenv validate` / `openenv push`).
spec_version: 1
name: support-triage-openenv
version: "0.1.0"
description: "Customer support triage environment for OpenEnv"
# Deployed as a Hugging Face Space running a FastAPI app.
type: space
runtime: fastapi
# Import path of the ASGI app object; must stay in sync with server/app.py.
app: server.app:app
# Must match the Dockerfile PORT env and the Space's app_port.
port: 8000
|
pyproject.toml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Packaging metadata for the support-triage OpenEnv environment.
[build-system]
requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "support-triage-openenv"
version = "0.1.0"
description = "OpenEnv environment for customer support triage and response drafting"
readme = "README.md"
requires-python = ">=3.10"
authors = [{ name = "Hackathon Team" }]
# Keep in sync with requirements.txt.
dependencies = [
    "openenv-core>=0.2.0",
    "openai>=1.35.0",
    "pydantic>=2.7.0",
    "uvicorn>=0.30.0",
]

# `server` console script starts the API via server/app.py's main().
[project.scripts]
server = "server.app:main"

[tool.setuptools.packages.find]
include = ["support_triage_env*", "server*"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Runtime dependencies (keep in sync with pyproject.toml [project].dependencies).
openenv-core>=0.2.0
openai>=1.35.0
pydantic>=2.7.0
uvicorn>=0.30.0
|
tasks.cpython-313.pyc
ADDED
|
Binary file (3.81 kB). View file
|
|
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|