Commit ·
41bb27c
0
Parent(s):
Initialize OpenEnv Customer Support Project
Browse files- .gitignore +23 -0
- Dockerfile +18 -0
- README.md +83 -0
- app/__init__.py +1 -0
- app/env.py +167 -0
- app/grader.py +80 -0
- app/main.py +99 -0
- app/models.py +14 -0
- app/tasks.py +23 -0
- frontend +1 -0
- inference.py +110 -0
- openenv.yaml +8 -0
- push_to_hf.py +41 -0
- requirements.txt +4 -0
.gitignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environments
|
| 2 |
+
.env
|
| 3 |
+
.venv/
|
| 4 |
+
venv/
|
| 5 |
+
ENV/
|
| 6 |
+
env/
|
| 7 |
+
|
| 8 |
+
# Python
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.py[cod]
|
| 11 |
+
*$py.class
|
| 12 |
+
*.so
|
| 13 |
+
|
| 14 |
+
# Node
|
| 15 |
+
node_modules/
|
| 16 |
+
|
| 17 |
+
# Next.js
|
| 18 |
+
.next/
|
| 19 |
+
out/
|
| 20 |
+
build/
|
| 21 |
+
|
| 22 |
+
# Mac
|
| 23 |
+
.DS_Store
|
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
# Hugging Face explicitly requires a non-root user mapped to 1000
|
| 4 |
+
RUN useradd -m -u 1000 user
|
| 5 |
+
USER user
|
| 6 |
+
ENV HOME=/home/user \
|
| 7 |
+
PATH=/home/user/.local/bin:$PATH
|
| 8 |
+
|
| 9 |
+
WORKDIR $HOME/app
|
| 10 |
+
|
| 11 |
+
COPY --chown=user requirements.txt .
|
| 12 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
+
|
| 14 |
+
COPY --chown=user . $HOME/app
|
| 15 |
+
|
| 16 |
+
EXPOSE 7860
|
| 17 |
+
|
| 18 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AI Customer Support Decision-Making Environment (OpenEnv)
|
| 2 |
+
|
| 3 |
+
## Environment Description
|
| 4 |
+
This is a mathematically constrained OpenEnv Customer Support simulation. It simulates real-world workflows encompassing complex decision-making processes for an AI customer support agent. The environment maps dynamic ticket properties (sentiment, issues) and evaluates sequential agent logic to determine correct problem resolutions across constrained pipelines.
|
| 5 |
+
|
| 6 |
+
## Action Space
|
| 7 |
+
The AI agent interacts with the environment sequentially by emitting precise JSON payloads defined dynamically inside string schemas.
|
| 8 |
+
|
| 9 |
+
**Available Actions:**
|
| 10 |
+
- `classify_ticket`: Evaluates string. Payload mapping: `{"classification": "refund" | "general_inquiry" | "login_issue" | "feedback"}`
|
| 11 |
+
- `assign_priority`: Assigns structural tier. Payload mapping: `{"priority": "low" | "medium" | "high"}`
|
| 12 |
+
- `generate_response`: Drafts contextual replies to tickets. Enforces checking `sentiment` logic constraint requirements (e.g., apologies). Payload mapping: `{"response": "<text>"}`
|
| 13 |
+
- `escalate`: Maps directly to manual priority bypass. Empty payload: `{}`
|
| 14 |
+
- `resolve`: Pipeline completion call mapped to constraints checking complete scenario data. Empty payload: `{}`
|
| 15 |
+
|
| 16 |
+
## Observation Space
|
| 17 |
+
State mapping delivers deterministic JSON payload snapshots back to the interacting agent across the workflow sequence.
|
| 18 |
+
|
| 19 |
+
**State Fields Include:**
|
| 20 |
+
- `ticket_text`: Read-only simulated user input sequence string.
|
| 21 |
+
- `sentiment`: Customer evaluation mapped (e.g., `angry`, `neutral`, `happy`).
|
| 22 |
+
- `priority`: Active assignment tracker (`null` until categorized).
|
| 23 |
+
- `status`: Global environment variable matching `open` or `closed` lifecycle mapping.
|
| 24 |
+
- `steps_taken`: Numeric iteration tracker mapping execution costs.
|
| 25 |
+
- `classification`: Evaluated class.
|
| 26 |
+
- `response`: Text payload caching assigned generated scripts.
|
| 27 |
+
|
| 28 |
+
## Task Descriptions
|
| 29 |
+
The environment exposes explicit evaluation goals mapping standard deterministic task metrics (`0.0` - `1.0`):
|
| 30 |
+
|
| 31 |
+
- **EASY** (`task_easy_1`): Only classify the issue correctly.
|
| 32 |
+
- **MEDIUM** (`task_medium_1`): Classify the ticket completely and generate an appropriate text response (measuring conditional logic like providing empathy for an explicitly 'angry' sentiment).
|
| 33 |
+
- **HARD** (`task_hard_1`): The agent structurally navigates the entire pipeline: 1. Correctly classify mapping 2. Assign priority mappings accurately 3. Emit valid appropriate responses 4. Correctly resolve the closed ticket state.
|
| 34 |
+
|
| 35 |
+
## Reward Design
|
| 36 |
+
The reward function is dense and continuous, designed to shape agent behavior precisely:
|
| 37 |
+
|
| 38 |
+
**Sequential Rewards (Partial Progress):**
|
| 39 |
+
- **`+0.3`**: Correct ticket classification.
|
| 40 |
+
- **`+0.2`**: Accurate priority assignment.
|
| 41 |
+
- **`+0.2`**: Successful appropriate response generation.
|
| 42 |
+
- **`+0.3`**: Officially verified constraint-approved ticket structural resolution.
|
| 43 |
+
|
| 44 |
+
**Penalties:**
|
| 45 |
+
- **`-0.2`**: Punishes specifically wrong actions (assigning the wrong classification, failing empathy mappings, bypassing procedures).
|
| 46 |
+
- **`-0.1`**: Punishment for immediately duplicating/repeating explicit prior script actions iteratively.
|
| 47 |
+
- **`-0.1`**: Incurred purely per sequential step count scaling execution delays to promote processing speed organically.
|
| 48 |
+
|
| 49 |
+
## Setup Instructions
|
| 50 |
+
Install the lightweight dependency set with pip (ideally inside a virtual environment):
|
| 51 |
+
|
| 52 |
+
```bash
|
| 53 |
+
pip install -r requirements.txt
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
Start the interactive API server natively linking locally:
|
| 57 |
+
```bash
|
| 58 |
+
uvicorn app.main:app --host 0.0.0.0 --port 7860
|
| 59 |
+
```
|
| 60 |
+
### Running Via Docker (Hugging Face Ready)
|
| 61 |
+
This build incorporates standard non-root target configs enabling seamless capability deploys onto HF.
|
| 62 |
+
```bash
|
| 63 |
+
docker build -t openenv-support .
|
| 64 |
+
docker run -p 7860:7860 openenv-support
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
## How to Run Inference (Hugging Face / OpenAI)
|
| 68 |
+
Run the automated evaluation pipeline. It steps an LLM agent through the environment and prints structured `[START]`, `[STEP]`, and `[END]` log lines so each run can be validated and scored.
|
| 69 |
+
|
| 70 |
+
```bash
|
| 71 |
+
export MODEL_NAME="meta-llama/Meta-Llama-3-8B-Instruct"
|
| 72 |
+
export HF_TOKEN="hf_..."
|
| 73 |
+
python inference.py --task task_hard_1
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
## Push to Hugging Face Hub
|
| 77 |
+
A helper script allows you to rapidly publish your entire codebase straight out into your Hugging Face space repository.
|
| 78 |
+
|
| 79 |
+
Run the automated publisher to upload this space seamlessly into `vivekvish2004/openenv-customer-support`:
|
| 80 |
+
```bash
|
| 81 |
+
python push_to_hf.py
|
| 82 |
+
```
|
| 83 |
+
*(This gracefully detects existing tokens and ignores large bloat directories like node_modules to ensure a fast, clean upload!)*
|
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Init module."""
|
app/env.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import copy
|
| 3 |
+
from typing import Tuple
|
| 4 |
+
from app.models import Action, Observation, Reward
|
| 5 |
+
|
| 6 |
+
# Pre-defined real-world support tickets for the workflow simulation
|
| 7 |
+
# Ground-truth ticket scenarios sampled on every reset().
# Each entry pairs the raw customer message and its sentiment with the
# classification and priority an ideal agent is expected to choose.
SCENARIOS = [
    # Angry customer demanding a refund -> urgent.
    {
        "ticket_text": "I bought a premium subscription but it's not working. I want my money back right now!",
        "sentiment": "angry",
        "expected_classification": "refund",
        "expected_priority": "high",
    },
    # Simple how-to question -> low priority.
    {
        "ticket_text": "How do I change my profile picture? I tried looking in the settings but couldn't find it.",
        "sentiment": "neutral",
        "expected_classification": "general_inquiry",
        "expected_priority": "low",
    },
    # Time-critical access problem -> urgent.
    {
        "ticket_text": "I can't log into my account, and I have a huge presentation in 10 minutes that needs the data!",
        "sentiment": "angry",
        "expected_classification": "login_issue",
        "expected_priority": "high",
    },
    # Positive feedback, nothing to fix -> low priority.
    {
        "ticket_text": "Thank you so much for fixing the bug! Everything is running perfectly today.",
        "sentiment": "happy",
        "expected_classification": "feedback",
        "expected_priority": "low",
    },
]
|
| 33 |
+
|
| 34 |
+
class CustomerSupportEnv:
    """Simulated customer-support ticket workflow with dense shaped rewards.

    Each episode samples one scenario from SCENARIOS; the agent is rewarded
    for classifying, prioritizing, responding to, and resolving the ticket,
    with small penalties for repeated actions, wrong actions, and a per-step
    cost to encourage short episodes.
    """

    def __init__(self):
        """Initialize the AI Customer Support environment."""
        self.current_state = None   # observable ticket state (None until reset())
        self.ground_truth = None    # hidden scenario used for grading
        self.max_steps = 10         # failsafe cap on episode length
        self.current_step = 0
        self.actions_taken = set()  # action types already used (repeat penalty)

    def reset(self) -> Observation:
        """Reset the environment to a new random customer support ticket."""
        self.ground_truth = random.choice(SCENARIOS)
        self.current_step = 0
        self.actions_taken = set()

        # State strictly matching requirements
        self.current_state = {
            "ticket_text": self.ground_truth["ticket_text"],
            "sentiment": self.ground_truth["sentiment"],
            "priority": None,        # AI will assign (low / medium / high)
            "status": "open",        # Track ticket lifecycle
            "steps_taken": 0,
            "classification": None,  # AI will classify
            "response": None,        # AI's generated reply
        }

        return self.state()

    def state(self) -> Observation:
        """Return the current state of the environment."""
        return Observation(
            state=self.current_state,
            info={"max_steps": self.max_steps}
        )

    def step(self, action: Action) -> Tuple[Observation, Reward, bool, dict]:
        """Apply an action to process the ticket workflow and evaluate using dense rewards.

        Returns (observation, reward, is_terminal, info); info carries the
        grader message plus the ground-truth labels for debugging.
        """
        self.current_step += 1
        self.current_state["steps_taken"] += 1

        reward_val = 0.0
        is_terminal = False
        message = ""

        a_type = action.action_type
        payload = action.payload

        # Ensure ticket is open to take steps
        if self.current_state["status"] == "closed":
            return self.state(), Reward(value=-0.2, is_terminal=True), True, {"error": "Ticket is already closed"}

        # Penalize repeated actions (-0.1)
        if a_type in self.actions_taken:
            reward_val -= 0.1
            message += "Repeated action penalty. "
        else:
            self.actions_taken.add(a_type)

        # Per-step cost (-0.1) to promote short episodes
        reward_val -= 0.1

        # Workflow transitions with explicit dense reward triggers
        if a_type == "classify_ticket":
            category = payload.get("classification")
            self.current_state["classification"] = category
            if category == self.ground_truth["expected_classification"]:
                reward_val += 0.3  # Correct classification
                message += "Correct classification."
            else:
                reward_val -= 0.2  # Wrong action penalty
                message += "Incorrect classification."

        elif a_type == "assign_priority":
            level = payload.get("priority")
            self.current_state["priority"] = level
            if level == self.ground_truth["expected_priority"]:
                reward_val += 0.2  # Correct priority
                message += "Correct priority assignment."
            else:
                reward_val -= 0.2  # Wrong action penalty
                message += "Suboptimal priority assignment."

        elif a_type == "generate_response":
            response_text = payload.get("response")
            self.current_state["response"] = response_text

            # Bug fix: the payload may omit "response" entirely, which made
            # .lower() raise AttributeError on None. A missing reply now simply
            # counts as lacking empathy instead of crashing the server.
            if self.current_state["sentiment"] == "angry" and "sorry" not in (response_text or "").lower():
                reward_val -= 0.2  # Wrong action penalty (lacked empathy)
                message += "Response lacked empathy for an angry customer."
            else:
                reward_val += 0.2  # Useful response
                message += "Useful response generated."

        elif a_type == "escalate":
            if self.current_state["sentiment"] == "angry" and self.ground_truth["expected_priority"] == "high":
                reward_val += 0.3  # Acts as successful resolution of urgent cases
                message += "Successfully escalated urgent issue."
            else:
                reward_val -= 0.2  # Wrong action penalty
                message += "Wrong action: Unnecessary escalation for low-priority issue."
            # Escalation always hands the ticket off, so the episode ends.
            self.current_state["status"] = "closed"
            is_terminal = True

        elif a_type == "resolve":
            # Resolution only scores when the full workflow was completed first.
            if not self.current_state["classification"] or not self.current_state["priority"] or not self.current_state["response"]:
                reward_val -= 0.2  # Wrong action penalty
                message += "Wrong action: Attempted resolution without completing workflow steps."
            else:
                reward_val += 0.3  # Resolution success
                message += "Ticket resolved successfully."

            self.current_state["status"] = "closed"
            is_terminal = True

        else:
            reward_val -= 0.2  # Unknown action
            message += f"Wrong action: Unknown action '{a_type}'."

        # Failsafe: force-terminate episodes that hit the step cap.
        if self.current_step >= self.max_steps and not is_terminal:
            is_terminal = True
            if self.current_state["status"] == "open":
                self.current_state["status"] = "closed"

        obs = self.state()
        reward = Reward(value=reward_val, is_terminal=is_terminal)

        info = {
            "message": message,
            "expected_classification": self.ground_truth["expected_classification"],
            "expected_priority": self.ground_truth["expected_priority"],
        }

        return obs, reward, is_terminal, info
|
app/grader.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, List
|
| 2 |
+
|
| 3 |
+
def score_easy(state: Dict[str, Any], ground_truth: Dict[str, Any]) -> float:
    """EASY tier: full credit iff the classification matches ground truth."""
    predicted = state.get("classification")
    expected = ground_truth.get("expected_classification")
    return 1.0 if predicted == expected else 0.0
|
| 8 |
+
|
| 9 |
+
def score_medium(state: Dict[str, Any], ground_truth: Dict[str, Any]) -> float:
    """MEDIUM tier: correct classification (0.5) plus an acceptable response (0.5).

    A response for an angry customer only counts when it contains "sorry".
    """
    total = 0.0

    # Classification half
    if state.get("classification") == ground_truth.get("expected_classification"):
        total += 0.5

    # Response half (empathy required for angry sentiment)
    reply = state.get("response")
    if reply:
        needs_empathy = state.get("sentiment") == "angry"
        if not needs_empathy or "sorry" in reply.lower():
            total += 0.5

    return total
|
| 27 |
+
|
| 28 |
+
def score_hard(state: Dict[str, Any], ground_truth: Dict[str, Any]) -> float:
    """HARD tier: four equally weighted checks, 0.25 each.

    classify -> prioritize -> respond (with empathy if angry) -> resolve.
    """
    reply = state.get("response")
    # A reply scores only if present and, for angry customers, apologetic.
    acceptable_reply = bool(reply) and (
        state.get("sentiment") != "angry" or "sorry" in reply.lower()
    )

    checks = (
        state.get("classification") == ground_truth.get("expected_classification"),
        state.get("priority") == ground_truth.get("expected_priority"),
        acceptable_reply,
        state.get("status") == "closed",
    )
    return 0.25 * sum(checks)
|
| 53 |
+
|
| 54 |
+
def score_episode(task_difficulty: str, history: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> float:
    """
    Deterministic scoring logic for an evaluated episode.

    Inspects only the final step of the history and returns a float in
    [0.0, 1.0]. Unknown difficulties and empty histories score 0.0.
    """
    if not history:
        return 0.0

    last = history[-1]

    # Accept the common OpenEnv observation shapes:
    # {"observation": {"state": ...}}, {"state": ...}, or a bare state dict.
    wrapped = last.get("observation")
    if isinstance(wrapped, dict) and "state" in wrapped:
        final_state = wrapped["state"]
    elif "state" in last:
        final_state = last["state"]
    else:
        final_state = last

    graders = {
        "EASY": score_easy,
        "MEDIUM": score_medium,
        "HARD": score_hard,
    }
    grader = graders.get(task_difficulty.upper())
    return grader(final_state, ground_truth) if grader else 0.0
|
app/main.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, Query
|
| 2 |
+
from app.env import CustomerSupportEnv
|
| 3 |
+
from app.models import Action, Observation
|
| 4 |
+
from app.tasks import get_all_tasks
|
| 5 |
+
from app.grader import score_episode
|
| 6 |
+
|
| 7 |
+
app = FastAPI(title="OpenEnv Customer Support API")
|
| 8 |
+
|
| 9 |
+
# Global singleton for the environment state lifecycle
|
| 10 |
+
env_instance = CustomerSupportEnv()
|
| 11 |
+
|
| 12 |
+
@app.get("/")
def read_root():
    """Landing endpoint; confirms the API process is up and reachable."""
    return {"message": "Welcome to OpenEnv Customer Support API"}
|
| 15 |
+
|
| 16 |
+
@app.get("/reset", response_model=Observation)
def reset_env():
    """Reset the environment and yield the initial observation.

    A new scenario is sampled at random on every call; any in-progress
    episode on the shared singleton environment is discarded.
    """
    return env_instance.reset()
|
| 20 |
+
|
| 21 |
+
@app.post("/step")
def step_env(action: Action):
    """Submit an action schema to process the environment workflow.

    Returns the new observation, the step reward, the done flag, and
    grader info. Raises 400 if /reset has never been called.
    """
    if env_instance.current_state is None:
        raise HTTPException(status_code=400, detail="Environment is not initialized. Please call /reset first.")

    observation, reward, finished, info = env_instance.step(action)
    return {
        "observation": observation.dict(),
        "reward": reward.dict(),
        "done": finished,
        "info": info,
    }
|
| 34 |
+
|
| 35 |
+
@app.get("/state", response_model=Observation)
def get_state():
    """Retrieve the current deterministic state of the environment.

    Raises 400 if /reset has not been called yet in this process.
    """
    if env_instance.current_state is None:
        raise HTTPException(status_code=400, detail="Environment is not initialized. Please call /reset first.")
    return env_instance.state()
|
| 41 |
+
|
| 42 |
+
@app.get("/tasks")
def get_tasks():
    """Retrieve all available tasks (id, difficulty, objective) registered in the environment."""
    return {"tasks": get_all_tasks()}
|
| 46 |
+
|
| 47 |
+
@app.get("/grader")
def run_grader(task_id: str = Query(..., description="The matching task ID to score against (e.g. 'task_easy_1')")):
    """Grade the current state of the ticket interaction using the deterministic grader.

    Unknown task IDs fall back to the EASY rubric rather than erroring.
    Raises 400 if /reset has not been called yet.
    """
    if env_instance.current_state is None:
        raise HTTPException(status_code=400, detail="Environment is not initialized. Please call /reset first.")

    # Map the task ID to its logical difficulty tier (default: EASY).
    task_diff = next(
        (t["difficulty"] for t in get_all_tasks() if t["id"] == task_id),
        "EASY",
    )

    # The grader expects an episode history; wrap the live state as the final step.
    mock_history = [{"state": env_instance.current_state}]

    score = score_episode(task_diff, mock_history, env_instance.ground_truth)
    return {"task_id": task_id, "score": score}
|
| 66 |
+
|
| 67 |
+
@app.get("/baseline")
def run_baseline():
    """Execute a hardcoded 'perfect' baseline workflow on the current scenario to trace rewards."""
    if env_instance.current_state is None:
        env_instance.reset()

    gt = env_instance.ground_truth

    # Ideal action ordering derived directly from the scenario's ground truth.
    ideal_steps = [
        {"action_type": "classify_ticket", "payload": {"classification": gt["expected_classification"]}},
        {"action_type": "assign_priority", "payload": {"priority": gt["expected_priority"]}},
        {"action_type": "generate_response", "payload": {"response": "I am so sorry for the inconvenience. That is completely fixed now."}},
        {"action_type": "resolve", "payload": {}},
    ]

    trace_results = []
    for raw_action in ideal_steps:
        _, reward, finished, _ = env_instance.step(Action(**raw_action))
        trace_results.append({
            "action": raw_action,
            "reward_earned": reward.value,
            "done": finished,
        })
        if finished:
            break

    return {
        "message": "Baseline ideal sequence successfully executed against ground truth.",
        "trace": trace_results,
        "final_state": env_instance.current_state,
    }
|
app/models.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Any, Optional, Dict
|
| 3 |
+
|
| 4 |
+
class Action(BaseModel):
    """Agent-emitted command: an action name plus its JSON payload arguments."""
    # action_type: one of classify_ticket / assign_priority / generate_response / escalate / resolve
    action_type: str
    payload: Dict[str, Any]

class Observation(BaseModel):
    """Snapshot of the environment returned to the agent after reset/step."""
    state: Dict[str, Any]
    info: Optional[Dict[str, Any]] = None

class Reward(BaseModel):
    """Scalar step reward plus whether the episode terminated on this step."""
    value: float
    is_terminal: bool
|
app/tasks.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict
|
| 2 |
+
|
| 3 |
+
# Registry of evaluation tasks, ordered easiest to hardest. Each entry is a
# plain dict with a stable id, a difficulty tier, and a human-readable goal.
TASKS = [
    {
        "id": "task_easy_1",
        "difficulty": "EASY",
        "objective": "Only classify the issue correctly. You do not need to assign priority or resolve the ticket.",
    },
    {
        "id": "task_medium_1",
        "difficulty": "MEDIUM",
        "objective": "Classify the ticket issue correctly and generate an appropriate response. If the customer is angry, ensure the response includes empathy (e.g., 'sorry').",
    },
    {
        "id": "task_hard_1",
        "difficulty": "HARD",
        "objective": "Complete the full support workflow: 1. Correctly classify the issue, 2. Accurately assign priority, 3. Generate a correct, empathetic response, and 4. Officially resolve (close) the ticket.",
    },
]

def get_all_tasks() -> List[Dict[str, str]]:
    """Return the list of all registered tasks."""
    return TASKS
|
frontend
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit db65d9cb06efad6a3c9a5b6c0f0d3d38822bcf0f
|
inference.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import argparse
|
| 4 |
+
from typing import Any
|
| 5 |
+
from huggingface_hub import InferenceClient
|
| 6 |
+
from app.env import CustomerSupportEnv
|
| 7 |
+
from app.models import Action
|
| 8 |
+
|
| 9 |
+
def evaluate_llm(task_id: str):
    """Deterministically evaluate a Hugging Face LLM agent against the support environment.

    Runs one episode: prompts the model for a JSON action each step, applies it
    to a fresh CustomerSupportEnv, and prints [START]/[STEP]/[END] trace lines.

    NOTE(review): task_id is only used in the start log; the environment always
    samples a random scenario regardless of the task — confirm this is intended.
    """
    # Strict dynamic config parsing against Hugging Face requirements
    model_name = os.environ.get("MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
    hf_token = os.environ.get("HF_TOKEN", "")

    # Initialize Strict standard OpenEnv tracking log
    print(f"[START] task={task_id} env=customer-support-env model={model_name}")

    if not hf_token:
        print("Warning: HF_TOKEN is explicitly empty. Hugging Face Inference may fail or rate limit.")

    # Execute via Hugging Face Hub natively mapped to their architecture
    client = InferenceClient(
        model=model_name,
        token=hf_token
    )

    env = CustomerSupportEnv()
    obs = env.reset()

    # Per-step reward trace, printed in the [END] summary.
    rewards_history = []

    # Inject formal logic constraints prompting reliable JSON interactions
    system_prompt = """You are a highly structured AI customer support agent resolving a ticket pipeline.
Available actions list:
1. classify_ticket (payload format: {"classification": "refund" | "general_inquiry" | "login_issue" | "feedback"})
2. assign_priority (payload format: {"priority": "low" | "medium" | "high"})
3. generate_response (payload format: {"response": "<text>"})
4. escalate (payload format: {})
5. resolve (payload format: {})

You MUST return ONLY a fully valid JSON format mapping this dict schema:
{
"action_type": "<action_name>",
"payload": { ... }
}"""

    done = False
    step_count = 0
    conversation_messages = [
        {"role": "system", "content": system_prompt}
    ]

    # Interaction Loop: one model call -> one environment step per iteration.
    while not done and step_count < env.max_steps:
        step_count += 1

        # Show the model only the observable ticket state, as compact JSON.
        obs_stringified = json.dumps(obs.dict()["state"])
        conversation_messages.append({"role": "user", "content": f"Current Ticket State: {obs_stringified}\nProvide your next action strictly in JSON:"})

        error_msg = ""
        action_type = "unknown"
        reward_val = 0.0

        try:
            # Deterministic, reproducible call explicitly leveraging HF formats
            response = client.chat_completion(
                messages=conversation_messages,
                temperature=0.01, # Hugging Face often crashes on explicitly 0.0 depending on the endpoint model deployed
                max_tokens=256,
                # NOTE(review): this condition tests hasattr(client, "chat_completion"),
                # which is always true for the method being called here, so it never
                # selects None. Presumably the intent was to probe JSON-mode support
                # on the endpoint — confirm and fix the predicate.
                response_format={"type": "json"} if hasattr(client, "chat_completion") else None
                # Note: Not all HF hosted models support automatic JSON constraints, but instructions prompt for it natively.
            )

            action_text = response.choices[0].message.content
            # May raise json.JSONDecodeError / pydantic ValidationError on
            # malformed model output; handled by the broad except below.
            action_data = json.loads(action_text)

            action_type = action_data.get("action_type", "unknown")
            action = Action(**action_data)

            # Step the mathematical environment
            obs, reward, done, info = env.step(action)
            reward_val = reward.value

            # Provide reflection feedback to AI
            conversation_messages.append({"role": "assistant", "content": action_text})
            conversation_messages.append({"role": "system", "content": f"Action result mapping: Reward={reward_val}, Done={done}, Info={json.dumps(info)}"})

        except Exception as e:
            # Any failure (network, bad JSON, invalid action) aborts the episode
            # with a -1.0 reward so the run still produces a complete trace.
            error_msg = str(e).replace("\n", " ").strip()
            reward_val = -1.0
            done = True

        rewards_history.append(reward_val)

        # Output Explicit formatted log
        done_str = "true" if done else "false"
        print(f"[STEP] step={step_count} action={action_type} reward={reward_val:.2f} done={done_str} error={error_msg}")

    # Output Explicit formatted termination log
    # Success = ticket closed AND the final step earned positive reward.
    success_str = "true" if (env.current_state and env.current_state.get("status") == "closed" and rewards_history and rewards_history[-1] > 0) else "false"
    r_mapped = ",".join(f"{r:.2f}" for r in rewards_history)
    print(f"[END] success={success_str} steps={step_count} rewards={r_mapped}")
|
| 104 |
+
|
| 105 |
+
if __name__ == "__main__":
    # CLI entry point: pick which registered task to evaluate against.
    cli = argparse.ArgumentParser()
    cli.add_argument("--task", type=str, default="task_hard_1", help="Task ID sequence to execute logic against.")
    evaluate_llm(cli.parse_args().task)
|
openenv.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "customer-support-env"
|
| 2 |
+
version: "1.0.0"
|
| 3 |
+
description: "Customer Support Environment for OpenEnv"
|
| 4 |
+
|
| 5 |
+
environment:
|
| 6 |
+
type: "custom"
|
| 7 |
+
package: "app.env"
|
| 8 |
+
class: "CustomerSupportEnv"
|
push_to_hf.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from huggingface_hub import login, upload_folder
|
| 3 |
+
|
| 4 |
+
def main():
    """Authenticate with Hugging Face and upload this workspace to the target repo."""
    print("Welcome to the Hugging Face Upload Hook!")

    # Prefer a token from the environment so the upload can run non-interactively.
    token = os.environ.get("HF_TOKEN")
    if token:
        print("HF_TOKEN detected. Logging in securely...")
        login(token=token)
    else:
        print("No HF_TOKEN found in environment variables.")
        print("Please securely provide your Hugging Face Access Token when prompted:")
        login()

    repo_target = "vivekvish2004/openenv-customer-support"
    print(f"\nPushing current workspace files to Hugging Face Model Repository -> {repo_target}")

    # Exclude build artifacts and virtualenvs so the upload stays small.
    skip_patterns = [
        "frontend/node_modules/**",
        "frontend/.next/**",
        "__pycache__/**",
        ".git/**",
        ".venv/**",
        "venv/**",
        "env/**",
        ".DS_Store",
    ]

    upload_folder(
        folder_path=".",
        repo_id=repo_target,
        repo_type="model",
        ignore_patterns=skip_patterns,  # Keeps the upload lightweight preventing the 504 Timeout
    )

    print("\n✅ Successfully uploaded OpenEnv Customer Support to Hugging Face!")


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
pydantic
|
| 4 |
+
openai
huggingface_hub
|