Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- Dockerfile +3 -0
- README.md +172 -175
- __init__.py +3 -2
- client.py +27 -68
- models.py +53 -10
- openenv.yaml +5 -1
- server/__init__.py +1 -1
- server/app.py +25 -31
- server/ecom_environment.py +794 -63
Dockerfile
CHANGED
|
@@ -27,6 +27,9 @@ ARG ENV_NAME=ecom
|
|
| 27 |
# Copy environment code (always at root of build context)
|
| 28 |
COPY . /app/env
|
| 29 |
|
|
|
|
|
|
|
|
|
|
| 30 |
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
WORKDIR /app/env
|
|
|
|
| 27 |
# Copy environment code (always at root of build context)
|
| 28 |
COPY . /app/env
|
| 29 |
|
| 30 |
+
# Remove host virtual environment if present in build context.
|
| 31 |
+
RUN rm -rf /app/env/.venv
|
| 32 |
+
|
| 33 |
# For in-repo builds, openenv is already vendored in the build context
|
| 34 |
# For standalone builds, openenv will be installed via pyproject.toml
|
| 35 |
WORKDIR /app/env
|
README.md
CHANGED
|
@@ -1,255 +1,252 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
app_port: 8000
|
| 9 |
base_path: /web
|
| 10 |
tags:
|
| 11 |
- openenv
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
| 19 |
|
| 20 |
-
|
| 21 |
|
| 22 |
-
|
| 23 |
-
from ecom import EcomAction, EcomEnv
|
| 24 |
-
|
| 25 |
-
try:
|
| 26 |
-
# Create environment from Docker image
|
| 27 |
-
ecomenv = EcomEnv.from_docker_image("ecom-env:latest")
|
| 28 |
|
| 29 |
-
|
| 30 |
-
result = ecomenv.reset()
|
| 31 |
-
print(f"Reset: {result.observation.echoed_message}")
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
result = ecomenv.step(EcomAction(message=msg))
|
| 38 |
-
print(f"Sent: '{msg}'")
|
| 39 |
-
print(f" → Echoed: '{result.observation.echoed_message}'")
|
| 40 |
-
print(f" → Length: {result.observation.message_length}")
|
| 41 |
-
print(f" → Reward: {result.reward}")
|
| 42 |
|
| 43 |
-
|
| 44 |
-
# Always clean up
|
| 45 |
-
ecomenv.close()
|
| 46 |
-
```
|
| 47 |
|
| 48 |
-
|
| 49 |
-
- Starting the Docker container
|
| 50 |
-
- Waiting for the server to be ready
|
| 51 |
-
- Connecting to the environment
|
| 52 |
-
- Container cleanup when you call `close()`
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
| 55 |
|
| 56 |
-
|
| 57 |
|
| 58 |
-
|
| 59 |
-
# From project root
|
| 60 |
-
docker build -t ecom-env:latest -f server/Dockerfile .
|
| 61 |
-
```
|
| 62 |
|
| 63 |
-
|
| 64 |
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
# From the environment directory (where openenv.yaml is located)
|
| 69 |
-
openenv push
|
| 70 |
|
| 71 |
-
|
| 72 |
-
openenv push --namespace my-org --private
|
| 73 |
-
```
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
|
| 81 |
|
| 82 |
-
|
| 83 |
|
| 84 |
-
|
| 85 |
|
| 86 |
-
- `
|
| 87 |
-
- `
|
| 88 |
-
- `
|
| 89 |
-
- `
|
| 90 |
|
| 91 |
-
|
| 92 |
|
| 93 |
-
|
| 94 |
-
# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
|
| 95 |
-
openenv push
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
-
#
|
| 101 |
-
openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
|
| 102 |
|
| 103 |
-
|
| 104 |
-
openenv push --private
|
| 105 |
|
| 106 |
-
|
| 107 |
-
openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
|
| 108 |
-
```
|
| 109 |
|
| 110 |
-
|
| 111 |
-
`
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 115 |
-
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 116 |
-
- **Health Check** at `/health` - Container health monitoring
|
| 117 |
-
- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
|
| 118 |
|
| 119 |
-
##
|
| 120 |
|
| 121 |
-
|
| 122 |
-
**EcomAction**: Contains a single field
|
| 123 |
-
- `message` (str) - The message to echo back
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
- `
|
| 129 |
-
- `
|
| 130 |
-
- `
|
| 131 |
-
|
|
|
|
| 132 |
|
| 133 |
-
|
| 134 |
-
The reward is calculated as: `message_length × 0.1`
|
| 135 |
-
- "Hi" → reward: 0.2
|
| 136 |
-
- "Hello, World!" → reward: 1.3
|
| 137 |
-
- Empty message → reward: 0.0
|
| 138 |
|
| 139 |
-
##
|
| 140 |
|
| 141 |
-
|
| 142 |
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
``
|
| 146 |
-
from ecom import EcomEnv
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
|
|
|
| 150 |
|
| 151 |
-
#
|
| 152 |
-
result = ecomenv.reset()
|
| 153 |
-
result = ecomenv.step(EcomAction(message="Hello!"))
|
| 154 |
-
```
|
| 155 |
|
| 156 |
-
|
| 157 |
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
|
| 161 |
|
| 162 |
```python
|
|
|
|
| 163 |
from ecom import EcomAction, EcomEnv
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
result = env.step(EcomAction(
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
```
|
| 174 |
|
| 175 |
-
|
| 176 |
-
- **Lower latency**: No HTTP connection overhead per request
|
| 177 |
-
- **Persistent session**: Server maintains your environment state
|
| 178 |
-
- **Efficient for episodes**: Better for many sequential steps
|
| 179 |
|
| 180 |
-
|
| 181 |
|
| 182 |
-
|
| 183 |
-
modify `server/app.py` to use factory mode:
|
| 184 |
|
| 185 |
-
``
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
```
|
| 194 |
|
| 195 |
-
|
| 196 |
|
| 197 |
-
``
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
```
|
| 212 |
|
| 213 |
-
|
| 214 |
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
-
|
| 218 |
|
| 219 |
```bash
|
| 220 |
-
|
| 221 |
-
python3 server/ecom_environment.py
|
| 222 |
```
|
| 223 |
|
| 224 |
-
|
| 225 |
-
- Environment resets correctly
|
| 226 |
-
- Step executes actions properly
|
| 227 |
-
- State tracking works
|
| 228 |
-
- Rewards are calculated correctly
|
| 229 |
|
| 230 |
-
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
| 233 |
|
| 234 |
```bash
|
| 235 |
-
|
| 236 |
```
|
| 237 |
|
| 238 |
-
##
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
```
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
├── pyproject.toml # Project metadata and dependencies
|
| 247 |
-
├── uv.lock # Locked dependencies (generated)
|
| 248 |
-
├── client.py # EcomEnv client
|
| 249 |
-
├── models.py # Action and Observation models
|
| 250 |
-
└── server/
|
| 251 |
-
├── __init__.py # Server module exports
|
| 252 |
-
├── ecom_environment.py # Core environment logic
|
| 253 |
-
├── app.py # FastAPI application (HTTP + WebSocket endpoints)
|
| 254 |
-
└── Dockerfile # Container image definition
|
| 255 |
```
|
|
|
|
| 1 |
---
|
| 2 |
+
title: E-commerce Returns Decision Environment
|
| 3 |
+
emoji: 📦
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
app_port: 8000
|
| 9 |
base_path: /web
|
| 10 |
tags:
|
| 11 |
- openenv
|
| 12 |
+
- operations
|
| 13 |
+
- decision-making
|
| 14 |
---
|
| 15 |
|
| 16 |
+
# E-commerce Returns Decision Environment
|
| 17 |
|
| 18 |
+
This environment simulates a real operations workflow in online retail: deciding how to handle customer return requests under policy constraints, latent fraud risk, and financial trade-offs.
|
| 19 |
|
| 20 |
+
It is designed as a **partially observable decision problem**, not a classification toy.
|
| 21 |
|
| 22 |
+
## Why this environment matters
|
| 23 |
|
| 24 |
+
Returns handling is a major cost center in e-commerce.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
In production settings, an operations associate (or AI agent) must balance:
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
- customer satisfaction,
|
| 29 |
+
- policy compliance,
|
| 30 |
+
- fraud prevention,
|
| 31 |
+
- and cost efficiency.
|
| 32 |
|
| 33 |
+
This environment captures that exact tension with structured observations, hidden variables, and deterministic graders.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
## Environment API (OpenEnv)
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
The environment follows the OpenEnv simulation API:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
- `reset(...)` -> initial observation
|
| 40 |
+
- `step(action)` -> observation, reward, done, info
|
| 41 |
+
- `state` -> current episode state
|
| 42 |
|
| 43 |
+
The `step` info channel is exposed via `observation.info`.
|
| 44 |
|
| 45 |
+
## Action space
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
`EcomAction`:
|
| 48 |
|
| 49 |
+
- `action_type`: one of `APPROVE`, `REJECT`, `ESCALATE`, `REQUEST_INFO`
|
| 50 |
+
- `reason_code`: required only when `action_type == REJECT`
|
| 51 |
+
- `TIME_EXPIRED`
|
| 52 |
+
- `POLICY_VIOLATION`
|
| 53 |
+
- `SUSPECTED_FRAUD`
|
| 54 |
|
| 55 |
+
## Observation space
|
|
|
|
|
|
|
| 56 |
|
| 57 |
+
`EcomObservation` fields:
|
|
|
|
|
|
|
| 58 |
|
| 59 |
+
- `return_reason`
|
| 60 |
+
- `product_category`
|
| 61 |
+
- `product_value` (`low | medium | high`)
|
| 62 |
+
- `days_since_purchase`
|
| 63 |
+
- `user_account_age_days`
|
| 64 |
+
- `product_condition_notes`
|
| 65 |
+
- `return_rate` (0.0 to 1.0)
|
| 66 |
+
- `total_orders`
|
| 67 |
+
- `policy_summary` (plain text, includes rules and exceptions)
|
| 68 |
+
- `info` (step metadata)
|
| 69 |
|
| 70 |
+
No identifier-only fields are included in the observation.
|
| 71 |
|
| 72 |
+
## Hidden state (grader-only)
|
| 73 |
|
| 74 |
+
The environment keeps the following latent variables hidden from the agent:
|
| 75 |
|
| 76 |
+
- `fraud_risk_score`
|
| 77 |
+
- `true_intent` (`genuine` or `abusive`)
|
| 78 |
+
- `cost_impact` by candidate action
|
| 79 |
+
- `optimal_action`
|
| 80 |
|
| 81 |
+
These are used to compute scores/rewards and evaluate decision quality.
|
| 82 |
|
| 83 |
+
## Episode flow and boundaries
|
|
|
|
|
|
|
| 84 |
|
| 85 |
+
- One request per episode.
|
| 86 |
+
- `APPROVE`, `REJECT`, `ESCALATE` are terminal actions (`done=True`).
|
| 87 |
+
- `REQUEST_INFO` is non-terminal on first use and deterministically refines existing observation fields:
|
| 88 |
+
- `product_condition_notes`
|
| 89 |
+
- `return_reason` (optional refinement)
|
| 90 |
+
- slight refinement of `return_rate`
|
| 91 |
+
- No new fields are introduced after `REQUEST_INFO`.
|
| 92 |
|
| 93 |
+
## Scenario generation
|
|
|
|
| 94 |
|
| 95 |
+
Scenarios are generated programmatically from controlled distributions.
|
|
|
|
| 96 |
|
| 97 |
+
The generator includes mandatory realism correlations:
|
|
|
|
|
|
|
| 98 |
|
| 99 |
+
- higher `return_rate` -> higher fraud likelihood,
|
| 100 |
+
- lower `return_rate` -> lower fraud likelihood,
|
| 101 |
+
- higher `product_value` -> higher fraud likelihood,
|
| 102 |
+
- lower `product_value` -> lower fraud likelihood.
|
| 103 |
|
| 104 |
+
Difficulty is not just fraud probability; it also changes ambiguity and signal conflict.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
## Reward design
|
| 107 |
|
| 108 |
+
Reward is deterministic and normalized to `[0.0, 1.0]`.
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
1. **Policy gate** (hard constraint)
|
| 111 |
+
- policy violation => reward `0.0`
|
| 112 |
+
2. Component scores are bounded independently:
|
| 113 |
+
- `financial_score in [0,1]`
|
| 114 |
+
- `fraud_score in [0,1]`
|
| 115 |
+
- `efficiency_score in [0,1]`
|
| 116 |
+
3. Weighted final score:
|
| 117 |
+
- `0.5 * financial + 0.3 * fraud + 0.2 * efficiency`
|
| 118 |
|
| 119 |
+
This avoids overflow and grader instability.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
+
## Tasks and graders (easy -> medium -> hard)
|
| 122 |
|
| 123 |
+
The environment ships with 3 deterministic benchmark tasks, each with fixed seed + threshold:
|
| 124 |
|
| 125 |
+
1. `easy_policy_compliance`
|
| 126 |
+
- clear low-risk case
|
| 127 |
+
- success threshold: `0.75`
|
| 128 |
+
2. `medium_balanced_judgment`
|
| 129 |
+
- ambiguous policy/risk trade-off
|
| 130 |
+
- success threshold: `0.68`
|
| 131 |
+
3. `hard_conflicting_signals`
|
| 132 |
+
- high-value conflicting signals + exception pressure
|
| 133 |
+
- success threshold: `0.62`
|
| 134 |
|
| 135 |
+
Terminal observation includes grader outputs in `info`:
|
|
|
|
| 136 |
|
| 137 |
+
- `grader_score` (0.0 to 1.0)
|
| 138 |
+
- `grader_success` (bool)
|
| 139 |
+
- detailed component `breakdown`
|
| 140 |
|
| 141 |
+
## Quick start
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
+
### Local dev server
|
| 144 |
|
| 145 |
+
```bash
|
| 146 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 147 |
+
```
|
| 148 |
|
| 149 |
+
### Python usage
|
| 150 |
|
| 151 |
```python
|
| 152 |
+
import asyncio
|
| 153 |
from ecom import EcomAction, EcomEnv
|
| 154 |
|
| 155 |
+
|
| 156 |
+
async def run():
|
| 157 |
+
env = await EcomEnv.from_docker_image("ecom-env:latest")
|
| 158 |
+
try:
|
| 159 |
+
result = await env.reset(task_name="medium_balanced_judgment")
|
| 160 |
+
# optional extra context
|
| 161 |
+
result = await env.step(EcomAction(action_type="REQUEST_INFO"))
|
| 162 |
+
# final decision
|
| 163 |
+
result = await env.step(EcomAction(action_type="REJECT", reason_code="SUSPECTED_FRAUD"))
|
| 164 |
+
print(result.reward, result.done, result.observation.info)
|
| 165 |
+
finally:
|
| 166 |
+
await env.close()
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
asyncio.run(run())
|
| 170 |
```
|
| 171 |
|
| 172 |
+
## Baseline inference
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
`inference.py` is at repo root as required.
|
| 175 |
|
| 176 |
+
Required env vars:
|
|
|
|
| 177 |
|
| 178 |
+
- `MODEL_NAME`
|
| 179 |
+
- `LOCAL_IMAGE_NAME`
|
| 180 |
+
- `HF_TOKEN` (or `OPENAI_API_KEY`)
|
| 181 |
+
|
| 182 |
+
Optional:
|
| 183 |
+
|
| 184 |
+
- `API_BASE_URL` (defaults to `https://api.openai.com/v1`)
|
| 185 |
+
|
| 186 |
+
Run:
|
| 187 |
+
|
| 188 |
+
```bash
|
| 189 |
+
python inference.py
|
| 190 |
```
|
| 191 |
|
| 192 |
+
The script emits strict structured logs:
|
| 193 |
|
| 194 |
+
- `[START] ...`
|
| 195 |
+
- `[STEP] ...`
|
| 196 |
+
- `[END] ...`
|
| 197 |
+
|
| 198 |
+
### Reproducible baseline scores
|
| 199 |
+
|
| 200 |
+
Current deterministic baseline (heuristic fallback) on default task seeds:
|
| 201 |
+
|
| 202 |
+
- `easy_policy_compliance`: `0.7997`
|
| 203 |
+
- `medium_balanced_judgment`: `0.8388`
|
| 204 |
+
- `hard_conflicting_signals`: `0.8253`
|
| 205 |
+
|
| 206 |
+
## Hugging Face Spaces deployment
|
| 207 |
+
|
| 208 |
+
From `ecom/`:
|
| 209 |
+
|
| 210 |
+
```bash
|
| 211 |
+
openenv push
|
| 212 |
```
|
| 213 |
|
| 214 |
+
Or explicit options:
|
| 215 |
|
| 216 |
+
```bash
|
| 217 |
+
openenv push --repo-id <namespace>/<space-name> --private
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
## Docker
|
| 221 |
|
| 222 |
+
Build from the environment root (`ecom/`):
|
| 223 |
|
| 224 |
```bash
|
| 225 |
+
docker build -t ecom-env:latest -f server/Dockerfile .
|
|
|
|
| 226 |
```
|
| 227 |
|
| 228 |
+
Run:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
+
```bash
|
| 231 |
+
docker run --rm -p 8000:8000 ecom-env:latest
|
| 232 |
+
```
|
| 233 |
|
| 234 |
+
Health check:
|
| 235 |
|
| 236 |
```bash
|
| 237 |
+
curl http://localhost:8000/health
|
| 238 |
```
|
| 239 |
|
| 240 |
+
## Validation
|
| 241 |
|
| 242 |
+
From `ecom/`:
|
| 243 |
+
|
| 244 |
+
```bash
|
| 245 |
+
openenv validate .
|
| 246 |
```
|
| 247 |
+
|
| 248 |
+
Optional pre-check from repository root:
|
| 249 |
+
|
| 250 |
+
```bash
|
| 251 |
+
./validate-submission.sh <your-space-url> .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
```
|
__init__.py
CHANGED
|
@@ -4,13 +4,14 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""Ecom
|
| 8 |
|
| 9 |
from .client import EcomEnv
|
| 10 |
-
from .models import EcomAction, EcomObservation
|
| 11 |
|
| 12 |
__all__ = [
|
| 13 |
"EcomAction",
|
| 14 |
"EcomObservation",
|
|
|
|
| 15 |
"EcomEnv",
|
| 16 |
]
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Ecom returns decision environment."""
|
| 8 |
|
| 9 |
from .client import EcomEnv
|
| 10 |
+
from .models import EcomAction, EcomObservation, EcomReward
|
| 11 |
|
| 12 |
__all__ = [
|
| 13 |
"EcomAction",
|
| 14 |
"EcomObservation",
|
| 15 |
+
"EcomReward",
|
| 16 |
"EcomEnv",
|
| 17 |
]
|
client.py
CHANGED
|
@@ -4,9 +4,9 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""Ecom
|
| 8 |
|
| 9 |
-
from typing import Dict
|
| 10 |
|
| 11 |
from openenv.core import EnvClient
|
| 12 |
from openenv.core.client_types import StepResult
|
|
@@ -15,85 +15,44 @@ from openenv.core.env_server.types import State
|
|
| 15 |
from .models import EcomAction, EcomObservation
|
| 16 |
|
| 17 |
|
| 18 |
-
class EcomEnv(
|
| 19 |
-
|
| 20 |
-
):
|
| 21 |
-
"""
|
| 22 |
-
Client for the Ecom Environment.
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
Example:
|
| 29 |
-
>>> # Connect to a running server
|
| 30 |
-
>>> with EcomEnv(base_url="http://localhost:8000") as client:
|
| 31 |
-
... result = client.reset()
|
| 32 |
-
... print(result.observation.echoed_message)
|
| 33 |
-
...
|
| 34 |
-
... result = client.step(EcomAction(message="Hello!"))
|
| 35 |
-
... print(result.observation.echoed_message)
|
| 36 |
-
|
| 37 |
-
Example with Docker:
|
| 38 |
-
>>> # Automatically start container and connect
|
| 39 |
-
>>> client = EcomEnv.from_docker_image("ecom-env:latest")
|
| 40 |
-
>>> try:
|
| 41 |
-
... result = client.reset()
|
| 42 |
-
... result = client.step(EcomAction(message="Test"))
|
| 43 |
-
... finally:
|
| 44 |
-
... client.close()
|
| 45 |
-
"""
|
| 46 |
-
|
| 47 |
-
def _step_payload(self, action: EcomAction) -> Dict:
|
| 48 |
-
"""
|
| 49 |
-
Convert EcomAction to JSON payload for step message.
|
| 50 |
-
|
| 51 |
-
Args:
|
| 52 |
-
action: EcomAction instance
|
| 53 |
-
|
| 54 |
-
Returns:
|
| 55 |
-
Dictionary representation suitable for JSON encoding
|
| 56 |
-
"""
|
| 57 |
-
return {
|
| 58 |
-
"message": action.message,
|
| 59 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
def _parse_result(self, payload: Dict) -> StepResult[EcomObservation]:
|
| 62 |
-
"""
|
| 63 |
-
Parse server response into StepResult[EcomObservation].
|
| 64 |
-
|
| 65 |
-
Args:
|
| 66 |
-
payload: JSON response data from server
|
| 67 |
-
|
| 68 |
-
Returns:
|
| 69 |
-
StepResult with EcomObservation
|
| 70 |
-
"""
|
| 71 |
obs_data = payload.get("observation", {})
|
| 72 |
observation = EcomObservation(
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
reward=payload.get("reward"),
|
| 77 |
-
metadata=obs_data.get("metadata", {}),
|
| 78 |
)
|
| 79 |
|
| 80 |
return StepResult(
|
| 81 |
observation=observation,
|
| 82 |
reward=payload.get("reward"),
|
| 83 |
-
done=payload.get("done", False),
|
| 84 |
)
|
| 85 |
|
| 86 |
-
def _parse_state(self, payload: Dict) -> State:
|
| 87 |
-
"""
|
| 88 |
-
Parse server response into State object.
|
| 89 |
-
|
| 90 |
-
Args:
|
| 91 |
-
payload: JSON response from state request
|
| 92 |
-
|
| 93 |
-
Returns:
|
| 94 |
-
State object with episode_id and step_count
|
| 95 |
-
"""
|
| 96 |
return State(
|
| 97 |
episode_id=payload.get("episode_id"),
|
| 98 |
-
step_count=payload.get("step_count", 0),
|
| 99 |
)
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Ecom returns decision environment client."""
|
| 8 |
|
| 9 |
+
from typing import Any, Dict
|
| 10 |
|
| 11 |
from openenv.core import EnvClient
|
| 12 |
from openenv.core.client_types import StepResult
|
|
|
|
| 15 |
from .models import EcomAction, EcomObservation
|
| 16 |
|
| 17 |
|
| 18 |
+
class EcomEnv(EnvClient[EcomAction, EcomObservation, State]):
|
| 19 |
+
"""Client for the returns decision environment."""
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
def _step_payload(self, action: EcomAction) -> Dict[str, Any]:
|
| 22 |
+
payload: Dict[str, Any] = {
|
| 23 |
+
"action_type": action.action_type,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
+
if action.reason_code is not None:
|
| 26 |
+
payload["reason_code"] = action.reason_code
|
| 27 |
+
if action.metadata:
|
| 28 |
+
payload["metadata"] = action.metadata
|
| 29 |
+
return payload
|
| 30 |
|
| 31 |
+
def _parse_result(self, payload: Dict[str, Any]) -> StepResult[EcomObservation]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
obs_data = payload.get("observation", {})
|
| 33 |
observation = EcomObservation(
|
| 34 |
+
return_reason=obs_data.get("return_reason", ""),
|
| 35 |
+
product_category=obs_data.get("product_category", ""),
|
| 36 |
+
product_value=obs_data.get("product_value", "low"),
|
| 37 |
+
days_since_purchase=int(obs_data.get("days_since_purchase", 0)),
|
| 38 |
+
user_account_age_days=int(obs_data.get("user_account_age_days", 0)),
|
| 39 |
+
product_condition_notes=obs_data.get("product_condition_notes", ""),
|
| 40 |
+
return_rate=float(obs_data.get("return_rate", 0.0)),
|
| 41 |
+
total_orders=int(obs_data.get("total_orders", 1)),
|
| 42 |
+
policy_summary=obs_data.get("policy_summary", ""),
|
| 43 |
+
info=obs_data.get("info", {}),
|
| 44 |
+
done=bool(payload.get("done", False)),
|
| 45 |
reward=payload.get("reward"),
|
|
|
|
| 46 |
)
|
| 47 |
|
| 48 |
return StepResult(
|
| 49 |
observation=observation,
|
| 50 |
reward=payload.get("reward"),
|
| 51 |
+
done=bool(payload.get("done", False)),
|
| 52 |
)
|
| 53 |
|
| 54 |
+
def _parse_state(self, payload: Dict[str, Any]) -> State:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
return State(
|
| 56 |
episode_id=payload.get("episode_id"),
|
| 57 |
+
step_count=int(payload.get("step_count", 0)),
|
| 58 |
)
|
models.py
CHANGED
|
@@ -4,24 +4,67 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""
|
| 8 |
-
Data models for the Ecom Environment.
|
| 9 |
|
| 10 |
-
|
| 11 |
-
"""
|
| 12 |
|
| 13 |
from openenv.core.env_server.types import Action, Observation
|
| 14 |
-
from pydantic import Field
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class EcomAction(Action):
|
| 18 |
-
"""Action for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
class EcomObservation(Observation):
|
| 24 |
-
"""Observation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Data models for the returns decision environment."""
|
|
|
|
| 8 |
|
| 9 |
+
from typing import Any, Dict, Literal, Optional
|
|
|
|
| 10 |
|
| 11 |
from openenv.core.env_server.types import Action, Observation
|
| 12 |
+
from pydantic import BaseModel, Field, model_validator
|
| 13 |
+
|
| 14 |
+
ActionType = Literal["APPROVE", "REJECT", "ESCALATE", "REQUEST_INFO"]
|
| 15 |
+
RejectReason = Literal["TIME_EXPIRED", "POLICY_VIOLATION", "SUSPECTED_FRAUD"]
|
| 16 |
+
ValueBucket = Literal["low", "medium", "high"]
|
| 17 |
|
| 18 |
|
| 19 |
class EcomAction(Action):
|
| 20 |
+
"""Action schema for return-request handling."""
|
| 21 |
+
|
| 22 |
+
action_type: ActionType = Field(..., description="Decision type")
|
| 23 |
+
reason_code: Optional[RejectReason] = Field(
|
| 24 |
+
default=None,
|
| 25 |
+
description="Required when action_type is REJECT",
|
| 26 |
+
)
|
| 27 |
|
| 28 |
+
@model_validator(mode="after")
|
| 29 |
+
def validate_reason_code(self) -> "EcomAction":
|
| 30 |
+
if self.action_type == "REJECT" and self.reason_code is None:
|
| 31 |
+
raise ValueError("reason_code is required when action_type is REJECT")
|
| 32 |
+
if self.action_type != "REJECT" and self.reason_code is not None:
|
| 33 |
+
raise ValueError("reason_code is only allowed when action_type is REJECT")
|
| 34 |
+
return self
|
| 35 |
|
| 36 |
|
| 37 |
class EcomObservation(Observation):
|
| 38 |
+
"""Observation schema for the partially observable returns task."""
|
| 39 |
+
|
| 40 |
+
return_reason: str = Field(..., description="Customer-provided return reason")
|
| 41 |
+
product_category: str = Field(..., description="Product category")
|
| 42 |
+
product_value: ValueBucket = Field(..., description="Value bucket")
|
| 43 |
+
days_since_purchase: int = Field(..., ge=0, description="Elapsed days")
|
| 44 |
+
user_account_age_days: int = Field(..., ge=0, description="Account age in days")
|
| 45 |
+
product_condition_notes: str = Field(..., description="Condition summary")
|
| 46 |
+
return_rate: float = Field(
|
| 47 |
+
..., ge=0.0, le=1.0, description="Historical return rate"
|
| 48 |
+
)
|
| 49 |
+
total_orders: int = Field(..., ge=1, description="Total historical orders")
|
| 50 |
+
policy_summary: str = Field(
|
| 51 |
+
...,
|
| 52 |
+
description="Natural-language policy text with rules and exceptions",
|
| 53 |
+
)
|
| 54 |
+
info: Dict[str, Any] = Field(
|
| 55 |
+
default_factory=dict,
|
| 56 |
+
description="Step info payload (OpenEnv-compatible info channel)",
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class EcomReward(BaseModel):
|
| 61 |
+
"""Typed reward breakdown used by deterministic task graders."""
|
| 62 |
|
| 63 |
+
policy_gate: float = Field(..., ge=0.0, le=1.0)
|
| 64 |
+
financial_score: float = Field(..., ge=0.0, le=1.0)
|
| 65 |
+
fraud_score: float = Field(..., ge=0.0, le=1.0)
|
| 66 |
+
efficiency_score: float = Field(..., ge=0.0, le=1.0)
|
| 67 |
+
normalized_reward: float = Field(..., ge=0.0, le=1.0)
|
| 68 |
+
policy_violation: bool
|
| 69 |
+
optimal_action: Optional[str] = None
|
| 70 |
+
matched_optimal: Optional[bool] = None
|
openenv.yaml
CHANGED
|
@@ -4,4 +4,8 @@ type: space
|
|
| 4 |
runtime: fastapi
|
| 5 |
app: server.app:app
|
| 6 |
port: 8000
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
runtime: fastapi
|
| 5 |
app: server.app:app
|
| 6 |
port: 8000
|
| 7 |
+
description: "Policy-constrained e-commerce returns decision environment with latent fraud risk"
|
| 8 |
+
tags:
|
| 9 |
+
- openenv
|
| 10 |
+
- operations
|
| 11 |
+
- decision-making
|
server/__init__.py
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""Ecom environment server components."""
|
| 8 |
|
| 9 |
from .ecom_environment import EcomEnvironment
|
| 10 |
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Ecom returns environment server components."""
|
| 8 |
|
| 9 |
from .ecom_environment import EcomEnvironment
|
| 10 |
|
server/app.py
CHANGED
|
@@ -4,29 +4,7 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""
|
| 8 |
-
FastAPI application for the Ecom Environment.
|
| 9 |
-
|
| 10 |
-
This module creates an HTTP server that exposes the EcomEnvironment
|
| 11 |
-
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
-
|
| 13 |
-
Endpoints:
|
| 14 |
-
- POST /reset: Reset the environment
|
| 15 |
-
- POST /step: Execute an action
|
| 16 |
-
- GET /state: Get current environment state
|
| 17 |
-
- GET /schema: Get action/observation schemas
|
| 18 |
-
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
-
|
| 20 |
-
Usage:
|
| 21 |
-
# Development (with auto-reload):
|
| 22 |
-
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
-
|
| 24 |
-
# Production:
|
| 25 |
-
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
-
|
| 27 |
-
# Or run directly:
|
| 28 |
-
python -m server.app
|
| 29 |
-
"""
|
| 30 |
|
| 31 |
try:
|
| 32 |
from openenv.core.env_server.http_server import create_app
|
|
@@ -35,18 +13,39 @@ except Exception as e: # pragma: no cover
|
|
| 35 |
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
) from e
|
| 37 |
|
|
|
|
|
|
|
| 38 |
|
| 39 |
from ecom.models import EcomAction, EcomObservation
|
| 40 |
from ecom.server.ecom_environment import EcomEnvironment
|
| 41 |
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
# Create the app with web interface and README integration
|
| 44 |
app = create_app(
|
| 45 |
-
|
| 46 |
EcomAction,
|
| 47 |
EcomObservation,
|
| 48 |
env_name="ecom",
|
| 49 |
-
max_concurrent_envs=
|
| 50 |
)
|
| 51 |
|
| 52 |
|
|
@@ -73,9 +72,4 @@ def main(host: str = "0.0.0.0", port: int = 8000):
|
|
| 73 |
|
| 74 |
|
| 75 |
if __name__ == "__main__":
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
parser = argparse.ArgumentParser()
|
| 79 |
-
parser.add_argument("--port", type=int, default=8000)
|
| 80 |
-
args = parser.parse_args()
|
| 81 |
-
main(port=args.port)
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""FastAPI application for the returns decision environment."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
try:
|
| 10 |
from openenv.core.env_server.http_server import create_app
|
|
|
|
| 13 |
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 14 |
) from e
|
| 15 |
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
|
| 19 |
from ecom.models import EcomAction, EcomObservation
|
| 20 |
from ecom.server.ecom_environment import EcomEnvironment
|
| 21 |
|
| 22 |
|
| 23 |
+
def _env_factory() -> EcomEnvironment:
|
| 24 |
+
mode = os.getenv("ECOM_MODE", "medium").strip().lower()
|
| 25 |
+
if mode not in {"easy", "medium", "hard"}:
|
| 26 |
+
mode = "medium"
|
| 27 |
+
|
| 28 |
+
def _maybe_float(name: str) -> float | None:
|
| 29 |
+
raw = os.getenv(name)
|
| 30 |
+
if raw is None or raw.strip() == "":
|
| 31 |
+
return None
|
| 32 |
+
return float(raw)
|
| 33 |
+
|
| 34 |
+
return EcomEnvironment(
|
| 35 |
+
mode=mode,
|
| 36 |
+
fraud_probability=_maybe_float("ECOM_FRAUD_PROBABILITY"),
|
| 37 |
+
ambiguity_rate=_maybe_float("ECOM_AMBIGUITY_RATE"),
|
| 38 |
+
conflict_rate=_maybe_float("ECOM_CONFLICT_RATE"),
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
# Create the app with web interface and README integration
|
| 43 |
app = create_app(
|
| 44 |
+
_env_factory,
|
| 45 |
EcomAction,
|
| 46 |
EcomObservation,
|
| 47 |
env_name="ecom",
|
| 48 |
+
max_concurrent_envs=4,
|
| 49 |
)
|
| 50 |
|
| 51 |
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
if __name__ == "__main__":
|
| 75 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
server/ecom_environment.py
CHANGED
|
@@ -4,99 +4,830 @@
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
-
"""
|
| 8 |
-
Ecom Environment Implementation.
|
| 9 |
|
| 10 |
-
|
| 11 |
-
Perfect for testing HTTP server infrastructure.
|
| 12 |
-
"""
|
| 13 |
|
|
|
|
|
|
|
| 14 |
from uuid import uuid4
|
| 15 |
|
| 16 |
from openenv.core.env_server.interfaces import Environment
|
| 17 |
from openenv.core.env_server.types import State
|
| 18 |
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
This environment is designed for testing the HTTP server infrastructure.
|
| 28 |
-
It maintains minimal state and simply echoes back whatever message it receives.
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
# Enable concurrent WebSocket sessions.
|
| 41 |
-
# Set to True if your environment isolates state between instances.
|
| 42 |
-
# When True, multiple WebSocket clients can connect simultaneously, each
|
| 43 |
-
# getting their own environment instance (when using factory mode in app.py).
|
| 44 |
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
""
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
Returns:
|
| 56 |
-
EcomObservation with a ready message
|
| 57 |
-
"""
|
| 58 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 59 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
done=False,
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
-
def step(
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
Returns:
|
| 76 |
-
EcomObservation with the echoed message and its length
|
| 77 |
-
"""
|
| 78 |
self._state.step_count += 1
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
reward=reward,
|
| 91 |
-
|
|
|
|
| 92 |
)
|
| 93 |
|
| 94 |
@property
|
| 95 |
def state(self) -> State:
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
# This source code is licensed under the BSD-style license found in the
|
| 5 |
# LICENSE file in the root directory of this source tree.
|
| 6 |
|
| 7 |
+
"""Returns decision environment implementation."""
|
|
|
|
| 8 |
|
| 9 |
+
from __future__ import annotations
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from typing import Any, Dict, Literal, Optional
|
| 13 |
from uuid import uuid4
|
| 14 |
|
| 15 |
from openenv.core.env_server.interfaces import Environment
|
| 16 |
from openenv.core.env_server.types import State
|
| 17 |
|
| 18 |
+
from ecom.models import EcomAction, EcomObservation, EcomReward
|
| 19 |
|
| 20 |
+
Difficulty = Literal["easy", "medium", "hard"]
|
| 21 |
+
Intent = Literal["genuine", "abusive"]
|
| 22 |
+
FinalAction = Literal["APPROVE", "REJECT", "ESCALATE"]
|
| 23 |
|
| 24 |
|
| 25 |
+
@dataclass(frozen=True)
|
| 26 |
+
class DifficultyConfig:
|
| 27 |
+
fraud_probability: float
|
| 28 |
+
ambiguity_rate: float
|
| 29 |
+
conflict_rate: float
|
| 30 |
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
@dataclass(frozen=True)
|
| 33 |
+
class TaskSpec:
|
| 34 |
+
difficulty: Difficulty
|
| 35 |
+
seed: int
|
| 36 |
+
objective: str
|
| 37 |
+
success_threshold: float
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@dataclass(frozen=True)
|
| 41 |
+
class PolicyProfile:
|
| 42 |
+
window_days: int
|
| 43 |
+
non_returnable: tuple[str, ...]
|
| 44 |
+
exception_text: str
|
| 45 |
+
|
| 46 |
+
def summary(self) -> str:
|
| 47 |
+
categories = ", ".join(self.non_returnable)
|
| 48 |
+
return (
|
| 49 |
+
f"Returns accepted within {self.window_days} days. "
|
| 50 |
+
f"Non-returnable categories: {categories}. "
|
| 51 |
+
f"Exception: {self.exception_text}."
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@dataclass
|
| 56 |
+
class HiddenCaseState:
|
| 57 |
+
fraud_risk_score: float
|
| 58 |
+
true_intent: Intent
|
| 59 |
+
optimal_action: FinalAction
|
| 60 |
+
cost_impact: Dict[str, float]
|
| 61 |
+
category_policy_violated: bool
|
| 62 |
+
time_policy_violated: bool
|
| 63 |
+
exception_applies: bool
|
| 64 |
+
is_ambiguous: bool
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@dataclass
|
| 68 |
+
class VisibleCase:
|
| 69 |
+
return_reason: str
|
| 70 |
+
product_category: str
|
| 71 |
+
product_value: Literal["low", "medium", "high"]
|
| 72 |
+
days_since_purchase: int
|
| 73 |
+
user_account_age_days: int
|
| 74 |
+
product_condition_notes: str
|
| 75 |
+
return_rate: float
|
| 76 |
+
total_orders: int
|
| 77 |
+
policy_summary: str
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class EcomEnvironment(Environment[EcomAction, EcomObservation, State]):
|
| 81 |
+
"""Single-request return decision environment with partial observability."""
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 84 |
|
| 85 |
+
_VALUE_INDEX: Dict[str, int] = {"low": 0, "medium": 1, "high": 2}
|
| 86 |
+
|
| 87 |
+
_DIFFICULTY_DEFAULTS: Dict[Difficulty, DifficultyConfig] = {
|
| 88 |
+
"easy": DifficultyConfig(
|
| 89 |
+
fraud_probability=0.10, ambiguity_rate=0.10, conflict_rate=0.05
|
| 90 |
+
),
|
| 91 |
+
"medium": DifficultyConfig(
|
| 92 |
+
fraud_probability=0.25, ambiguity_rate=0.30, conflict_rate=0.20
|
| 93 |
+
),
|
| 94 |
+
"hard": DifficultyConfig(
|
| 95 |
+
fraud_probability=0.40, ambiguity_rate=0.55, conflict_rate=0.45
|
| 96 |
+
),
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
_CATEGORY_POLICIES: Dict[str, PolicyProfile] = {
|
| 100 |
+
"electronics": PolicyProfile(
|
| 101 |
+
window_days=30,
|
| 102 |
+
non_returnable=("final-sale", "personal-care"),
|
| 103 |
+
exception_text="Defective electronics remain returnable even beyond standard restrictions",
|
| 104 |
+
),
|
| 105 |
+
"fashion": PolicyProfile(
|
| 106 |
+
window_days=45,
|
| 107 |
+
non_returnable=("underwear", "swimwear"),
|
| 108 |
+
exception_text="Quality defects override category restrictions",
|
| 109 |
+
),
|
| 110 |
+
"home": PolicyProfile(
|
| 111 |
+
window_days=60,
|
| 112 |
+
non_returnable=("custom-furniture",),
|
| 113 |
+
exception_text="Damage in transit is always eligible for return",
|
| 114 |
+
),
|
| 115 |
+
}
|
| 116 |
|
| 117 |
+
_CONDITION_NOTES: Dict[str, tuple[str, str]] = {
|
| 118 |
+
"defective": (
|
| 119 |
+
"Customer reports device fails to power on intermittently",
|
| 120 |
+
"Diagnostic notes show repeat hardware faults and consistent malfunction",
|
| 121 |
+
),
|
| 122 |
+
"wrong-item": (
|
| 123 |
+
"Packaging label and item description appear mismatched",
|
| 124 |
+
"Warehouse scan and photo check confirm SKU mismatch from fulfillment",
|
| 125 |
+
),
|
| 126 |
+
"damaged-shipping": (
|
| 127 |
+
"Outer box shows dents and seal damage from transit",
|
| 128 |
+
"Carrier handoff log notes impact event with photo-confirmed product damage",
|
| 129 |
+
),
|
| 130 |
+
"changed-mind": (
|
| 131 |
+
"Customer no longer wants the item and packaging appears opened",
|
| 132 |
+
"Follow-up confirms item used lightly with no defect evidence",
|
| 133 |
+
),
|
| 134 |
+
"size-issue": (
|
| 135 |
+
"Customer reports fit mismatch after trying item once",
|
| 136 |
+
"Follow-up confirms sizing mismatch with item otherwise in resellable condition",
|
| 137 |
+
),
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
_TASKS: Dict[str, TaskSpec] = {
|
| 141 |
+
"easy_policy_compliance": TaskSpec(
|
| 142 |
+
difficulty="easy",
|
| 143 |
+
seed=111,
|
| 144 |
+
objective=(
|
| 145 |
+
"Handle a straightforward, low-risk return and maximize policy-compliant value."
|
| 146 |
+
),
|
| 147 |
+
success_threshold=0.75,
|
| 148 |
+
),
|
| 149 |
+
"medium_balanced_judgment": TaskSpec(
|
| 150 |
+
difficulty="medium",
|
| 151 |
+
seed=222,
|
| 152 |
+
objective=(
|
| 153 |
+
"Balance policy, fraud risk, and cost trade-offs in an ambiguous return request."
|
| 154 |
+
),
|
| 155 |
+
success_threshold=0.68,
|
| 156 |
+
),
|
| 157 |
+
"hard_conflicting_signals": TaskSpec(
|
| 158 |
+
difficulty="hard",
|
| 159 |
+
seed=333,
|
| 160 |
+
objective=(
|
| 161 |
+
"Resolve conflicting policy exceptions and risk signals in a high-value case."
|
| 162 |
+
),
|
| 163 |
+
success_threshold=0.62,
|
| 164 |
+
),
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
def __init__(
|
| 168 |
+
self,
|
| 169 |
+
mode: Difficulty = "medium",
|
| 170 |
+
*,
|
| 171 |
+
fraud_probability: Optional[float] = None,
|
| 172 |
+
ambiguity_rate: Optional[float] = None,
|
| 173 |
+
conflict_rate: Optional[float] = None,
|
| 174 |
+
task_name: Optional[str] = None,
|
| 175 |
+
):
|
| 176 |
+
self._task_name: Optional[str] = task_name
|
| 177 |
+
self._task_spec: Optional[TaskSpec] = None
|
| 178 |
+
|
| 179 |
+
if task_name is not None:
|
| 180 |
+
if task_name not in self._TASKS:
|
| 181 |
+
valid = ", ".join(sorted(self._TASKS))
|
| 182 |
+
raise ValueError(
|
| 183 |
+
f"Unknown task_name '{task_name}'. Valid tasks: {valid}"
|
| 184 |
+
)
|
| 185 |
+
self._task_spec = self._TASKS[task_name]
|
| 186 |
+
mode = self._task_spec.difficulty
|
| 187 |
+
|
| 188 |
+
if mode not in self._DIFFICULTY_DEFAULTS:
|
| 189 |
+
raise ValueError("mode must be one of: easy, medium, hard")
|
| 190 |
+
|
| 191 |
+
self._mode: Difficulty = mode
|
| 192 |
+
base_cfg = self._DIFFICULTY_DEFAULTS[mode]
|
| 193 |
+
self._cfg = DifficultyConfig(
|
| 194 |
+
fraud_probability=self._clamp01(
|
| 195 |
+
base_cfg.fraud_probability
|
| 196 |
+
if fraud_probability is None
|
| 197 |
+
else fraud_probability
|
| 198 |
+
),
|
| 199 |
+
ambiguity_rate=self._clamp01(
|
| 200 |
+
base_cfg.ambiguity_rate if ambiguity_rate is None else ambiguity_rate
|
| 201 |
+
),
|
| 202 |
+
conflict_rate=self._clamp01(
|
| 203 |
+
base_cfg.conflict_rate if conflict_rate is None else conflict_rate
|
| 204 |
+
),
|
| 205 |
+
)
|
| 206 |
|
|
|
|
|
|
|
|
|
|
| 207 |
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 208 |
+
self._visible_case: Optional[VisibleCase] = None
|
| 209 |
+
self._hidden_case: Optional[HiddenCaseState] = None
|
| 210 |
+
self._requested_info = False
|
| 211 |
+
self._done = False
|
| 212 |
+
self._task_seed: Optional[int] = (
|
| 213 |
+
self._task_spec.seed if self._task_spec else None
|
| 214 |
+
)
|
| 215 |
|
| 216 |
+
def reset(
|
| 217 |
+
self,
|
| 218 |
+
seed: Optional[int] = None,
|
| 219 |
+
episode_id: Optional[str] = None,
|
| 220 |
+
task_name: Optional[str] = None,
|
| 221 |
+
**kwargs,
|
| 222 |
+
) -> EcomObservation:
|
| 223 |
+
del kwargs
|
| 224 |
+
if task_name is not None:
|
| 225 |
+
if task_name not in self._TASKS:
|
| 226 |
+
valid = ", ".join(sorted(self._TASKS))
|
| 227 |
+
raise ValueError(
|
| 228 |
+
f"Unknown task_name '{task_name}'. Valid tasks: {valid}"
|
| 229 |
+
)
|
| 230 |
+
self._task_name = task_name
|
| 231 |
+
self._task_spec = self._TASKS[task_name]
|
| 232 |
+
self._task_seed = self._task_spec.seed
|
| 233 |
+
self._mode = self._task_spec.difficulty
|
| 234 |
+
base_cfg = self._DIFFICULTY_DEFAULTS[self._mode]
|
| 235 |
+
self._cfg = DifficultyConfig(
|
| 236 |
+
fraud_probability=base_cfg.fraud_probability,
|
| 237 |
+
ambiguity_rate=base_cfg.ambiguity_rate,
|
| 238 |
+
conflict_rate=base_cfg.conflict_rate,
|
| 239 |
+
)
|
| 240 |
+
|
| 241 |
+
self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
|
| 242 |
+
self._requested_info = False
|
| 243 |
+
self._done = False
|
| 244 |
+
|
| 245 |
+
effective_seed = seed
|
| 246 |
+
if effective_seed is None and self._task_seed is not None:
|
| 247 |
+
effective_seed = self._task_seed
|
| 248 |
+
|
| 249 |
+
rng = self._rng(effective_seed)
|
| 250 |
+
self._visible_case, self._hidden_case = self._generate_case(rng)
|
| 251 |
+
|
| 252 |
+
return self._to_observation(
|
| 253 |
+
self._visible_case,
|
| 254 |
+
reward=None,
|
| 255 |
done=False,
|
| 256 |
+
info={
|
| 257 |
+
"mode": self._mode,
|
| 258 |
+
"task_name": self._task_name,
|
| 259 |
+
"task_objective": self._task_spec.objective
|
| 260 |
+
if self._task_spec
|
| 261 |
+
else None,
|
| 262 |
+
"task_seed": effective_seed,
|
| 263 |
+
"phase": "initial",
|
| 264 |
+
"step_contract": "observation_reward_done_info",
|
| 265 |
+
},
|
| 266 |
)
|
| 267 |
|
| 268 |
+
def step(
|
| 269 |
+
self,
|
| 270 |
+
action: EcomAction,
|
| 271 |
+
timeout_s: Optional[float] = None,
|
| 272 |
+
**kwargs,
|
| 273 |
+
) -> EcomObservation:
|
| 274 |
+
del timeout_s, kwargs
|
| 275 |
+
if self._visible_case is None or self._hidden_case is None:
|
| 276 |
+
# Allow stateless HTTP /step calls by lazily initializing an episode.
|
| 277 |
+
self.reset()
|
| 278 |
+
if self._done:
|
| 279 |
+
raise RuntimeError(
|
| 280 |
+
"Episode already terminated. Call reset() to start a new episode"
|
| 281 |
+
)
|
| 282 |
|
|
|
|
|
|
|
|
|
|
| 283 |
self._state.step_count += 1
|
| 284 |
|
| 285 |
+
if action.action_type == "REQUEST_INFO":
|
| 286 |
+
if self._requested_info:
|
| 287 |
+
info = {
|
| 288 |
+
"invalid_action": "REQUEST_INFO already used",
|
| 289 |
+
"allowed_actions": ["APPROVE", "REJECT", "ESCALATE"],
|
| 290 |
+
"step_penalty": -0.10,
|
| 291 |
+
"step_contract": "observation_reward_done_info",
|
| 292 |
+
}
|
| 293 |
+
return self._to_observation(
|
| 294 |
+
self._visible_case,
|
| 295 |
+
reward=-0.10,
|
| 296 |
+
done=False,
|
| 297 |
+
info=info,
|
| 298 |
+
)
|
| 299 |
|
| 300 |
+
self._requested_info = True
|
| 301 |
+
self._visible_case = self._refine_after_request_info(
|
| 302 |
+
self._visible_case,
|
| 303 |
+
self._hidden_case,
|
| 304 |
+
)
|
| 305 |
+
info_gain_reward = 0.08 if self._hidden_case.is_ambiguous else -0.03
|
| 306 |
+
info = {
|
| 307 |
+
"phase": "post_request_info",
|
| 308 |
+
"revealed": ["product_condition_notes", "return_reason"],
|
| 309 |
+
"step_reward": info_gain_reward,
|
| 310 |
+
"step_contract": "observation_reward_done_info",
|
| 311 |
+
}
|
| 312 |
+
return self._to_observation(
|
| 313 |
+
self._visible_case,
|
| 314 |
+
reward=info_gain_reward,
|
| 315 |
+
done=False,
|
| 316 |
+
info=info,
|
| 317 |
+
)
|
| 318 |
|
| 319 |
+
if action.action_type not in ("APPROVE", "REJECT", "ESCALATE"):
|
| 320 |
+
info = {
|
| 321 |
+
"invalid_action": "Final action must be APPROVE, REJECT, or ESCALATE",
|
| 322 |
+
"step_penalty": -0.05,
|
| 323 |
+
"step_contract": "observation_reward_done_info",
|
| 324 |
+
}
|
| 325 |
+
return self._to_observation(
|
| 326 |
+
self._visible_case,
|
| 327 |
+
reward=-0.05,
|
| 328 |
+
done=False,
|
| 329 |
+
info=info,
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
reward, breakdown = self._evaluate(
|
| 333 |
+
action, self._visible_case, self._hidden_case
|
| 334 |
+
)
|
| 335 |
+
self._done = True
|
| 336 |
+
info = {
|
| 337 |
+
"mode": self._mode,
|
| 338 |
+
"task_name": self._task_name,
|
| 339 |
+
"phase": "terminal",
|
| 340 |
+
"breakdown": breakdown,
|
| 341 |
+
"grader_score": float(breakdown["normalized_reward"]),
|
| 342 |
+
"grader_success": self._task_success(float(breakdown["normalized_reward"])),
|
| 343 |
+
"step_contract": "observation_reward_done_info",
|
| 344 |
+
}
|
| 345 |
+
return self._to_observation(
|
| 346 |
+
self._visible_case,
|
| 347 |
reward=reward,
|
| 348 |
+
done=True,
|
| 349 |
+
info=info,
|
| 350 |
)
|
| 351 |
|
| 352 |
@property
|
| 353 |
def state(self) -> State:
|
| 354 |
+
return State(
|
| 355 |
+
episode_id=self._state.episode_id,
|
| 356 |
+
step_count=self._state.step_count,
|
| 357 |
+
mode=self._mode,
|
| 358 |
+
task_name=self._task_name,
|
| 359 |
+
task_objective=self._task_spec.objective if self._task_spec else None,
|
| 360 |
+
done=self._done,
|
| 361 |
+
requested_info=self._requested_info,
|
| 362 |
+
)
|
| 363 |
+
|
| 364 |
+
@classmethod
|
| 365 |
+
def task_names(cls) -> tuple[str, ...]:
|
| 366 |
+
return tuple(cls._TASKS.keys())
|
| 367 |
+
|
| 368 |
+
@classmethod
|
| 369 |
+
def task_spec(cls, task_name: str) -> TaskSpec:
|
| 370 |
+
if task_name not in cls._TASKS:
|
| 371 |
+
valid = ", ".join(sorted(cls._TASKS))
|
| 372 |
+
raise ValueError(f"Unknown task_name '{task_name}'. Valid tasks: {valid}")
|
| 373 |
+
return cls._TASKS[task_name]
|
| 374 |
+
|
| 375 |
+
def _task_success(self, score: float) -> bool:
|
| 376 |
+
if self._task_spec is None:
|
| 377 |
+
return score >= 0.7
|
| 378 |
+
return score >= self._task_spec.success_threshold
|
| 379 |
+
|
| 380 |
+
def grader_score(self, action: EcomAction) -> float:
|
| 381 |
+
if self._visible_case is None or self._hidden_case is None:
|
| 382 |
+
raise RuntimeError("Environment must be reset() before grader scoring")
|
| 383 |
+
score, _ = self._evaluate(action, self._visible_case, self._hidden_case)
|
| 384 |
+
return score
|
| 385 |
+
|
| 386 |
+
def get_metadata(self) -> "EnvironmentMetadata":
|
| 387 |
+
from openenv.core.env_server.types import EnvironmentMetadata
|
| 388 |
+
|
| 389 |
+
return EnvironmentMetadata(
|
| 390 |
+
name="ecom-returns-decision",
|
| 391 |
+
description=(
|
| 392 |
+
"Operational return-decision environment with policy constraints, "
|
| 393 |
+
"latent fraud risk, and cost-aware grading."
|
| 394 |
+
),
|
| 395 |
+
version="1.0.0",
|
| 396 |
+
author="OpenEnv_H",
|
| 397 |
+
documentation_url="https://huggingface.co/spaces/<repo-id>",
|
| 398 |
+
)
|
| 399 |
+
|
| 400 |
+
@staticmethod
|
| 401 |
+
def _rng(seed: Optional[int]):
|
| 402 |
+
import random
|
| 403 |
+
|
| 404 |
+
return random.Random(seed)
|
| 405 |
+
|
| 406 |
+
def _generate_case(self, rng) -> tuple[VisibleCase, HiddenCaseState]:
|
| 407 |
+
category = rng.choice(tuple(self._CATEGORY_POLICIES.keys()))
|
| 408 |
+
policy = self._CATEGORY_POLICIES[category]
|
| 409 |
+
|
| 410 |
+
return_reason = self._weighted_choice(
|
| 411 |
+
rng,
|
| 412 |
+
{
|
| 413 |
+
"defective": 0.24,
|
| 414 |
+
"wrong-item": 0.14,
|
| 415 |
+
"damaged-shipping": 0.12,
|
| 416 |
+
"changed-mind": 0.28,
|
| 417 |
+
"size-issue": 0.22,
|
| 418 |
+
},
|
| 419 |
+
)
|
| 420 |
+
value_bucket = self._weighted_choice(
|
| 421 |
+
rng,
|
| 422 |
+
{
|
| 423 |
+
"low": 0.40,
|
| 424 |
+
"medium": 0.40,
|
| 425 |
+
"high": 0.20,
|
| 426 |
+
},
|
| 427 |
+
)
|
| 428 |
+
|
| 429 |
+
days_since_purchase = rng.randint(0, 90)
|
| 430 |
+
user_account_age_days = rng.randint(15, 2200)
|
| 431 |
+
total_orders = rng.randint(2, 220)
|
| 432 |
+
|
| 433 |
+
# Mandatory behavioral signal.
|
| 434 |
+
return_rate = self._sample_return_rate(rng, total_orders)
|
| 435 |
+
|
| 436 |
+
# Mandatory correlations.
|
| 437 |
+
# 1) Higher return_rate -> higher fraud, lower return_rate -> lower fraud.
|
| 438 |
+
# 2) Higher product_value -> higher fraud, lower product_value -> lower fraud.
|
| 439 |
+
base_risk = self._cfg.fraud_probability
|
| 440 |
+
risk = base_risk
|
| 441 |
+
risk += 0.35 * (return_rate - 0.30)
|
| 442 |
+
risk += 0.10 * (self._VALUE_INDEX[value_bucket] - 1)
|
| 443 |
+
risk += 0.08 if return_reason == "changed-mind" else 0.0
|
| 444 |
+
risk -= (
|
| 445 |
+
0.10
|
| 446 |
+
if return_reason in ("defective", "wrong-item", "damaged-shipping")
|
| 447 |
+
else 0.0
|
| 448 |
+
)
|
| 449 |
+
risk += 0.10 if user_account_age_days < 90 else 0.0
|
| 450 |
+
fraud_risk_score = self._clamp01(risk)
|
| 451 |
+
|
| 452 |
+
# Intent depends on computed latent risk, not independent coin flips.
|
| 453 |
+
true_intent: Intent = (
|
| 454 |
+
"abusive" if rng.random() < fraud_risk_score else "genuine"
|
| 455 |
+
)
|
| 456 |
+
|
| 457 |
+
# Policy constraints with exception support.
|
| 458 |
+
exception_applies = return_reason in ("defective", "damaged-shipping")
|
| 459 |
+
|
| 460 |
+
category_policy_violated = False
|
| 461 |
+
if not exception_applies:
|
| 462 |
+
category_flag_prob = 0.10 + 0.12 * self._cfg.conflict_rate
|
| 463 |
+
if return_reason == "changed-mind":
|
| 464 |
+
category_flag_prob += 0.10
|
| 465 |
+
if rng.random() < self._clamp01(category_flag_prob):
|
| 466 |
+
policy_tag = rng.choice(policy.non_returnable)
|
| 467 |
+
category_policy_violated = True
|
| 468 |
+
else:
|
| 469 |
+
policy_tag = None
|
| 470 |
+
else:
|
| 471 |
+
policy_tag = None
|
| 472 |
+
|
| 473 |
+
time_policy_violated = (
|
| 474 |
+
days_since_purchase > policy.window_days and not exception_applies
|
| 475 |
+
)
|
| 476 |
+
|
| 477 |
+
is_ambiguous = rng.random() < self._cfg.ambiguity_rate
|
| 478 |
+
is_conflicting = rng.random() < self._cfg.conflict_rate
|
| 479 |
+
|
| 480 |
+
# Hardness is not just fraud probability; ambiguity and conflicts reshape signals.
|
| 481 |
+
condition_brief, condition_detailed = self._CONDITION_NOTES[return_reason]
|
| 482 |
+
if category_policy_violated and policy_tag is not None:
|
| 483 |
+
condition_brief += (
|
| 484 |
+
f"; order line is tagged under restricted class '{policy_tag}'"
|
| 485 |
+
)
|
| 486 |
+
condition_detailed += (
|
| 487 |
+
f"; policy audit confirms '{policy_tag}' item class on this order line"
|
| 488 |
+
)
|
| 489 |
+
|
| 490 |
+
if is_conflicting:
|
| 491 |
+
condition_brief = self._inject_conflict_signal(
|
| 492 |
+
condition_brief, return_reason
|
| 493 |
+
)
|
| 494 |
+
condition_detailed = self._inject_conflict_signal(
|
| 495 |
+
condition_detailed, return_reason
|
| 496 |
+
)
|
| 497 |
+
|
| 498 |
+
policy_summary = policy.summary()
|
| 499 |
+
if is_ambiguous:
|
| 500 |
+
policy_summary = self._make_policy_more_ambiguous(policy_summary)
|
| 501 |
+
|
| 502 |
+
visible = VisibleCase(
|
| 503 |
+
return_reason=return_reason,
|
| 504 |
+
product_category=category,
|
| 505 |
+
product_value=value_bucket,
|
| 506 |
+
days_since_purchase=days_since_purchase,
|
| 507 |
+
user_account_age_days=user_account_age_days,
|
| 508 |
+
product_condition_notes=condition_brief,
|
| 509 |
+
return_rate=return_rate,
|
| 510 |
+
total_orders=total_orders,
|
| 511 |
+
policy_summary=policy_summary,
|
| 512 |
+
)
|
| 513 |
+
|
| 514 |
+
financial_scores = self._financial_scores(
|
| 515 |
+
value_bucket=value_bucket,
|
| 516 |
+
intent=true_intent,
|
| 517 |
+
category_violation=category_policy_violated,
|
| 518 |
+
time_violation=time_policy_violated,
|
| 519 |
+
return_reason=return_reason,
|
| 520 |
+
)
|
| 521 |
+
optimal_action = self._argmax_action(financial_scores)
|
| 522 |
+
|
| 523 |
+
hidden = HiddenCaseState(
|
| 524 |
+
fraud_risk_score=fraud_risk_score,
|
| 525 |
+
true_intent=true_intent,
|
| 526 |
+
optimal_action=optimal_action,
|
| 527 |
+
cost_impact=financial_scores,
|
| 528 |
+
category_policy_violated=category_policy_violated,
|
| 529 |
+
time_policy_violated=time_policy_violated,
|
| 530 |
+
exception_applies=exception_applies,
|
| 531 |
+
is_ambiguous=is_ambiguous or is_conflicting,
|
| 532 |
+
)
|
| 533 |
+
return visible, hidden
|
| 534 |
+
|
| 535 |
+
@staticmethod
|
| 536 |
+
def _sample_return_rate(rng, total_orders: int) -> float:
|
| 537 |
+
band = rng.random()
|
| 538 |
+
if band < 0.60:
|
| 539 |
+
center = 0.12
|
| 540 |
+
spread = 0.08
|
| 541 |
+
elif band < 0.90:
|
| 542 |
+
center = 0.30
|
| 543 |
+
spread = 0.10
|
| 544 |
+
else:
|
| 545 |
+
center = 0.55
|
| 546 |
+
spread = 0.12
|
| 547 |
+
noise = (rng.random() * 2.0 - 1.0) * spread
|
| 548 |
+
historical_pressure = min(0.08, 8.0 / float(total_orders))
|
| 549 |
+
return EcomEnvironment._clamp01(center + noise + historical_pressure)
|
| 550 |
+
|
| 551 |
+
@staticmethod
|
| 552 |
+
def _inject_conflict_signal(text: str, reason: str) -> str:
|
| 553 |
+
if reason in ("defective", "damaged-shipping"):
|
| 554 |
+
return text + "; inspection has mixed indicators and partial evidence"
|
| 555 |
+
return text + "; customer claims conflict with available logistics notes"
|
| 556 |
+
|
| 557 |
+
@staticmethod
|
| 558 |
+
def _make_policy_more_ambiguous(text: str) -> str:
|
| 559 |
+
return (
|
| 560 |
+
text + " In borderline cases, consistency checks and risk controls apply."
|
| 561 |
+
)
|
| 562 |
+
|
| 563 |
+
@staticmethod
|
| 564 |
+
def _financial_scores(
|
| 565 |
+
*,
|
| 566 |
+
value_bucket: str,
|
| 567 |
+
intent: Intent,
|
| 568 |
+
category_violation: bool,
|
| 569 |
+
time_violation: bool,
|
| 570 |
+
return_reason: str,
|
| 571 |
+
) -> Dict[FinalAction, float]:
|
| 572 |
+
value_scale = {"low": 1.0, "medium": 1.7, "high": 2.6}[value_bucket]
|
| 573 |
+
|
| 574 |
+
approve_gain = 0.45
|
| 575 |
+
if return_reason in ("defective", "wrong-item", "damaged-shipping"):
|
| 576 |
+
approve_gain += 0.25
|
| 577 |
+
if intent == "abusive":
|
| 578 |
+
approve_gain -= 0.45 * value_scale
|
| 579 |
+
else:
|
| 580 |
+
approve_gain -= 0.15 * value_scale
|
| 581 |
+
if category_violation or time_violation:
|
| 582 |
+
approve_gain -= 0.35
|
| 583 |
+
|
| 584 |
+
reject_gain = 0.25
|
| 585 |
+
if intent == "abusive":
|
| 586 |
+
reject_gain += 0.35 * value_scale
|
| 587 |
+
else:
|
| 588 |
+
reject_gain -= 0.30
|
| 589 |
+
if return_reason in ("defective", "wrong-item", "damaged-shipping"):
|
| 590 |
+
reject_gain -= 0.20
|
| 591 |
+
|
| 592 |
+
escalate_gain = 0.30
|
| 593 |
+
escalate_gain -= 0.08 * value_scale
|
| 594 |
+
if intent == "abusive":
|
| 595 |
+
escalate_gain += 0.12
|
| 596 |
+
if return_reason in ("defective", "damaged-shipping"):
|
| 597 |
+
escalate_gain += 0.05
|
| 598 |
+
|
| 599 |
+
return {
|
| 600 |
+
"APPROVE": approve_gain,
|
| 601 |
+
"REJECT": reject_gain,
|
| 602 |
+
"ESCALATE": escalate_gain,
|
| 603 |
+
}
|
| 604 |
+
|
| 605 |
+
@staticmethod
|
| 606 |
+
def _argmax_action(scores: Dict[FinalAction, float]) -> FinalAction:
|
| 607 |
+
return max(scores.keys(), key=lambda k: scores[k])
|
| 608 |
+
|
| 609 |
+
def _evaluate(
|
| 610 |
+
self,
|
| 611 |
+
action: EcomAction,
|
| 612 |
+
visible: VisibleCase,
|
| 613 |
+
hidden: HiddenCaseState,
|
| 614 |
+
) -> tuple[float, Dict[str, Any]]:
|
| 615 |
+
policy_ok = self._policy_gate(action, visible, hidden)
|
| 616 |
+
if not policy_ok:
|
| 617 |
+
reward_model = EcomReward(
|
| 618 |
+
policy_gate=0.0,
|
| 619 |
+
financial_score=0.0,
|
| 620 |
+
fraud_score=0.0,
|
| 621 |
+
efficiency_score=0.0,
|
| 622 |
+
normalized_reward=0.0,
|
| 623 |
+
policy_violation=True,
|
| 624 |
+
)
|
| 625 |
+
return 0.0, reward_model.model_dump()
|
| 626 |
+
|
| 627 |
+
final_action = action.action_type
|
| 628 |
+
if final_action == "REJECT":
|
| 629 |
+
reason_bonus = 0.0
|
| 630 |
+
if hidden.time_policy_violated and action.reason_code == "TIME_EXPIRED":
|
| 631 |
+
reason_bonus = 0.05
|
| 632 |
+
elif (
|
| 633 |
+
hidden.category_policy_violated
|
| 634 |
+
and action.reason_code == "POLICY_VIOLATION"
|
| 635 |
+
):
|
| 636 |
+
reason_bonus = 0.05
|
| 637 |
+
elif (
|
| 638 |
+
hidden.true_intent == "abusive"
|
| 639 |
+
and action.reason_code == "SUSPECTED_FRAUD"
|
| 640 |
+
):
|
| 641 |
+
reason_bonus = 0.05
|
| 642 |
+
else:
|
| 643 |
+
reason_bonus = 0.0
|
| 644 |
+
|
| 645 |
+
trajectory_bonus = 0.0
|
| 646 |
+
if self._requested_info and hidden.is_ambiguous:
|
| 647 |
+
trajectory_bonus += 0.05
|
| 648 |
+
|
| 649 |
+
# Component scores are individually bounded [0, 1] before weighting.
|
| 650 |
+
financial_raw = (
|
| 651 |
+
hidden.cost_impact[final_action] + reason_bonus + trajectory_bonus
|
| 652 |
+
)
|
| 653 |
+
financial_score = self._normalize_financial(financial_raw)
|
| 654 |
+
|
| 655 |
+
fraud_score = self._fraud_component(final_action, hidden)
|
| 656 |
+
efficiency_score = self._efficiency_component(final_action)
|
| 657 |
+
|
| 658 |
+
final_reward = (
|
| 659 |
+
0.50 * financial_score + 0.30 * fraud_score + 0.20 * efficiency_score
|
| 660 |
+
)
|
| 661 |
+
final_reward = self._clamp01(final_reward)
|
| 662 |
+
|
| 663 |
+
reward_model = EcomReward(
|
| 664 |
+
policy_gate=1.0,
|
| 665 |
+
financial_score=financial_score,
|
| 666 |
+
fraud_score=fraud_score,
|
| 667 |
+
efficiency_score=efficiency_score,
|
| 668 |
+
normalized_reward=final_reward,
|
| 669 |
+
policy_violation=False,
|
| 670 |
+
optimal_action=hidden.optimal_action,
|
| 671 |
+
matched_optimal=final_action == hidden.optimal_action,
|
| 672 |
+
)
|
| 673 |
+
return final_reward, reward_model.model_dump()
|
| 674 |
+
|
| 675 |
+
@staticmethod
|
| 676 |
+
def _policy_gate(
|
| 677 |
+
action: EcomAction,
|
| 678 |
+
visible: VisibleCase,
|
| 679 |
+
hidden: HiddenCaseState,
|
| 680 |
+
) -> bool:
|
| 681 |
+
if action.action_type == "APPROVE":
|
| 682 |
+
if hidden.time_policy_violated or hidden.category_policy_violated:
|
| 683 |
+
return False
|
| 684 |
+
|
| 685 |
+
if action.action_type == "REJECT":
|
| 686 |
+
if hidden.time_policy_violated and hidden.category_policy_violated:
|
| 687 |
+
if action.reason_code not in ("TIME_EXPIRED", "POLICY_VIOLATION"):
|
| 688 |
+
return False
|
| 689 |
+
elif hidden.time_policy_violated:
|
| 690 |
+
if action.reason_code != "TIME_EXPIRED":
|
| 691 |
+
return False
|
| 692 |
+
elif hidden.category_policy_violated:
|
| 693 |
+
if action.reason_code != "POLICY_VIOLATION":
|
| 694 |
+
return False
|
| 695 |
+
else:
|
| 696 |
+
if action.reason_code in ("TIME_EXPIRED", "POLICY_VIOLATION"):
|
| 697 |
+
return False
|
| 698 |
+
|
| 699 |
+
# Prevent unsupported fraud accusation when fraud signal is very low.
|
| 700 |
+
if (
|
| 701 |
+
action.reason_code == "SUSPECTED_FRAUD"
|
| 702 |
+
and hidden.fraud_risk_score < 0.45
|
| 703 |
+
):
|
| 704 |
+
return False
|
| 705 |
+
|
| 706 |
+
# If no violation and no fraud signal, rejecting a clear service-failure claim is policy-inconsistent.
|
| 707 |
+
if (
|
| 708 |
+
action.action_type == "REJECT"
|
| 709 |
+
and not hidden.time_policy_violated
|
| 710 |
+
and not hidden.category_policy_violated
|
| 711 |
+
and hidden.fraud_risk_score < 0.30
|
| 712 |
+
and visible.return_reason in ("defective", "wrong-item", "damaged-shipping")
|
| 713 |
+
):
|
| 714 |
+
return False
|
| 715 |
+
|
| 716 |
+
return True
|
| 717 |
+
|
| 718 |
+
@staticmethod
|
| 719 |
+
def _normalize_financial(raw_value: float) -> float:
|
| 720 |
+
# Bound from approximately [-1.5, 1.5] into [0, 1] deterministically.
|
| 721 |
+
return EcomEnvironment._clamp01((raw_value + 1.5) / 3.0)
|
| 722 |
+
|
| 723 |
+
@staticmethod
|
| 724 |
+
def _fraud_component(final_action: FinalAction, hidden: HiddenCaseState) -> float:
|
| 725 |
+
risk = hidden.fraud_risk_score
|
| 726 |
+
if final_action == "REJECT":
|
| 727 |
+
if hidden.true_intent == "abusive":
|
| 728 |
+
return EcomEnvironment._clamp01(0.60 + 0.40 * risk)
|
| 729 |
+
return EcomEnvironment._clamp01(0.20 + 0.30 * (1.0 - risk))
|
| 730 |
+
|
| 731 |
+
if final_action == "APPROVE":
|
| 732 |
+
if hidden.true_intent == "genuine":
|
| 733 |
+
return EcomEnvironment._clamp01(0.65 + 0.35 * (1.0 - risk))
|
| 734 |
+
return EcomEnvironment._clamp01(0.10 + 0.20 * (1.0 - risk))
|
| 735 |
+
|
| 736 |
+
# ESCALATE
|
| 737 |
+
if hidden.true_intent == "abusive":
|
| 738 |
+
return EcomEnvironment._clamp01(0.50 + 0.30 * risk)
|
| 739 |
+
return EcomEnvironment._clamp01(0.45 + 0.25 * (1.0 - risk))
|
| 740 |
+
|
| 741 |
+
def _efficiency_component(self, final_action: FinalAction) -> float:
|
| 742 |
+
# Escalation and prior info requests incur efficiency penalty.
|
| 743 |
+
base = 1.0
|
| 744 |
+
if self._requested_info:
|
| 745 |
+
base -= 0.20
|
| 746 |
+
if final_action == "ESCALATE":
|
| 747 |
+
base -= 0.30
|
| 748 |
+
return self._clamp01(base)
|
| 749 |
+
|
| 750 |
+
def _refine_after_request_info(
|
| 751 |
+
self,
|
| 752 |
+
visible: VisibleCase,
|
| 753 |
+
hidden: HiddenCaseState,
|
| 754 |
+
) -> VisibleCase:
|
| 755 |
+
reason = visible.return_reason
|
| 756 |
+
if hidden.true_intent == "abusive":
|
| 757 |
+
refined_reason = (
|
| 758 |
+
"changed-mind" if reason in ("defective", "wrong-item") else reason
|
| 759 |
+
)
|
| 760 |
+
refined_notes = (
|
| 761 |
+
visible.product_condition_notes
|
| 762 |
+
+ "; follow-up review found no reproducible defect evidence"
|
| 763 |
+
)
|
| 764 |
+
else:
|
| 765 |
+
refined_reason = reason
|
| 766 |
+
refined_notes = (
|
| 767 |
+
self._CONDITION_NOTES[reason][1]
|
| 768 |
+
if reason in self._CONDITION_NOTES
|
| 769 |
+
else visible.product_condition_notes
|
| 770 |
+
)
|
| 771 |
+
|
| 772 |
+
# Deterministic, existing-field-only refinement.
|
| 773 |
+
refined_return_rate = self._clamp01(
|
| 774 |
+
visible.return_rate - 0.03
|
| 775 |
+
if hidden.true_intent == "genuine"
|
| 776 |
+
else visible.return_rate + 0.03
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
return VisibleCase(
|
| 780 |
+
return_reason=refined_reason,
|
| 781 |
+
product_category=visible.product_category,
|
| 782 |
+
product_value=visible.product_value,
|
| 783 |
+
days_since_purchase=visible.days_since_purchase,
|
| 784 |
+
user_account_age_days=visible.user_account_age_days,
|
| 785 |
+
product_condition_notes=refined_notes,
|
| 786 |
+
return_rate=refined_return_rate,
|
| 787 |
+
total_orders=visible.total_orders,
|
| 788 |
+
policy_summary=visible.policy_summary,
|
| 789 |
+
)
|
| 790 |
+
|
| 791 |
+
@staticmethod
|
| 792 |
+
def _to_observation(
|
| 793 |
+
case: VisibleCase,
|
| 794 |
+
*,
|
| 795 |
+
reward: Optional[float],
|
| 796 |
+
done: bool,
|
| 797 |
+
info: Dict[str, Any],
|
| 798 |
+
) -> EcomObservation:
|
| 799 |
+
return EcomObservation(
|
| 800 |
+
return_reason=case.return_reason,
|
| 801 |
+
product_category=case.product_category,
|
| 802 |
+
product_value=case.product_value,
|
| 803 |
+
days_since_purchase=case.days_since_purchase,
|
| 804 |
+
user_account_age_days=case.user_account_age_days,
|
| 805 |
+
product_condition_notes=case.product_condition_notes,
|
| 806 |
+
return_rate=case.return_rate,
|
| 807 |
+
total_orders=case.total_orders,
|
| 808 |
+
policy_summary=case.policy_summary,
|
| 809 |
+
reward=reward,
|
| 810 |
+
done=done,
|
| 811 |
+
info=info,
|
| 812 |
+
)
|
| 813 |
+
|
| 814 |
+
@staticmethod
|
| 815 |
+
def _weighted_choice(rng, distribution: Dict[str, float]) -> str:
|
| 816 |
+
threshold = rng.random()
|
| 817 |
+
cumulative = 0.0
|
| 818 |
+
last = None
|
| 819 |
+
for key, weight in distribution.items():
|
| 820 |
+
cumulative += weight
|
| 821 |
+
last = key
|
| 822 |
+
if threshold <= cumulative:
|
| 823 |
+
return key
|
| 824 |
+
assert last is not None
|
| 825 |
+
return last
|
| 826 |
|
| 827 |
+
@staticmethod
|
| 828 |
+
def _clamp01(value: float) -> float:
|
| 829 |
+
if value < 0.0:
|
| 830 |
+
return 0.0
|
| 831 |
+
if value > 1.0:
|
| 832 |
+
return 1.0
|
| 833 |
+
return float(value)
|