24122168-collab committed on
Commit ·
6ba100e
1
Parent(s): a9bd4aa
Add application file
Browse files- Dockerfile +18 -0
- ai_server_admin/Dockerfile +80 -0
- ai_server_admin/README.md +255 -0
- ai_server_admin/__init__.py +16 -0
- ai_server_admin/__pycache__/__init__.cpython-312.pyc +0 -0
- ai_server_admin/__pycache__/client.cpython-312.pyc +0 -0
- ai_server_admin/__pycache__/models.cpython-312.pyc +0 -0
- ai_server_admin/client.py +99 -0
- ai_server_admin/models.py +27 -0
- ai_server_admin/openenv.yaml +7 -0
- ai_server_admin/pyproject.toml +45 -0
- ai_server_admin/server/__init__.py +11 -0
- ai_server_admin/server/__pycache__/__init__.cpython-312.pyc +0 -0
- ai_server_admin/server/__pycache__/ai_server_admin_environment.cpython-312.pyc +0 -0
- ai_server_admin/server/__pycache__/app.cpython-312.pyc +0 -0
- ai_server_admin/server/ai_server_admin_environment.py +104 -0
- ai_server_admin/server/app.py +84 -0
- ai_server_admin/server/requirements.txt +6 -0
- ai_server_admin/uv.lock +0 -0
- app.py +114 -0
- cdk/app.py +31 -0
- cdk/email_gatekeeper_stack.py +144 -0
- cdk/requirements.txt +2 -0
- env.py +405 -0
- environment.py +501 -0
- inference.py +189 -0
- lambda/classifier.py +118 -0
- lambda/handler.py +156 -0
- openenv.yaml +176 -0
- requirements.txt +8 -0
- sagemaker/classifier.py +158 -0
- sagemaker/deploy.py +285 -0
- sagemaker/inference.py +357 -0
- sagemaker/model.tar.gz +3 -0
- sagemaker/package.py +71 -0
- sagemaker/requirements.txt +10 -0
- sagemaker/upload_to_hf.py +79 -0
- test_cases_advanced.json +377 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: slim Python 3.11 variant to keep the final image small.
FROM python:3.11-slim

# Create an unprivileged runtime user that owns the application directory.
# Hugging Face Docker Spaces run the container as UID 1000, so the app
# directory must be writable by that UID; not running as root also limits
# the damage a compromised process can do.
RUN useradd --create-home --uid 1000 user \
    && mkdir /app \
    && chown user:user /app

# Set work directory
WORKDIR /app

# Install dependencies before copying the rest of the project so this layer
# is reused from cache until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files, owned by the runtime user
COPY --chown=user:user . .

# Drop root privileges for the running service
USER user

# Expose the port Hugging Face expects
EXPOSE 7860

# Command to run FastAPI with uvicorn (exec form, so uvicorn is PID 1 and
# receives SIGTERM directly on `docker stop`)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
ai_server_admin/Dockerfile
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
# This Dockerfile is flexible and works for both:
# - In-repo environments (with local OpenEnv sources)
# - Standalone environments (with openenv from PyPI/Git)
# The build script (openenv build) handles context detection and sets appropriate build args.

# Declared before the first FROM so that both stages can reference it.
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
FROM ${BASE_IMAGE} AS builder

WORKDIR /app

# Ensure git is available (required for installing dependencies from VCS)
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Build argument to control whether we're building standalone or in-repo
ARG BUILD_MODE=in-repo
ARG ENV_NAME=ai_server_admin

# Copy environment code (always at root of build context)
COPY . /app/env

# For in-repo builds, openenv is already vendored in the build context
# For standalone builds, openenv will be installed via pyproject.toml
WORKDIR /app/env

# Ensure uv is available (for local builds where base image lacks it)
RUN if ! command -v uv >/dev/null 2>&1; then \
        curl -LsSf https://astral.sh/uv/install.sh | sh && \
        mv /root/.local/bin/uv /usr/local/bin/uv && \
        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
    fi

# Install dependencies using uv sync
# If uv.lock exists, use it; otherwise resolve on the fly
# First pass: third-party dependencies only (--no-install-project).
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen --no-install-project --no-editable; \
    else \
        uv sync --no-install-project --no-editable; \
    fi

# Second pass: install the project itself into the same virtual environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen --no-editable; \
    else \
        uv sync --no-editable; \
    fi

# Final runtime stage
FROM ${BASE_IMAGE}

WORKDIR /app

# Copy the virtual environment from builder
COPY --from=builder /app/env/.venv /app/.venv

# Copy the environment code
COPY --from=builder /app/env /app/env

# Set PATH to use the virtual environment
ENV PATH="/app/.venv/bin:$PATH"

# Set PYTHONPATH so imports work correctly
ENV PYTHONPATH="/app/env:$PYTHONPATH"

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the FastAPI server
# The module path is constructed to work with the /app/env structure.
# `exec` replaces the wrapper shell with uvicorn so the server becomes PID 1
# and receives SIGTERM from `docker stop` directly, instead of being killed
# after the stop timeout because the shell never forwarded the signal.
CMD ["sh", "-c", "cd /app/env && exec uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
ai_server_admin/README.md
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Ai Server Admin Environment Server
|
| 3 |
+
emoji: π½οΈ
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Ai Server Admin Environment
|
| 15 |
+
|
| 16 |
+
A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
|
| 17 |
+
|
| 18 |
+
## Quick Start
|
| 19 |
+
|
| 20 |
+
The simplest way to use the Ai Server Admin environment is through the `AiServerAdminEnv` class:
|
| 21 |
+
|
| 22 |
+
```python
|
| 23 |
+
from ai_server_admin import AiServerAdminAction, AiServerAdminEnv
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
# Create environment from Docker image
|
| 27 |
+
ai_server_adminenv = AiServerAdminEnv.from_docker_image("ai_server_admin-env:latest")
|
| 28 |
+
|
| 29 |
+
# Reset
|
| 30 |
+
result = ai_server_adminenv.reset()
|
| 31 |
+
print(f"Reset: {result.observation.echoed_message}")
|
| 32 |
+
|
| 33 |
+
# Send multiple messages
|
| 34 |
+
messages = ["Hello, World!", "Testing echo", "Final message"]
|
| 35 |
+
|
| 36 |
+
for msg in messages:
|
| 37 |
+
result = ai_server_adminenv.step(AiServerAdminAction(message=msg))
|
| 38 |
+
print(f"Sent: '{msg}'")
|
| 39 |
+
print(f" → Echoed: '{result.observation.echoed_message}'")
|
| 40 |
+
print(f" → Length: {result.observation.message_length}")
|
| 41 |
+
print(f" → Reward: {result.reward}")
|
| 42 |
+
|
| 43 |
+
finally:
|
| 44 |
+
# Always clean up
|
| 45 |
+
ai_server_adminenv.close()
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
That's it! The `AiServerAdminEnv.from_docker_image()` method handles:
|
| 49 |
+
- Starting the Docker container
|
| 50 |
+
- Waiting for the server to be ready
|
| 51 |
+
- Connecting to the environment
|
| 52 |
+
- Container cleanup when you call `close()`
|
| 53 |
+
|
| 54 |
+
## Building the Docker Image
|
| 55 |
+
|
| 56 |
+
Before using the environment, you need to build the Docker image:
|
| 57 |
+
|
| 58 |
+
```bash
|
| 59 |
+
# From project root
|
| 60 |
+
docker build -t ai_server_admin-env:latest -f server/Dockerfile .
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Deploying to Hugging Face Spaces
|
| 64 |
+
|
| 65 |
+
You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# From the environment directory (where openenv.yaml is located)
|
| 69 |
+
openenv push
|
| 70 |
+
|
| 71 |
+
# Or specify options
|
| 72 |
+
openenv push --namespace my-org --private
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
The `openenv push` command will:
|
| 76 |
+
1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
|
| 77 |
+
2. Prepare a custom build for Hugging Face Docker space (enables web interface)
|
| 78 |
+
3. Upload to Hugging Face (ensuring you're logged in)
|
| 79 |
+
|
| 80 |
+
### Prerequisites
|
| 81 |
+
|
| 82 |
+
- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
|
| 83 |
+
|
| 84 |
+
### Options
|
| 85 |
+
|
| 86 |
+
- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
|
| 87 |
+
- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
|
| 88 |
+
- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
|
| 89 |
+
- `--private`: Deploy the space as private (default: public)
|
| 90 |
+
|
| 91 |
+
### Examples
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
|
| 95 |
+
openenv push
|
| 96 |
+
|
| 97 |
+
# Push to a specific repository
|
| 98 |
+
openenv push --repo-id my-org/my-env
|
| 99 |
+
|
| 100 |
+
# Push with a custom base image
|
| 101 |
+
openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
|
| 102 |
+
|
| 103 |
+
# Push as a private space
|
| 104 |
+
openenv push --private
|
| 105 |
+
|
| 106 |
+
# Combine options
|
| 107 |
+
openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
After deployment, your space will be available at:
|
| 111 |
+
`https://huggingface.co/spaces/<repo-id>`
|
| 112 |
+
|
| 113 |
+
The deployed space includes:
|
| 114 |
+
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 115 |
+
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 116 |
+
- **Health Check** at `/health` - Container health monitoring
|
| 117 |
+
- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
|
| 118 |
+
|
| 119 |
+
## Environment Details
|
| 120 |
+
|
| 121 |
+
### Action
|
| 122 |
+
**AiServerAdminAction**: Contains a single field
|
| 123 |
+
- `message` (str) - The message to echo back
|
| 124 |
+
|
| 125 |
+
### Observation
|
| 126 |
+
**AiServerAdminObservation**: Contains the echo response and metadata
|
| 127 |
+
- `echoed_message` (str) - The message echoed back
|
| 128 |
+
- `message_length` (int) - Length of the message
|
| 129 |
+
- `reward` (float) - Reward based on message length (length × 0.1)
|
| 130 |
+
- `done` (bool) - Always False for echo environment
|
| 131 |
+
- `metadata` (dict) - Additional info like step count
|
| 132 |
+
|
| 133 |
+
### Reward
|
| 134 |
+
The reward is calculated as: `message_length × 0.1`
|
| 135 |
+
- "Hi" → reward: 0.2
|
| 136 |
+
- "Hello, World!" → reward: 1.3
|
| 137 |
+
- Empty message → reward: 0.0
|
| 138 |
+
|
| 139 |
+
## Advanced Usage
|
| 140 |
+
|
| 141 |
+
### Connecting to an Existing Server
|
| 142 |
+
|
| 143 |
+
If you already have an Ai Server Admin environment server running, you can connect directly:
|
| 144 |
+
|
| 145 |
+
```python
|
| 146 |
+
from ai_server_admin import AiServerAdminEnv
|
| 147 |
+
|
| 148 |
+
# Connect to existing server
|
| 149 |
+
ai_server_adminenv = AiServerAdminEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 150 |
+
|
| 151 |
+
# Use as normal
|
| 152 |
+
result = ai_server_adminenv.reset()
|
| 153 |
+
result = ai_server_adminenv.step(AiServerAdminAction(message="Hello!"))
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
Note: When connecting to an existing server, `ai_server_adminenv.close()` will NOT stop the server.
|
| 157 |
+
|
| 158 |
+
### Using the Context Manager
|
| 159 |
+
|
| 160 |
+
The client supports context manager usage for automatic connection management:
|
| 161 |
+
|
| 162 |
+
```python
|
| 163 |
+
from ai_server_admin import AiServerAdminAction, AiServerAdminEnv
|
| 164 |
+
|
| 165 |
+
# Connect with context manager (auto-connects and closes)
|
| 166 |
+
with AiServerAdminEnv(base_url="http://localhost:8000") as env:
|
| 167 |
+
result = env.reset()
|
| 168 |
+
print(f"Reset: {result.observation.echoed_message}")
|
| 169 |
+
# Multiple steps with low latency
|
| 170 |
+
for msg in ["Hello", "World", "!"]:
|
| 171 |
+
result = env.step(AiServerAdminAction(message=msg))
|
| 172 |
+
print(f"Echoed: {result.observation.echoed_message}")
|
| 173 |
+
```
|
| 174 |
+
|
| 175 |
+
The client uses WebSocket connections for:
|
| 176 |
+
- **Lower latency**: No HTTP connection overhead per request
|
| 177 |
+
- **Persistent session**: Server maintains your environment state
|
| 178 |
+
- **Efficient for episodes**: Better for many sequential steps
|
| 179 |
+
|
| 180 |
+
### Concurrent WebSocket Sessions
|
| 181 |
+
|
| 182 |
+
The server supports multiple concurrent WebSocket connections. To enable this,
|
| 183 |
+
modify `server/app.py` to use factory mode:
|
| 184 |
+
|
| 185 |
+
```python
|
| 186 |
+
# In server/app.py - use factory mode for concurrent sessions
|
| 187 |
+
app = create_app(
|
| 188 |
+
AiServerAdminEnvironment, # Pass class, not instance
|
| 189 |
+
AiServerAdminAction,
|
| 190 |
+
AiServerAdminObservation,
|
| 191 |
+
max_concurrent_envs=4, # Allow 4 concurrent sessions
|
| 192 |
+
)
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
Then multiple clients can connect simultaneously:
|
| 196 |
+
|
| 197 |
+
```python
|
| 198 |
+
from ai_server_admin import AiServerAdminAction, AiServerAdminEnv
|
| 199 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 200 |
+
|
| 201 |
+
def run_episode(client_id: int):
|
| 202 |
+
with AiServerAdminEnv(base_url="http://localhost:8000") as env:
|
| 203 |
+
result = env.reset()
|
| 204 |
+
for i in range(10):
|
| 205 |
+
result = env.step(AiServerAdminAction(message=f"Client {client_id}, step {i}"))
|
| 206 |
+
return client_id, result.observation.message_length
|
| 207 |
+
|
| 208 |
+
# Run 4 episodes concurrently
|
| 209 |
+
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 210 |
+
results = list(executor.map(run_episode, range(4)))
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
## Development & Testing
|
| 214 |
+
|
| 215 |
+
### Direct Environment Testing
|
| 216 |
+
|
| 217 |
+
Test the environment logic directly without starting the HTTP server:
|
| 218 |
+
|
| 219 |
+
```bash
|
| 220 |
+
# From the environment root directory (the path below is relative to it)
|
| 221 |
+
python3 server/ai_server_admin_environment.py
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
This verifies that:
|
| 225 |
+
- Environment resets correctly
|
| 226 |
+
- Step executes actions properly
|
| 227 |
+
- State tracking works
|
| 228 |
+
- Rewards are calculated correctly
|
| 229 |
+
|
| 230 |
+
### Running Locally
|
| 231 |
+
|
| 232 |
+
Run the server locally for development:
|
| 233 |
+
|
| 234 |
+
```bash
|
| 235 |
+
uvicorn server.app:app --reload
|
| 236 |
+
```
|
| 237 |
+
|
| 238 |
+
## Project Structure
|
| 239 |
+
|
| 240 |
+
```
|
| 241 |
+
ai_server_admin/
├── .dockerignore          # Docker build exclusions
├── __init__.py            # Module exports
├── README.md              # This file
├── openenv.yaml           # OpenEnv manifest
├── pyproject.toml         # Project metadata and dependencies
├── uv.lock                # Locked dependencies (generated)
├── client.py              # AiServerAdminEnv client
├── models.py              # Action and Observation models
└── server/
    ├── __init__.py        # Server module exports
    ├── ai_server_admin_environment.py  # Core environment logic
    ├── app.py             # FastAPI application (HTTP + WebSocket endpoints)
    └── Dockerfile         # Container image definition
|
| 255 |
+
```
|
ai_server_admin/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Ai Server Admin Environment."""
|
| 8 |
+
|
| 9 |
+
from .client import AiServerAdminEnv
|
| 10 |
+
from .models import AiServerAdminAction, AiServerAdminObservation
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"AiServerAdminAction",
|
| 14 |
+
"AiServerAdminObservation",
|
| 15 |
+
"AiServerAdminEnv",
|
| 16 |
+
]
|
ai_server_admin/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (458 Bytes). View file
|
|
|
ai_server_admin/__pycache__/client.cpython-312.pyc
ADDED
|
Binary file (4.02 kB). View file
|
|
|
ai_server_admin/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (1.4 kB). View file
|
|
|
ai_server_admin/client.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Ai Server Admin Environment Client."""
|
| 8 |
+
|
| 9 |
+
from typing import Dict
|
| 10 |
+
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
from openenv.core.env_server.types import State
|
| 14 |
+
|
| 15 |
+
from .models import AiServerAdminAction, AiServerAdminObservation
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class AiServerAdminEnv(EnvClient[AiServerAdminAction, AiServerAdminObservation, State]):
    """
    Client for talking to an Ai Server Admin environment server.

    The client keeps one persistent WebSocket connection open to the server,
    which makes multi-step interactions cheaper than per-request connections.
    Every client instance is served by its own environment session.

    Example:
        >>> # Connect to a running server
        >>> with AiServerAdminEnv(base_url="http://localhost:8000") as client:
        ...     result = client.reset()
        ...     print(result.observation.echoed_message)
        ...
        ...     result = client.step(AiServerAdminAction(message="Hello!"))
        ...     print(result.observation.echoed_message)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = AiServerAdminEnv.from_docker_image("ai_server_admin-env:latest")
        >>> try:
        ...     result = client.reset()
        ...     result = client.step(AiServerAdminAction(message="Test"))
        ... finally:
        ...     client.close()
    """

    def _step_payload(self, action: AiServerAdminAction) -> Dict:
        """
        Build the JSON-serialisable body of a step message.

        Args:
            action: The action whose ``message`` should be sent.

        Returns:
            A dict with the single key ``"message"``.
        """
        body = {"message": action.message}
        return body

    def _parse_result(self, payload: Dict) -> StepResult[AiServerAdminObservation]:
        """
        Turn a server response into a ``StepResult``.

        Args:
            payload: Decoded JSON response from the server. Missing keys fall
                back to defaults (empty observation, ``done=False``,
                ``reward=None``).

        Returns:
            StepResult wrapping an AiServerAdminObservation.
        """
        observation_fields = payload.get("observation", {})
        # reward and done live at the top level of the payload and are
        # mirrored onto both the observation and the StepResult.
        reward = payload.get("reward")
        done = payload.get("done", False)

        observation = AiServerAdminObservation(
            echoed_message=observation_fields.get("echoed_message", ""),
            message_length=observation_fields.get("message_length", 0),
            done=done,
            reward=reward,
            metadata=observation_fields.get("metadata", {}),
        )
        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> State:
        """
        Turn a state response into a ``State`` object.

        Args:
            payload: Decoded JSON response from a state request.

        Returns:
            State carrying ``episode_id`` (None if absent) and ``step_count``
            (0 if absent).
        """
        episode_id = payload.get("episode_id")
        step_count = payload.get("step_count", 0)
        return State(episode_id=episode_id, step_count=step_count)
|
ai_server_admin/models.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Data models for the Ai Server Admin Environment.
|
| 9 |
+
|
| 10 |
+
The ai_server_admin environment is a simple test environment that echoes back messages.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from openenv.core.env_server.types import Action, Observation
|
| 14 |
+
from pydantic import Field
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class AiServerAdminAction(Action):
    """Action accepted by the Ai Server Admin environment: one message to echo."""

    # Required field (no default): the text the environment should echo back.
    message: str = Field(
        ...,
        description="Message to echo back",
    )
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class AiServerAdminObservation(Observation):
    """Observation returned by the Ai Server Admin environment: the echoed text."""

    # The message that was echoed; empty string when not provided.
    echoed_message: str = Field(
        default="",
        description="The echoed message",
    )
    # Length of the echoed message; 0 when not provided.
    message_length: int = Field(
        default=0,
        description="Length of the echoed message",
    )
|
ai_server_admin/openenv.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: ai_server_admin
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
ai_server_admin/pyproject.toml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-ai_server_admin"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Ai Server Admin environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.2",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
# Add all dependencies needed for your environment here
|
| 23 |
+
# Examples:
|
| 24 |
+
# "numpy>=1.19.0",
|
| 25 |
+
# "torch>=2.0.0",
|
| 26 |
+
# "gymnasium>=0.29.0",
|
| 27 |
+
# "openspiel>=1.0.0",
|
| 28 |
+
# "smolagents>=1.22.0,<2",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
[project.optional-dependencies]
|
| 32 |
+
dev = [
|
| 33 |
+
"pytest>=8.0.0",
|
| 34 |
+
"pytest-cov>=4.0.0",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.scripts]
|
| 38 |
+
# Server entry point - enables running via: uv run --project . server
|
| 39 |
+
# or: python -m ai_server_admin.server.app
|
| 40 |
+
server = "ai_server_admin.server.app:main"
|
| 41 |
+
|
| 42 |
+
[tool.setuptools]
|
| 43 |
+
include-package-data = true
|
| 44 |
+
packages = ["ai_server_admin", "ai_server_admin.server"]
|
| 45 |
+
package-dir = { "ai_server_admin" = ".", "ai_server_admin.server" = "server" }
|
ai_server_admin/server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Ai Server Admin environment server components."""
|
| 8 |
+
|
| 9 |
+
from .ai_server_admin_environment import AiServerAdminEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["AiServerAdminEnvironment"]
|
ai_server_admin/server/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (400 Bytes). View file
|
|
|
ai_server_admin/server/__pycache__/ai_server_admin_environment.cpython-312.pyc
ADDED
|
Binary file (3.95 kB). View file
|
|
|
ai_server_admin/server/__pycache__/app.cpython-312.pyc
ADDED
|
Binary file (2.86 kB). View file
|
|
|
ai_server_admin/server/ai_server_admin_environment.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Ai Server Admin Environment Implementation.
|
| 9 |
+
|
| 10 |
+
A simple test environment that echoes back messages sent to it.
|
| 11 |
+
Perfect for testing HTTP server infrastructure.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from uuid import uuid4
|
| 15 |
+
|
| 16 |
+
from openenv.core.env_server.interfaces import Environment
|
| 17 |
+
from openenv.core.env_server.types import State
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
from ..models import AiServerAdminAction, AiServerAdminObservation
|
| 21 |
+
except ImportError:
|
| 22 |
+
from models import AiServerAdminAction, AiServerAdminObservation
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class AiServerAdminEnvironment(Environment):
    """
    Echo environment: every step returns the message it was given.

    Intended for exercising the HTTP server infrastructure. The only state it
    keeps is the current episode id, a step counter and a reset counter.

    Example:
        >>> env = AiServerAdminEnvironment()
        >>> obs = env.reset()
        >>> print(obs.echoed_message)  # "Ai Server Admin environment ready!"
        >>>
        >>> obs = env.step(AiServerAdminAction(message="Hello"))
        >>> print(obs.echoed_message)  # "Hello"
        >>> print(obs.message_length)  # 5
    """

    # Enable concurrent WebSocket sessions.
    # Set to True if your environment isolates state between instances.
    # When True, multiple WebSocket clients can connect simultaneously, each
    # getting their own environment instance (when using factory mode in app.py).
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    @staticmethod
    def _new_state() -> State:
        """Create a State with a fresh random episode id and a zero step count."""
        return State(episode_id=str(uuid4()), step_count=0)

    def __init__(self):
        """Set up a fresh episode state and a zeroed reset counter."""
        self._state = self._new_state()
        self._reset_count = 0

    def reset(self) -> AiServerAdminObservation:
        """
        Start a new episode.

        Replaces the state with a new episode id and step count 0, and
        increments the reset counter.

        Returns:
            AiServerAdminObservation carrying a ready message, length 0,
            reward 0.0 and done=False.
        """
        self._state = self._new_state()
        self._reset_count += 1

        ready_observation = AiServerAdminObservation(
            echoed_message="Ai Server Admin environment ready!",
            message_length=0,
            done=False,
            reward=0.0,
        )
        return ready_observation

    def step(self, action: AiServerAdminAction) -> AiServerAdminObservation:  # type: ignore[override]
        """
        Advance one step by echoing the action's message.

        Args:
            action: AiServerAdminAction whose ``message`` is echoed.

        Returns:
            AiServerAdminObservation with the echoed message, its length, a
            reward of 0.1 per character, done=False, and metadata holding the
            original message and the current step number.
        """
        self._state.step_count += 1

        text = action.message
        n_chars = len(text)

        # Simple reward: longer messages get higher rewards
        step_reward = n_chars * 0.1

        step_metadata = {"original_message": text, "step": self._state.step_count}
        return AiServerAdminObservation(
            echoed_message=text,
            message_length=n_chars,
            done=False,
            reward=step_reward,
            metadata=step_metadata,
        )

    @property
    def state(self) -> State:
        """
        Current environment state.

        Returns:
            The State object holding episode_id and step_count.
        """
        return self._state
|
ai_server_admin/server/app.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Ai Server Admin Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the AiServerAdminEnvironment
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Endpoints:
|
| 14 |
+
- POST /reset: Reset the environment
|
| 15 |
+
- POST /step: Execute an action
|
| 16 |
+
- GET /state: Get current environment state
|
| 17 |
+
- GET /schema: Get action/observation schemas
|
| 18 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Development (with auto-reload):
|
| 22 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
+
|
| 24 |
+
# Production:
|
| 25 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
+
|
| 27 |
+
# Or run directly:
|
| 28 |
+
python -m server.app
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from openenv.core.env_server.http_server import create_app
|
| 33 |
+
except Exception as e: # pragma: no cover
|
| 34 |
+
raise ImportError(
|
| 35 |
+
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
+
) from e
|
| 37 |
+
|
| 38 |
+
try:
    # Package-relative imports: these resolve when the module is loaded as part
    # of the ``ai_server_admin`` package (e.g. ``ai_server_admin.server.app``).
    from ..models import AiServerAdminAction, AiServerAdminObservation
    from .ai_server_admin_environment import AiServerAdminEnvironment
except ImportError:
    # Flat-layout fallback for ``uvicorn server.app:app`` / ``python -m server.app``
    # run from inside the project directory. In that layout ``..models`` reaches
    # beyond the top-level package and Python raises a plain ImportError (not
    # its ModuleNotFoundError subclass), so the broader exception must be caught
    # for the fallback to run. This also matches the handling used in
    # ai_server_admin_environment.py.
    from models import AiServerAdminAction, AiServerAdminObservation
    from server.ai_server_admin_environment import AiServerAdminEnvironment
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Create the app with web interface and README integration.
# The environment is handed over as a class (not an instance), together with
# the action and observation model types it works with.
app = create_app(
    AiServerAdminEnvironment,
    AiServerAdminAction,
    AiServerAdminObservation,
    env_name="ai_server_admin",
    max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 8001
        python -m ai_server_admin.server.app

    NOTE(review): command-line flags are parsed only in the ``__main__`` guard
    below; a caller that invokes ``main()`` with no arguments gets the
    defaults. Confirm how the ``server`` script entry point passes ``--port``.

    Args:
        host: Host address to bind to (default: "0.0.0.0")
        port: Port number to listen on (default: 8000)

    For production deployments, consider using uvicorn directly with
    multiple workers:
        uvicorn ai_server_admin.server.app:app --workers 4
    """
    # Imported lazily so that importing this module for its ``app`` object does
    # not require uvicorn to be importable.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Run the Ai Server Admin environment server."
    )
    # --host mirrors main()'s ``host`` parameter, which previously could not be
    # set from the command line; the default keeps the old behavior.
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    args = parser.parse_args()
    main(host=args.host, port=args.port)
|
ai_server_admin/server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
ai_server_admin/uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py — Gradio Web Interface for Hugging Face Spaces
|
| 3 |
+
=====================================================
|
| 4 |
+
Provides an interactive demo of the Email Gatekeeper RL environment.
|
| 5 |
+
Hugging Face Spaces serves this on port 7860 automatically.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
import numpy as np
|
| 10 |
+
from env import (
|
| 11 |
+
EmailTriageEnv, TASK_SPLITS,
|
| 12 |
+
URGENCY_LABELS, ROUTING_LABELS, RESOLUTION_LABELS,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# Keywords that escalate a security-context email to the Legal route.
_LEGAL_SECURITY_KW = {"lawsuit", "attorney", "sue", "ransomware", "extortion"}
# Keywords that escalate a billing-context email.
_BILLING_ESCALATE_KW = {"refund"}


def _classify(email: dict) -> np.ndarray:
    """Rule-based triage of one email.

    Args:
        email: Mapping with optional ``"keywords"`` (iterable of strings) and
            ``"context"`` (string). Missing or ``None`` values are treated as
            empty. Both fields are matched case-insensitively.

    Returns:
        ``np.ndarray`` of three ``int64`` codes, ``[urgency, routing,
        resolution]``, in the order the label maps are applied by the caller.
    """
    # Normalise both fields the same way: the context was already lower-cased,
    # keywords were not, and a None value for either used to raise.
    kw = {str(word).lower() for word in (email.get("keywords") or [])}
    context = (email.get("context") or "").lower()

    # Legal context or explicit legal keywords win over every other rule.
    if context == "legal" or kw & {"lawsuit", "attorney", "sue"}:
        return np.array([2, 2, 2], dtype=np.int64)

    if context == "security":
        # Legal/extortion wording, or "hacked" together with "breach",
        # routes the incident to Legal instead of Tech Support.
        if kw & _LEGAL_SECURITY_KW or ("hacked" in kw and "breach" in kw):
            return np.array([2, 2, 2], dtype=np.int64)
        return np.array([2, 1, 2], dtype=np.int64)

    if context == "billing":
        return np.array([1, 2, 2] if kw & _BILLING_ESCALATE_KW
                        else [1, 0, 1], dtype=np.int64)

    if context == "tech" or kw & {"crash", "error", "bug", "slow"}:
        return np.array([0, 1, 1], dtype=np.int64)

    # Nothing matched: lowest urgency, default routing and resolution.
    return np.array([0, 0, 0], dtype=np.int64)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def run_task_demo(task: str) -> str:
    """Run one deterministic episode with the rule-based classifier.

    Args:
        task: Task split name handed to ``EmailTriageEnv`` (the UI offers
            "easy", "medium" and "hard").

    Returns:
        A multi-line text report: one entry per processed email (predicted
        vs. correct labels, raw reward and a verdict) followed by the final
        score clamped to ``[0.0, 1.0]``.
    """
    env = EmailTriageEnv(task=task, shuffle=False)
    env.reset(seed=42)
    # Snapshot of the environment's (private) queue so each step can be paired
    # with the email it acted on.
    email_queue = list(env._queue)

    lines = []
    cumulative = 0.0

    # Iterate over the snapshot instead of looping until ``terminated``: this
    # cannot index past the queue (empty queue, or an env that never reports
    # termination) and also stops when the episode is truncated.
    for step, email in enumerate(email_queue):
        action = _classify(email)
        _, norm_reward, terminated, truncated, info = env.step(action)
        cumulative += norm_reward

        raw = info["raw_reward"]
        ca = info["correct_actions"]

        verdict = ("✅ EXACT" if raw >= 1.0 else
                   "🔶 PARTIAL" if raw > 0 else
                   "🚨 SECURITY MISS" if raw < 0 else "❌ WRONG")

        lines.append(
            f"#{step+1:02d} [{email['difficulty'].upper()}] "
            f"{email['description'][:40]}\n"
            f" Predicted : {URGENCY_LABELS[action[0]]} | "
            f"{ROUTING_LABELS[action[1]]} | {RESOLUTION_LABELS[action[2]]}\n"
            f" Correct : {URGENCY_LABELS[ca[0]]} | "
            f"{ROUTING_LABELS[ca[1]]} | {RESOLUTION_LABELS[ca[2]]}\n"
            f" Reward : {raw:+.1f} {verdict}\n"
        )

        if terminated or truncated:
            break

    # The summed normalised reward can leave [0, 1] (penalties are negative),
    # so clamp it for display.
    final = max(0.0, min(1.0, cumulative))
    lines.append(f"\n{'─'*50}")
    lines.append(f"Final Score : {final:.3f} / 1.0")
    return "\n".join(lines)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Gradio UI: a task selector, a run button and a text box for the episode
# report. Non-ASCII characters in the user-facing strings are restored from
# their mis-encoded form (one emoji had been split across two lines, which
# broke the first row of the reward table).
# NOTE(review): the Markdown bodies are indented with the code; this relies on
# gr.Markdown stripping common leading indentation — confirm for the pinned
# Gradio version.
with gr.Blocks(title="Email Gatekeeper RL") as demo:
    gr.Markdown("""
    # 📧 Email Gatekeeper — RL Environment Demo
    **Meta x PyTorch Hackathon** | Gymnasium-based email triage agent

    The agent classifies each email across **3 simultaneous dimensions**:
    `Urgency` × `Department` × `Resolution Action`
    """)

    with gr.Row():
        task_dropdown = gr.Dropdown(
            choices=["easy", "medium", "hard"],
            value="easy",
            label="Select Task",
        )
        run_btn = gr.Button("▶ Run Episode", variant="primary")

    output_box = gr.Textbox(
        label="Episode Results",
        lines=30,
        max_lines=50,
    )

    # Clicking the button runs one episode for the selected task and shows
    # the returned report in the text box.
    run_btn.click(fn=run_task_demo, inputs=task_dropdown, outputs=output_box)

    gr.Markdown("""
    ### Reward Function
    | Result | Reward |
    |---|---|
    | ✅ Exact Match (all 3 correct) | +1.0 |
    | 🔶 Partial (urgency correct, 1 wrong) | +0.2 |
    | 🔶 Partial (urgency correct, 2 wrong) | +0.1 |
    | 🚨 Security Miss | **-2.0** |
    | ❌ Wrong urgency | 0.0 |
    """)


if __name__ == "__main__":
    # Bind to all interfaces on port 7860, the port named in the module
    # docstring as the one Hugging Face Spaces serves.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
cdk/app.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
app.py — CDK application entry point.

Usage:
    cd cdk
    pip install aws-cdk-lib constructs
    cdk bootstrap   # first time only
    cdk synth       # preview CloudFormation template
    cdk deploy      # deploy to AWS
    cdk destroy     # tear down all resources
"""

import os

import aws_cdk as cdk
from email_gatekeeper_stack import EmailGatekeeperStack

app = cdk.App()

EmailGatekeeperStack(
    app,
    "EmailGatekeeperStack",
    # Pin to a specific account + region to avoid environment-agnostic limitations
    # (required for SES receipt rules and S3 bucket policies).
    env=cdk.Environment(
        # Use the account the CDK CLI resolved from the active credentials
        # (exported as CDK_DEFAULT_ACCOUNT). The placeholder ID is kept only
        # as a fallback when that variable is absent; replace it or set the
        # variable before deploying.
        account=os.environ.get("CDK_DEFAULT_ACCOUNT", "123456789012"),
        # Region stays pinned: SES inbound is only available in us-east-1,
        # us-west-2, and eu-west-1. Change it here to one of those if needed.
        region="us-east-1",
    ),
)

app.synth()
|
cdk/email_gatekeeper_stack.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
email_gatekeeper_stack.py — AWS CDK Stack for the Email Gatekeeper.
|
| 3 |
+
|
| 4 |
+
Resources created:
|
| 5 |
+
- S3 bucket : receives raw .eml files from SES
|
| 6 |
+
- Lambda function : classifies each email using the rule-based engine
|
| 7 |
+
- DynamoDB table : stores every triage result (email_id as partition key)
|
| 8 |
+
- SNS topic : fires an alert whenever a Security Breach is detected
|
| 9 |
+
- SES receipt rule : routes inbound email → S3 bucket (requires verified domain)
|
| 10 |
+
- IAM roles/policies : least-privilege access for Lambda → S3, DynamoDB, SNS
|
| 11 |
+
|
| 12 |
+
Deploy:
|
| 13 |
+
cd cdk
|
| 14 |
+
pip install aws-cdk-lib constructs
|
| 15 |
+
cdk bootstrap # first time only per account/region
|
| 16 |
+
cdk deploy
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import aws_cdk as cdk
|
| 20 |
+
from aws_cdk import (
|
| 21 |
+
Stack,
|
| 22 |
+
Duration,
|
| 23 |
+
RemovalPolicy,
|
| 24 |
+
aws_s3 as s3,
|
| 25 |
+
aws_lambda as lambda_,
|
| 26 |
+
aws_dynamodb as dynamodb,
|
| 27 |
+
aws_sns as sns,
|
| 28 |
+
aws_sns_subscriptions as sns_subs,
|
| 29 |
+
aws_s3_notifications as s3n,
|
| 30 |
+
aws_ses as ses,
|
| 31 |
+
aws_ses_actions as ses_actions,
|
| 32 |
+
aws_iam as iam,
|
| 33 |
+
)
|
| 34 |
+
from constructs import Construct
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class EmailGatekeeperStack(Stack):
    """CDK stack for the Email Gatekeeper pipeline.

    Creates an S3 inbox bucket, a DynamoDB results table, an SNS alert topic,
    the classifier Lambda, the S3 trigger for that Lambda, an SES receipt rule
    set that stores inbound mail in the bucket, and four CloudFormation
    outputs.
    """

    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        """Define all resources of the stack.

        Args:
            scope: Parent construct (the CDK app).
            construct_id: Logical ID of this stack.
            **kwargs: Forwarded unchanged to ``Stack.__init__`` (e.g. ``env``).
        """
        super().__init__(scope, construct_id, **kwargs)

        # Key prefix under which SES stores inbound mail. Shared by the SES
        # action, the bucket policy and the Lambda trigger so they cannot drift.
        incoming_prefix = "incoming/"

        # ── 1. S3 bucket — stores raw .eml files delivered by SES ──────────────
        email_bucket = s3.Bucket(
            self, "EmailBucket",
            bucket_name=f"email-gatekeeper-inbox-{self.account}",
            # Block all public access — emails are private
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
            # Auto-delete raw emails after 30 days to control storage costs
            lifecycle_rules=[
                s3.LifecycleRule(expiration=Duration.days(30))
            ],
            removal_policy=RemovalPolicy.RETAIN,  # keep emails if stack is deleted
        )

        # ── 2. DynamoDB table — persists every triage decision ─────────────────
        results_table = dynamodb.Table(
            self, "EmailResultsTable",
            table_name="EmailTriageResults",
            partition_key=dynamodb.Attribute(
                name="email_id",
                type=dynamodb.AttributeType.STRING,
            ),
            billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,  # serverless billing
            removal_policy=RemovalPolicy.RETAIN,
        )

        # ── 3. SNS topic — security breach alerts ──────────────────────────────
        security_topic = sns.Topic(
            self, "SecurityAlertTopic",
            topic_name="EmailGatekeeperSecurityAlerts",
            display_name="Email Gatekeeper — Security Breach Alerts",
        )

        # Add your alert email here — replace with a real address
        security_topic.add_subscription(
            sns_subs.EmailSubscription("security-team@your-domain.com")
        )

        # ── 4. Lambda function ─────────────────────────────────────────────────
        classifier_fn = lambda_.Function(
            self, "EmailClassifierFn",
            function_name="EmailGatekeeperClassifier",
            runtime=lambda_.Runtime.PYTHON_3_12,
            # Points to the ../lambda/ directory — CDK zips it automatically
            code=lambda_.Code.from_asset("../lambda"),
            handler="handler.lambda_handler",
            timeout=Duration.seconds(30),
            memory_size=256,  # classifier is CPU-light
            # Resource names/ARNs the handler reads from its environment.
            environment={
                "EMAIL_RESULTS_TABLE": results_table.table_name,
                "SECURITY_ALERT_TOPIC_ARN": security_topic.topic_arn,
            },
        )

        # Grant Lambda least-privilege access to each resource
        email_bucket.grant_read(classifier_fn)
        results_table.grant_write_data(classifier_fn)
        security_topic.grant_publish(classifier_fn)

        # ── 5. S3 → Lambda trigger ─────────────────────────────────────────────
        # Fires whenever SES drops a new .eml into the bucket. The key filter
        # limits invocations to the SES prefix, so objects created elsewhere in
        # the bucket no longer invoke the classifier.
        email_bucket.add_event_notification(
            s3.EventType.OBJECT_CREATED,
            s3n.LambdaDestination(classifier_fn),
            s3.NotificationKeyFilter(prefix=incoming_prefix),
        )

        # ── 6. SES receipt rule — routes inbound email to S3 ───────────────────
        # IMPORTANT: your domain must be verified in SES before this works.
        # Replace "mail.your-domain.com" with your actual verified domain.
        rule_set = ses.ReceiptRuleSet(
            self, "EmailRuleSet",
            rule_set_name="EmailGatekeeperRuleSet",
        )

        rule_set.add_rule(
            "StoreInS3Rule",
            recipients=["inbox@mail.your-domain.com"],  # ← replace with your address
            actions=[
                ses_actions.S3(
                    bucket=email_bucket,
                    object_key_prefix=incoming_prefix,  # all emails land under incoming/
                )
            ],
            scan_enabled=True,  # enable SES spam/virus scanning
        )

        # ── 7. Allow SES to write to the S3 bucket ─────────────────────────────
        # Scoped to the incoming/ prefix and to requests originating from this
        # account.
        email_bucket.add_to_resource_policy(
            iam.PolicyStatement(
                sid="AllowSESPuts",
                principals=[iam.ServicePrincipal("ses.amazonaws.com")],
                actions=["s3:PutObject"],
                resources=[email_bucket.arn_for_objects(f"{incoming_prefix}*")],
                conditions={
                    "StringEquals": {"aws:SourceAccount": self.account}
                },
            )
        )

        # ── 8. CloudFormation outputs — useful after deploy ────────────────────
        cdk.CfnOutput(self, "BucketName", value=email_bucket.bucket_name)
        cdk.CfnOutput(self, "TableName", value=results_table.table_name)
        cdk.CfnOutput(self, "LambdaArn", value=classifier_fn.function_arn)
        cdk.CfnOutput(self, "SecurityTopicArn", value=security_topic.topic_arn)
|
cdk/requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aws-cdk-lib>=2.100.0
|
| 2 |
+
constructs>=10.0.0
|
env.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
env.py — Email Gatekeeper RL Environment (OpenEnv Specification)
|
| 3 |
+
================================================================
|
| 4 |
+
Gymnasium environment for intelligent email triage.
|
| 5 |
+
Wraps the core EmailTriageEnv logic with:
|
| 6 |
+
- Pydantic typed Action and Observation models
|
| 7 |
+
- state() method returning current environment state
|
| 8 |
+
- Three task splits: easy / medium / hard
|
| 9 |
+
- Full OpenEnv-compatible interface
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import gymnasium as gym
|
| 16 |
+
from gymnasium import spaces
|
| 17 |
+
from pydantic import BaseModel, Field
|
| 18 |
+
from typing import Optional
|
| 19 |
+
|
| 20 |
+
# ── Vocabulary & encoding (canonical — must not change between versions) ──────

# Keywords tracked in the observation. Order matters: the i-th keyword flag of
# an observation corresponds to KEYWORD_VOCAB[i].
KEYWORD_VOCAB = [
    "invoice", "payment", "overdue", "refund",
    "hacked", "breach", "unauthorized", "password",
    "crash", "error", "bug", "slow",
    "lawsuit", "legal", "attorney", "sue",
    "spam", "offer", "win", "free",
    "urgent", "critical","angry", "threat",
]

# Sentiment name → position of the 1.0 in the sentiment one-hot vector.
SENTIMENT_MAP = {"positive": 0, "neutral": 1, "negative": 2}
# Context name → index (presumably the one-hot position, as for sentiment —
# confirm against the encoder).
CONTEXT_MAP = {"spam": 0, "billing": 1, "tech": 2, "security": 3, "legal": 4}
# Observation length: 24 keyword flags + 3 sentiment slots + 5 context slots.
OBS_DIM = len(KEYWORD_VOCAB) + len(SENTIMENT_MAP) + len(CONTEXT_MAP)  # 32

# ── Label maps ─────────────────────────────────────────────────────────────────
# Human-readable names for the three action dimensions (code → label).
URGENCY_LABELS = {0: "General", 1: "Billing", 2: "Security Breach"}
ROUTING_LABELS = {0: "AI Auto-Reply", 1: "Tech Support", 2: "Legal"}
RESOLUTION_LABELS = {0: "Archive", 1: "Draft Reply", 2: "Escalate"}

# ── Reward weights ─────────────────────────────────────────────────────────────
# NOTE(review): names suggest exact match / one wrong dimension / two wrong
# dimensions / missed security email — confirm against the reward computation.
REWARD_EXACT = 1.0
REWARD_PARTIAL_1_WRONG = 0.2
REWARD_PARTIAL_2_WRONG = 0.1
PENALTY_SECURITY_MISS = -2.0
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 48 |
+
# Pydantic Typed Models
|
| 49 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
|
| 51 |
+
class EmailAction(BaseModel):
    """
    One triage decision: the agent's answer for a single email.

    The three dimensions are predicted together; each is an integer code
    restricted to the range 0..2.
    """
    urgency: int = Field(
        ...,
        ge=0,
        le=2,
        description="0=General | 1=Billing | 2=Security Breach",
    )
    routing: int = Field(
        ...,
        ge=0,
        le=2,
        description="0=AI Auto-Reply | 1=Tech Support | 2=Legal",
    )
    resolution: int = Field(
        ...,
        ge=0,
        le=2,
        description="0=Archive | 1=Draft Reply | 2=Escalate",
    )

    def to_array(self) -> np.ndarray:
        """Return the decision as an int64 array ``[urgency, routing, resolution]``."""
        codes = [self.urgency, self.routing, self.resolution]
        return np.array(codes, dtype=np.int64)

    @classmethod
    def from_array(cls, arr: np.ndarray) -> "EmailAction":
        """Build an action from the first three entries of ``arr`` (same order as ``to_array``)."""
        urgency_code = int(arr[0])
        routing_code = int(arr[1])
        resolution_code = int(arr[2])
        return cls(
            urgency=urgency_code,
            routing=routing_code,
            resolution=resolution_code,
        )
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class EmailObservation(BaseModel):
    """
    What the agent sees for the current email.

    The three encoded parts concatenate into a flat float32 vector of
    length 32 (see ``to_array``).
    """
    keyword_flags: list[float] = Field(
        ...,
        description=f"Binary flags for {len(KEYWORD_VOCAB)} vocab keywords",
    )
    sentiment_onehot: list[float] = Field(
        ...,
        description="One-hot: [positive, neutral, negative]",
    )
    context_onehot: list[float] = Field(
        ...,
        description="One-hot: [spam, billing, tech, security, legal]",
    )
    # Human-readable metadata (not used by the agent, useful for logging)
    description: str = ""
    difficulty: str = ""
    context_str: str = ""
    sentiment_str: str = ""
    keywords: list[str] = Field(default_factory=list)

    def to_array(self) -> np.ndarray:
        """Concatenate keyword flags, sentiment and context into one float32 vector."""
        flat = [
            *self.keyword_flags,
            *self.sentiment_onehot,
            *self.context_onehot,
        ]
        return np.array(flat, dtype=np.float32)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class EnvironmentState(BaseModel):
    """Current snapshot of the environment — returned by state()."""
    # NOTE(review): field meanings below are read from the names; confirm
    # against the state() implementation that fills them.
    step_index: int  # presumably the position within the current episode
    total_emails: int  # presumably the number of emails in the episode
    emails_remaining: int  # presumably emails not yet processed
    current_email: dict  # the email currently presented, as a plain dict
    cumulative_reward: float  # reward accumulated so far
    task: str  # "easy" | "medium" | "hard" | "all"
    terminated: bool  # whether the episode has ended
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class StepResult(BaseModel):
    """Typed return value from step()."""
    observation: EmailObservation  # typed observation accompanying this step
    reward: float  # presumably the raw (un-normalised) reward — confirm
    normalised_reward: float  # presumably the reward scaled by the episode maximum — confirm
    terminated: bool  # episode-ended flag
    truncated: bool  # episode-cut-short flag
    info: dict  # free-form diagnostics
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½ββββββββββββββββββββββ
|
| 129 |
+
# Dataset
|
| 130 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 131 |
+
|
| 132 |
+
EMAIL_DATASET: list[dict] = [
|
| 133 |
+
# ββ Easy: Spam detection βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 134 |
+
{"description": "Spam promo", "keywords": ["spam","offer","win","free"],
|
| 135 |
+
"sentiment": "positive", "context": "spam", "difficulty": "easy",
|
| 136 |
+
"correct_actions": (0, 0, 0)},
|
| 137 |
+
{"description": "Spam lottery", "keywords": ["free","win","offer"],
|
| 138 |
+
"sentiment": "positive", "context": "spam", "difficulty": "easy",
|
| 139 |
+
"correct_actions": (0, 0, 0)},
|
| 140 |
+
{"description": "Routine support", "keywords": ["slow","error"],
|
| 141 |
+
"sentiment": "neutral", "context": "tech", "difficulty": "easy",
|
| 142 |
+
"correct_actions": (0, 1, 1)},
|
| 143 |
+
{"description": "General billing", "keywords": ["invoice","payment"],
|
| 144 |
+
"sentiment": "neutral", "context": "billing", "difficulty": "easy",
|
| 145 |
+
"correct_actions": (1, 0, 1)},
|
| 146 |
+
# ββ Medium: Support routing βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 147 |
+
{"description": "Overdue invoice", "keywords": ["invoice","overdue","payment","angry"],
|
| 148 |
+
"sentiment": "negative", "context": "billing", "difficulty": "medium",
|
| 149 |
+
"correct_actions": (1, 0, 1)},
|
| 150 |
+
{"description": "Refund dispute", "keywords": ["refund","payment","angry"],
|
| 151 |
+
"sentiment": "negative", "context": "billing", "difficulty": "medium",
|
| 152 |
+
"correct_actions": (1, 2, 2)},
|
| 153 |
+
{"description": "App crash report", "keywords": ["crash","bug","error"],
|
| 154 |
+
"sentiment": "negative", "context": "tech", "difficulty": "medium",
|
| 155 |
+
"correct_actions": (0, 1, 1)},
|
| 156 |
+
{"description": "Persistent login bug","keywords": ["bug","password","error"],
|
| 157 |
+
"sentiment": "negative", "context": "tech", "difficulty": "medium",
|
| 158 |
+
"correct_actions": (0, 1, 1)},
|
| 159 |
+
{"description": "Polite legal ultimatum","keywords": ["refund","legal","angry","threat"],
|
| 160 |
+
"sentiment": "negative", "context": "legal", "difficulty": "medium",
|
| 161 |
+
"correct_actions": (2, 2, 2)},
|
| 162 |
+
{"description": "Attorney CC warning", "keywords": ["invoice","overdue","attorney","legal","payment","threat"],
|
| 163 |
+
"sentiment": "negative", "context": "legal", "difficulty": "medium",
|
| 164 |
+
"correct_actions": (2, 2, 2)},
|
| 165 |
+
{"description": "Regulatory complaint","keywords": ["angry","threat","legal"],
|
| 166 |
+
"sentiment": "negative", "context": "legal", "difficulty": "medium",
|
| 167 |
+
"correct_actions": (2, 2, 2)},
|
| 168 |
+
{"description": "SLA breach legal", "keywords": ["breach","legal","threat","angry"],
|
| 169 |
+
"sentiment": "negative", "context": "legal", "difficulty": "medium",
|
| 170 |
+
"correct_actions": (2, 2, 2)},
|
| 171 |
+
# ββ Hard: Phishing & security threats ββββββββββββββββββββββββββββββββββββ
|
| 172 |
+
{"description": "IT audit phish", "keywords": ["password","unauthorized","critical","urgent","threat"],
|
| 173 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 174 |
+
"correct_actions": (2, 1, 2)},
|
| 175 |
+
{"description": "Fake invoice portal", "keywords": ["invoice","payment","password","unauthorized","urgent"],
|
| 176 |
+
"sentiment": "neutral", "context": "security","difficulty": "hard",
|
| 177 |
+
"correct_actions": (2, 1, 2)},
|
| 178 |
+
{"description": "HR credential phish", "keywords": ["password","urgent","critical"],
|
| 179 |
+
"sentiment": "neutral", "context": "security","difficulty": "hard",
|
| 180 |
+
"correct_actions": (2, 1, 2)},
|
| 181 |
+
{"description": "Fake suspension", "keywords": ["unauthorized","password","breach","urgent","threat"],
|
| 182 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 183 |
+
"correct_actions": (2, 1, 2)},
|
| 184 |
+
{"description": "BEC vendor reply", "keywords": ["password","unauthorized","urgent"],
|
| 185 |
+
"sentiment": "neutral", "context": "security","difficulty": "hard",
|
| 186 |
+
"correct_actions": (2, 1, 2)},
|
| 187 |
+
{"description": "Sign-in alert phish", "keywords": ["unauthorized","password","hacked","breach","urgent"],
|
| 188 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 189 |
+
"correct_actions": (2, 1, 2)},
|
| 190 |
+
{"description": "Payroll phish", "keywords": ["payment","password","urgent","threat"],
|
| 191 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 192 |
+
"correct_actions": (2, 1, 2)},
|
| 193 |
+
{"description": "License renewal BEC", "keywords": ["password","critical","urgent","error"],
|
| 194 |
+
"sentiment": "neutral", "context": "security","difficulty": "hard",
|
| 195 |
+
"correct_actions": (2, 1, 2)},
|
| 196 |
+
{"description": "GDPR phish", "keywords": ["breach","hacked","password","legal","threat","urgent","unauthorized"],
|
| 197 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 198 |
+
"correct_actions": (2, 1, 2)},
|
| 199 |
+
{"description": "Ransomware audit", "keywords": ["hacked","breach","unauthorized","lawsuit","legal","threat","critical","urgent"],
|
| 200 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 201 |
+
"correct_actions": (2, 2, 2)},
|
| 202 |
+
{"description": "Data extortion", "keywords": ["hacked","breach","unauthorized","attorney","threat","critical","urgent"],
|
| 203 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 204 |
+
"correct_actions": (2, 2, 2)},
|
| 205 |
+
{"description": "Fake law firm", "keywords": ["unauthorized","breach","attorney","lawsuit","legal","threat"],
|
| 206 |
+
"sentiment": "negative", "context": "legal", "difficulty": "hard",
|
| 207 |
+
"correct_actions": (2, 2, 2)},
|
| 208 |
+
{"description": "Account hacked", "keywords": ["hacked","unauthorized","password","urgent","angry"],
|
| 209 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 210 |
+
"correct_actions": (2, 1, 2)},
|
| 211 |
+
{"description": "Data breach notice", "keywords": ["breach","unauthorized","critical","threat"],
|
| 212 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 213 |
+
"correct_actions": (2, 1, 2)},
|
| 214 |
+
{"description": "Legal lawsuit threat","keywords": ["lawsuit","legal","attorney","threat","angry"],
|
| 215 |
+
"sentiment": "negative", "context": "legal", "difficulty": "hard",
|
| 216 |
+
"correct_actions": (2, 2, 2)},
|
| 217 |
+
{"description": "Ransomware threat", "keywords": ["hacked","threat","critical","urgent","breach"],
|
| 218 |
+
"sentiment": "negative", "context": "security","difficulty": "hard",
|
| 219 |
+
"correct_actions": (2, 2, 2)},
|
| 220 |
+
]
|
| 221 |
+
|
| 222 |
+
# Task splits — used by inference.py for per-task scoring.
# One filtered list per difficulty level (dataset order preserved), plus "all",
# which refers to the full dataset list itself.
TASK_SPLITS: dict[str, list[dict]] = {
    **{
        level: [entry for entry in EMAIL_DATASET if entry["difficulty"] == level]
        for level in ("easy", "medium", "hard")
    },
    "all": EMAIL_DATASET,
}
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 232 |
+
# Core Environment
|
| 233 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 234 |
+
|
| 235 |
+
class EmailTriageEnv(gym.Env):
    """
    Gymnasium environment exposing an OpenEnv-style API for email triage.

    One email is presented per step as a flat observation vector of length
    ``OBS_DIM``; the agent answers with three discrete decisions at once
    (urgency, routing, resolution).

    Parameters
    ----------
    task : str
        "easy" | "medium" | "hard" | "all" — which email subset to use.
    shuffle : bool
        Shuffle emails on each reset (default True).
    """

    metadata = {"render_modes": ["human"]}

    def __init__(self, task: str = "all", shuffle: bool = True):
        super().__init__()

        if task not in TASK_SPLITS:
            raise ValueError(f"task must be one of {list(TASK_SPLITS)}. Got '{task}'.")

        self.task, self.shuffle = task, shuffle
        self.email_batch = TASK_SPLITS[task]

        # Spaces: a [0, 1] float vector out, three 3-way discrete choices in.
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(OBS_DIM,), dtype=np.float32
        )
        self.action_space = spaces.MultiDiscrete([3, 3, 3])

        # Episode bookkeeping; reset() fills these in for real.
        self._queue: list[dict] = []
        self._current_email: dict = {}
        self._step_idx: int = 0
        self._cumulative_reward: float = 0.0
        # Divisor that turns a raw per-step reward into a share of the best
        # achievable episode total (every email answered exactly).
        self._max_episode_reward: float = len(self.email_batch) * REWARD_EXACT

    # -- Encoding helpers ------------------------------------------------------

    def _encode_to_obs(self, email: dict) -> EmailObservation:
        """Build the typed EmailObservation for one email dict."""
        present = email["keywords"]
        keyword_bits = [float(word in present) for word in KEYWORD_VOCAB]

        # One-hot vectors: start from all zeros, then switch on the mapped slot.
        sentiment_bits = [0.0 for _ in SENTIMENT_MAP]
        sentiment_bits[SENTIMENT_MAP[email["sentiment"]]] = 1.0

        context_bits = [0.0 for _ in CONTEXT_MAP]
        context_bits[CONTEXT_MAP[email["context"]]] = 1.0

        return EmailObservation(
            keyword_flags=keyword_bits,
            sentiment_onehot=sentiment_bits,
            context_onehot=context_bits,
            description=email.get("description", ""),
            difficulty=email.get("difficulty", ""),
            context_str=email["context"],
            sentiment_str=email["sentiment"],
            keywords=email["keywords"],
        )

    def _compute_reward(self, action: np.ndarray, email: dict) -> float:
        """
        Score one action triple against the email's ``correct_actions``.

        Rules are checked in this order:
          1. PENALTY_SECURITY_MISS  — correct urgency is 2 but another was chosen
          2. REWARD_EXACT           — the whole triple matches
          3. REWARD_PARTIAL_1_WRONG — urgency right, exactly one other field wrong
          4. REWARD_PARTIAL_2_WRONG — urgency right, any other non-exact outcome
          5. 0.0                    — urgency wrong
        """
        picked = tuple(int(action[i]) for i in range(3))
        truth = email["correct_actions"]
        wanted_urgency = truth[0]

        if wanted_urgency == 2 and picked[0] != 2:
            return PENALTY_SECURITY_MISS
        if picked == truth:
            return REWARD_EXACT
        if picked[0] != wanted_urgency:
            return 0.0

        misses = int(picked[1] != truth[1]) + int(picked[2] != truth[2])
        if misses == 1:
            return REWARD_PARTIAL_1_WRONG
        return REWARD_PARTIAL_2_WRONG

    # -- OpenEnv API -----------------------------------------------------------

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> tuple[np.ndarray, dict]:
        """Start a new episode and return ``(observation, info)`` for the first email."""
        super().reset(seed=seed)

        # Work on a copy so shuffling never reorders the shared task split.
        self._queue = list(self.email_batch)
        if self.shuffle:
            self.np_random.shuffle(self._queue)

        self._step_idx = 0
        self._cumulative_reward = 0.0
        self._current_email = self._queue[0]

        first = self._current_email
        observation = self._encode_to_obs(first)
        info = {
            "description": first["description"],
            "difficulty": first["difficulty"],
            "task": self.task,
            "total_steps": len(self._queue),
        }
        return observation.to_array(), info

    def step(
        self, action: np.ndarray
    ) -> tuple[np.ndarray, float, bool, bool, dict]:
        """
        Score ``action`` against the current email, then advance the queue.

        Returns ``(observation, reward, terminated, truncated, info)``; the
        reward is the raw reward divided by the maximum episode reward, and
        ``info`` describes the email that was just scored.
        """
        # Hold on to the email being graded before the pointer moves.
        graded = self._current_email
        raw = self._compute_reward(action, graded)
        share = raw / self._max_episode_reward

        self._cumulative_reward += share
        self._step_idx += 1
        done = self._step_idx >= len(self._queue)

        if done:
            # Nothing left to show: repeat the email that was just graded.
            shown = graded
        else:
            shown = self._current_email = self._queue[self._step_idx]
        observation = self._encode_to_obs(shown)

        # Decode the action so info can carry human-readable labels.
        decision = EmailAction.from_array(action)

        info = {
            "raw_reward": raw,
            "correct_actions": graded["correct_actions"],
            "predicted": (decision.urgency,
                          decision.routing,
                          decision.resolution),
            "difficulty": graded["difficulty"],
            "description": graded.get("description", ""),
            "urgency_label": URGENCY_LABELS[decision.urgency],
            "routing_label": ROUTING_LABELS[decision.routing],
            "resolution_label": RESOLUTION_LABELS[decision.resolution],
            "cumulative_score": self._cumulative_reward,
        }
        return observation.to_array(), share, done, False, info

    def state(self) -> EnvironmentState:
        """
        Return a typed snapshot of the current environment state.

        Required by the OpenEnv specification.
        """
        total = len(self._queue)
        cursor = self._step_idx
        return EnvironmentState(
            step_index=cursor,
            total_emails=total,
            emails_remaining=max(0, total - cursor),
            current_email=self._current_email,
            cumulative_reward=self._cumulative_reward,
            task=self.task,
            terminated=cursor >= total,
        )

    def render(self, mode: str = "human") -> None:
        """Print a one-line summary of the current email to stdout."""
        mail = self._current_email
        position = f"[{self.task.upper()} | Step {self._step_idx}/{len(self._queue)}] "
        labels = f"{mail['description']} | {mail['difficulty']} | "
        signals = f"sentiment={mail['sentiment']} context={mail['context']}"
        print(position + labels + signals)
|
environment.py
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EmailTriageEnv — Intelligent Email Gatekeeper RL Environment
|
| 3 |
+
============================================================
|
| 4 |
+
Observation : flat Box vector encoding keywords, sentiment, and context.
|
| 5 |
+
Action : MultiDiscrete([3, 3, 3])
|
| 6 |
+
[0] Urgency    — 0=General, 1=Billing, 2=Security Breach
|
| 7 |
+
[1] Routing    — 0=AI Auto-Reply, 1=Tech Support, 2=Legal
|
| 8 |
+
[2] Resolution — 0=Archive, 1=Draft Reply, 2=Escalate to Human
|
| 9 |
+
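Reward : raw per-email values are +1.0 exact match | +0.2 urgency right, one other field wrong | +0.1 urgency right, both others wrong | -2.0 security-urgency email given any other urgency | 0.0 otherwise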
|
| 10 |
+
Each step's reward is divided by the number of emails, so the ideal cumulative score over one episode is 1.0 (normalised).
|
| 11 |
+
Episode : one email per step; done after all emails in the batch.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import gymnasium as gym
|
| 16 |
+
from gymnasium import spaces
|
| 17 |
+
|
| 18 |
+
# ---------------------------------------------------------------------------
|
| 19 |
+
# Vocabulary & encoding helpers
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
|
| 22 |
+
# Fixed keyword vocabulary, four words per topic group — extend freely.
# Order matters: the keyword flags of an observation follow this order.
KEYWORD_VOCAB = [
    *("invoice", "payment", "overdue", "refund"),        # billing
    *("hacked", "breach", "unauthorized", "password"),   # security
    *("crash", "error", "bug", "slow"),                  # tech
    *("lawsuit", "legal", "attorney", "sue"),            # legal
    *("spam", "offer", "win", "free"),                   # spam
    *("urgent", "critical", "angry", "threat"),          # sentiment signals
]

# Sentiment class -> one-hot index
SENTIMENT_MAP = {
    label: slot
    for slot, label in enumerate(("positive", "neutral", "negative"))
}

# Context class -> one-hot index
CONTEXT_MAP = {
    label: slot
    for slot, label in enumerate(("spam", "billing", "tech", "security", "legal"))
}

# Observation vector length: keyword flags + one-hot sentiment + one-hot context
OBS_DIM = sum(map(len, (KEYWORD_VOCAB, SENTIMENT_MAP, CONTEXT_MAP)))
|
| 40 |
+
|
| 41 |
+
# ---------------------------------------------------------------------------
|
| 42 |
+
# Mock email dataset
|
| 43 |
+
# ---------------------------------------------------------------------------
|
| 44 |
+
# Each entry:
|
| 45 |
+
# keywords       : list[str] — subset of KEYWORD_VOCAB
|
| 46 |
+
# sentiment      : str       — key in SENTIMENT_MAP
|
| 47 |
+
# context        : str       — key in CONTEXT_MAP
|
| 48 |
+
# difficulty     : str       — "easy" | "medium" | "hard"
|
| 49 |
+
# correct_actions: tuple     — (urgency, routing, resolution)
|
| 50 |
+
# description    : str       — human-readable label (for debugging)
|
| 51 |
+
|
| 52 |
+
# Mock emails, three lines per entry. "correct_actions" is the triple
# (urgency, routing, resolution):
#   urgency    0=General        1=Billing       2=Security Breach
#   routing    0=AI Auto-Reply  1=Tech Support  2=Legal
#   resolution 0=Archive        1=Draft Reply   2=Escalate to Human
# NOTE(review): the "β" inside some descriptions looks like a mis-decoded dash;
# it is kept exactly as found — confirm against the upstream file.
EMAIL_DATASET = [
    # -- Easy: spam vs. real mail --------------------------------------------
    {"description": "Spam promo", "keywords": ["spam", "offer", "win", "free"],
     "sentiment": "positive", "context": "spam", "difficulty": "easy",
     "correct_actions": (0, 0, 0)},
    {"description": "Spam lottery", "keywords": ["free", "win", "offer"],
     "sentiment": "positive", "context": "spam", "difficulty": "easy",
     "correct_actions": (0, 0, 0)},
    {"description": "Routine support request", "keywords": ["slow", "error"],
     "sentiment": "neutral", "context": "tech", "difficulty": "easy",
     "correct_actions": (0, 1, 1)},
    {"description": "General billing inquiry", "keywords": ["invoice", "payment"],
     "sentiment": "neutral", "context": "billing", "difficulty": "easy",
     "correct_actions": (1, 0, 1)},

    # -- Medium: billing / tech context --------------------------------------
    {"description": "Overdue invoice complaint",
     "keywords": ["invoice", "overdue", "payment", "angry"],
     "sentiment": "negative", "context": "billing", "difficulty": "medium",
     "correct_actions": (1, 0, 1)},
    {"description": "Refund dispute", "keywords": ["refund", "payment", "angry"],
     "sentiment": "negative", "context": "billing", "difficulty": "medium",
     "correct_actions": (1, 2, 2)},
    {"description": "App crash report", "keywords": ["crash", "bug", "error"],
     "sentiment": "negative", "context": "tech", "difficulty": "medium",
     "correct_actions": (0, 1, 1)},
    {"description": "Persistent login bug", "keywords": ["bug", "password", "error"],
     "sentiment": "negative", "context": "tech", "difficulty": "medium",
     "correct_actions": (0, 1, 1)},

    # -- Medium: passive-aggressive legal threats ----------------------------
    {"id": "TC-M-01", "description": "Polite refund ultimatum with implied legal action",
     "keywords": ["refund", "legal", "angry", "threat"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-02", "description": "Overdue invoice with attorney CC warning",
     "keywords": ["invoice", "overdue", "attorney", "legal", "payment", "threat"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-03", "description": "Disappointed customer implying regulatory complaint",
     "keywords": ["angry", "threat", "legal"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-04", "description": "SLA breach complaint with legal remedy language",
     "keywords": ["breach", "legal", "threat", "angry"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-05", "description": "Billing dispute with small claims court mention",
     "keywords": ["refund", "payment", "angry", "lawsuit", "urgent"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-06", "description": "GDPR deletion request with legal consequence warning",
     "keywords": ["legal", "breach", "threat", "angry"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-07", "description": "Subscription cancellation with chargeback and CFPB threat",
     "keywords": ["payment", "refund", "angry", "threat", "legal"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-M-08", "description": "Vendor threatening IP infringement claim",
     "keywords": ["unauthorized", "legal", "attorney", "threat"],
     "sentiment": "negative", "context": "legal", "difficulty": "medium",
     "correct_actions": (2, 2, 2)},

    # -- Hard: subtle phishing attempts --------------------------------------
    {"id": "TC-H-01", "description": "IT password reset disguised as routine security audit",
     "keywords": ["password", "unauthorized", "critical", "urgent", "threat"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-02", "description": "Fake invoice payment portal redirect β credential harvest",
     "keywords": ["invoice", "payment", "password", "unauthorized", "urgent"],
     "sentiment": "neutral", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-03", "description": "HR benefits enrollment with credential capture",
     "keywords": ["password", "urgent", "critical"],
     "sentiment": "neutral", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-04", "description": "Fake account suspension notice with login link",
     "keywords": ["unauthorized", "password", "breach", "urgent", "threat"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-05", "description": "Vendor onboarding BEC β admin credentials via reply",
     "keywords": ["password", "unauthorized", "urgent"],
     "sentiment": "neutral", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-06", "description": "Fake new sign-in alert β was this you? phish",
     "keywords": ["unauthorized", "password", "hacked", "breach", "urgent"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-07", "description": "Payroll migration phish β salary interruption fear",
     "keywords": ["payment", "password", "urgent", "threat"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-08", "description": "Software license renewal β admin credential request",
     "keywords": ["password", "critical", "urgent", "error"],
     "sentiment": "neutral", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},

    # -- Hard: phishing with a legal-threat overlay --------------------------
    # TC-H-09: legal language is the deception layer; credential harvest = Tech Support
    {"id": "TC-H-09", "description": "Fake GDPR breach notice β credential harvest via legal fear",
     "keywords": ["breach", "hacked", "password", "legal", "threat", "urgent", "unauthorized"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"id": "TC-H-10", "description": "Ransomware disguised as software compliance audit",
     "keywords": ["hacked", "breach", "unauthorized", "lawsuit", "legal", "threat", "critical", "urgent"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-H-11", "description": "Extortion β threatening to publish stolen data",
     "keywords": ["hacked", "breach", "unauthorized", "attorney", "threat", "critical", "urgent"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 2, 2)},
    {"id": "TC-H-12", "description": "Fake law firm letter claiming evidence of data misuse",
     "keywords": ["unauthorized", "breach", "attorney", "lawsuit", "legal", "threat"],
     "sentiment": "negative", "context": "legal", "difficulty": "hard",
     "correct_actions": (2, 2, 2)},

    # -- Hard: crisis / security threats -------------------------------------
    {"description": "Account hacked β urgent",
     "keywords": ["hacked", "unauthorized", "password", "urgent", "angry"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"description": "Data breach notification",
     "keywords": ["breach", "unauthorized", "critical", "threat"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 1, 2)},
    {"description": "Legal threat β lawsuit",
     "keywords": ["lawsuit", "legal", "attorney", "threat", "angry"],
     "sentiment": "negative", "context": "legal", "difficulty": "hard",
     "correct_actions": (2, 2, 2)},
    {"description": "Ransomware / extortion threat",
     "keywords": ["hacked", "threat", "critical", "urgent", "breach"],
     "sentiment": "negative", "context": "security", "difficulty": "hard",
     "correct_actions": (2, 2, 2)},
]
|
| 337 |
+
|
| 338 |
+
# ---------------------------------------------------------------------------
|
| 339 |
+
# Reward weights — adjust freely
|
| 340 |
+
# ---------------------------------------------------------------------------
|
| 341 |
+
REWARD_CORRECT_FULL = 1.0 # all three actions exactly right
|
| 342 |
+
REWARD_PARTIAL_ONE_WRONG = 0.2 # urgency right, exactly one other dimension wrong
|
| 343 |
+
REWARD_PARTIAL_BOTH_WRONG= 0.1 # urgency right, but routing and resolution both wrong
|
| 344 |
+
PENALTY_MISSED_SECURITY = -2.0 # a security-breach email was not given urgency 2
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
# ---------------------------------------------------------------------------
|
| 348 |
+
# Environment
|
| 349 |
+
# ---------------------------------------------------------------------------
|
| 350 |
+
|
| 351 |
+
class EmailTriageEnv(gym.Env):
|
| 352 |
+
"""
|
| 353 |
+
Single-email-per-step triage environment.
|
| 354 |
+
|
| 355 |
+
Parameters
|
| 356 |
+
----------
|
| 357 |
+
batch : list[dict] | None
|
| 358 |
+
Custom email batch. Defaults to the full EMAIL_DATASET.
|
| 359 |
+
shuffle : bool
|
| 360 |
+
Shuffle the batch on each reset (default True).
|
| 361 |
+
"""
|
| 362 |
+
|
| 363 |
+
metadata = {"render_modes": ["human"]}
|
| 364 |
+
|
| 365 |
+
def __init__(self, batch: list | None = None, shuffle: bool = True):
|
| 366 |
+
super().__init__()
|
| 367 |
+
|
| 368 |
+
self.email_batch = batch if batch is not None else EMAIL_DATASET
|
| 369 |
+
self.shuffle = shuffle
|
| 370 |
+
|
| 371 |
+
# Action space: [urgency(3), routing(3), resolution(3)]
|
| 372 |
+
self.action_space = spaces.MultiDiscrete([3, 3, 3])
|
| 373 |
+
|
| 374 |
+
# Observation space: binary keyword flags + one-hot sentiment + one-hot context
|
| 375 |
+
self.observation_space = spaces.Box(
|
| 376 |
+
low=0.0, high=1.0, shape=(OBS_DIM,), dtype=np.float32
|
| 377 |
+
)
|
| 378 |
+
|
| 379 |
+
# Internal state (populated by reset)
|
| 380 |
+
self._queue: list[dict] = []
|
| 381 |
+
self._current_email: dict = {}
|
| 382 |
+
self._step_idx: int = 0
|
| 383 |
+
|
| 384 |
+
# Normalisation constant: max possible reward per episode
|
| 385 |
+
self._max_episode_reward = len(self.email_batch) * REWARD_CORRECT_FULL
|
| 386 |
+
|
| 387 |
+
# ------------------------------------------------------------------
|
| 388 |
+
# Helpers
|
| 389 |
+
# ------------------------------------------------------------------
|
| 390 |
+
|
| 391 |
+
def _encode(self, email: dict) -> np.ndarray:
|
| 392 |
+
"""Convert an email dict into a flat float32 observation vector."""
|
| 393 |
+
# Keyword flags (binary)
|
| 394 |
+
kw_flags = np.array(
|
| 395 |
+
[1.0 if kw in email["keywords"] else 0.0 for kw in KEYWORD_VOCAB],
|
| 396 |
+
dtype=np.float32,
|
| 397 |
+
)
|
| 398 |
+
# One-hot sentiment
|
| 399 |
+
sentiment_vec = np.zeros(len(SENTIMENT_MAP), dtype=np.float32)
|
| 400 |
+
sentiment_vec[SENTIMENT_MAP[email["sentiment"]]] = 1.0
|
| 401 |
+
|
| 402 |
+
# One-hot context
|
| 403 |
+
context_vec = np.zeros(len(CONTEXT_MAP), dtype=np.float32)
|
| 404 |
+
context_vec[CONTEXT_MAP[email["context"]]] = 1.0
|
| 405 |
+
|
| 406 |
+
return np.concatenate([kw_flags, sentiment_vec, context_vec])
|
| 407 |
+
|
| 408 |
+
def _compute_reward(self, action: np.ndarray, email: dict) -> float:
|
| 409 |
+
"""
|
| 410 |
+
Strict reward rules (priority order mein check hote hain):
|
| 411 |
+
-2.0 β security breach email ko urgency=2 nahi diya (sabse bada penalty)
|
| 412 |
+
+1.0 β teeno actions bilkul sahi (exact match)
|
| 413 |
+
+0.2 β sirf urgency sahi, routing AUR resolution dono sahi (2 out of 3)
|
| 414 |
+
+0.1 β sirf urgency sahi, baaki ek galat (1 dimension wrong)
|
| 415 |
+
0.0 β urgency hi galat hai (non-security email)
|
| 416 |
+
"""
|
| 417 |
+
urgency = int(action[0])
|
| 418 |
+
routing = int(action[1])
|
| 419 |
+
resolution = int(action[2])
|
| 420 |
+
correct = email["correct_actions"]
|
| 421 |
+
|
| 422 |
+
# Priority 1: Security breach miss β sabse bada crime
|
| 423 |
+
if correct[0] == 2 and urgency != 2:
|
| 424 |
+
return PENALTY_MISSED_SECURITY
|
| 425 |
+
|
| 426 |
+
# Priority 2: Perfect match
|
| 427 |
+
if (urgency, routing, resolution) == correct:
|
| 428 |
+
return REWARD_CORRECT_FULL
|
| 429 |
+
|
| 430 |
+
# Priority 3: Urgency sahi hai β partial credit
|
| 431 |
+
if urgency == correct[0]:
|
| 432 |
+
# Dono routing aur resolution galat hain
|
| 433 |
+
routing_ok = (routing == correct[1])
|
| 434 |
+
resolution_ok = (resolution == correct[2])
|
| 435 |
+
if routing_ok and not resolution_ok:
|
| 436 |
+
return REWARD_PARTIAL_ONE_WRONG # sirf resolution galat
|
| 437 |
+
if resolution_ok and not routing_ok:
|
| 438 |
+
return REWARD_PARTIAL_ONE_WRONG # sirf routing galat
|
| 439 |
+
return REWARD_PARTIAL_BOTH_WRONG # dono galat
|
| 440 |
+
|
| 441 |
+
return 0.0
|
| 442 |
+
|
| 443 |
+
# ------------------------------------------------------------------
|
| 444 |
+
# gymnasium API
|
| 445 |
+
# ------------------------------------------------------------------
|
| 446 |
+
|
| 447 |
+
def reset(self, *, seed: int | None = None, options: dict | None = None):
|
| 448 |
+
super().reset(seed=seed)
|
| 449 |
+
|
| 450 |
+
self._queue = list(self.email_batch)
|
| 451 |
+
if self.shuffle:
|
| 452 |
+
self.np_random.shuffle(self._queue) # uses gymnasium's seeded RNG
|
| 453 |
+
|
| 454 |
+
self._step_idx = 0
|
| 455 |
+
self._current_email = self._queue[self._step_idx]
|
| 456 |
+
|
| 457 |
+
obs = self._encode(self._current_email)
|
| 458 |
+
info = {"description": self._current_email["description"],
|
| 459 |
+
"difficulty": self._current_email["difficulty"]}
|
| 460 |
+
return obs, info
|
| 461 |
+
|
| 462 |
+
def step(self, action: np.ndarray):
|
| 463 |
+
"""
|
| 464 |
+
Process one email triage decision.
|
| 465 |
+
|
| 466 |
+
Returns
|
| 467 |
+
-------
|
| 468 |
+
obs, reward, terminated, truncated, info
|
| 469 |
+
"""
|
| 470 |
+
# ββ Bug Fix: correct_actions PEHLE save karo, PHIR pointer badlao ββ
|
| 471 |
+
# Pehle current email ka ground truth capture karo reward ke saath
|
| 472 |
+
scored_email = self._current_email
|
| 473 |
+
reward = self._compute_reward(action, scored_email)
|
| 474 |
+
normalised_reward = reward / self._max_episode_reward
|
| 475 |
+
|
| 476 |
+
# Ab pointer aage badhao
|
| 477 |
+
self._step_idx += 1
|
| 478 |
+
terminated = self._step_idx >= len(self._queue)
|
| 479 |
+
|
| 480 |
+
if not terminated:
|
| 481 |
+
self._current_email = self._queue[self._step_idx]
|
| 482 |
+
obs = self._encode(self._current_email)
|
| 483 |
+
else:
|
| 484 |
+
obs = self._encode(scored_email) # terminal step pe last obs return karo
|
| 485 |
+
|
| 486 |
+
# info mein SCORED email ka data β agli email ka nahi
|
| 487 |
+
info = {
|
| 488 |
+
"raw_reward": reward,
|
| 489 |
+
"correct_actions": scored_email["correct_actions"],
|
| 490 |
+
"difficulty": scored_email["difficulty"],
|
| 491 |
+
"description": scored_email.get("description", ""),
|
| 492 |
+
}
|
| 493 |
+
return obs, normalised_reward, terminated, False, info
|
| 494 |
+
|
| 495 |
+
def render(self, mode: str = "human"):
    """Print a one-line summary of the current email to stdout.

    ``mode`` is accepted but not read by this implementation.
    """
    current = self._current_email
    segments = [
        f"[Step {self._step_idx}] {current['description']}",
        f"difficulty={current['difficulty']}",
        f"sentiment={current['sentiment']}",
        f"context={current['context']}",
    ]
    print(" | ".join(segments))
|
inference.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
inference.py β OpenEnv Baseline Inference Script
|
| 3 |
+
=================================================
|
| 4 |
+
Runs the rule-based classifier against all three tasks defined in
|
| 5 |
+
openenv.yaml and reports per-task scores in the 0.0 β 1.0 range.
|
| 6 |
+
|
| 7 |
+
This script proves reproducibility for the hackathon submission.
|
| 8 |
+
Run it with:
|
| 9 |
+
python inference.py
|
| 10 |
+
|
| 11 |
+
Expected output:
|
| 12 |
+
Task 1 [EASY] β Spam Detection : 1.000 β
|
| 13 |
+
Task 2 [MEDIUM] β Support Routing : 0.950 β
|
| 14 |
+
Task 3 [HARD] β Phishing / Security : 0.900 β
|
| 15 |
+
Overall Score : 0.950
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
from env import (
|
| 20 |
+
EmailTriageEnv,
|
| 21 |
+
EmailAction,
|
| 22 |
+
TASK_SPLITS,
|
| 23 |
+
URGENCY_LABELS,
|
| 24 |
+
ROUTING_LABELS,
|
| 25 |
+
RESOLUTION_LABELS,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# ββ Rule-based classifier (your 95%-accuracy agent) βββββββββββββββββββββββββββ
|
| 29 |
+
|
| 30 |
+
_LEGAL_SECURITY_KW = {"lawsuit", "attorney", "sue", "ransomware", "extortion"}
|
| 31 |
+
_BILLING_ESCALATE_KW = {"refund"}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _classify(email: dict) -> np.ndarray:
|
| 35 |
+
"""
|
| 36 |
+
Deterministic rule-based classifier.
|
| 37 |
+
Returns np.ndarray([urgency, routing, resolution]).
|
| 38 |
+
"""
|
| 39 |
+
kw = set(email.get("keywords", []))
|
| 40 |
+
context = email.get("context", "").lower()
|
| 41 |
+
|
| 42 |
+
if context == "legal" or kw & {"lawsuit", "attorney", "sue"}:
|
| 43 |
+
return np.array([2, 2, 2], dtype=np.int64)
|
| 44 |
+
|
| 45 |
+
if context == "security":
|
| 46 |
+
if kw & _LEGAL_SECURITY_KW or ("hacked" in kw and "breach" in kw):
|
| 47 |
+
return np.array([2, 2, 2], dtype=np.int64)
|
| 48 |
+
return np.array([2, 1, 2], dtype=np.int64)
|
| 49 |
+
|
| 50 |
+
if context == "billing":
|
| 51 |
+
if kw & _BILLING_ESCALATE_KW:
|
| 52 |
+
return np.array([1, 2, 2], dtype=np.int64)
|
| 53 |
+
return np.array([1, 0, 1], dtype=np.int64)
|
| 54 |
+
|
| 55 |
+
if context == "tech" or kw & {"crash", "error", "bug", "slow"}:
|
| 56 |
+
return np.array([0, 1, 1], dtype=np.int64)
|
| 57 |
+
|
| 58 |
+
return np.array([0, 0, 0], dtype=np.int64)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# ββ Per-task runner βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
+
|
| 63 |
+
def run_task(task: str, verbose: bool = False) -> float:
    """
    Run one full episode on the given task using the rule-based classifier.

    Args:
        task: Task id handed to ``EmailTriageEnv`` ("easy", "medium" and
            "hard" have display labels here; any other id is shown upper-cased).
        verbose: When True, print a per-email line with the raw reward and a
            verdict, plus predicted/correct labels for every non-exact answer.

    Returns:
        The sum of the normalised per-step rewards, clamped to [0.0, 1.0].

    Raises:
        AssertionError: if the environment does not report a terminated state
            after the loop finishes.
    """
    env = EmailTriageEnv(task=task, shuffle=False)
    env.reset(seed=42)

    # Snapshot the queue before any steps: the classifier works on the raw
    # email dicts, not on the encoded observation returned by the env.
    email_queue = list(env._queue)
    cumulative_score = 0.0
    step = 0
    terminated = False

    task_labels = {
        "easy": "Task 1 [EASY] — Spam Detection ",
        "medium": "Task 2 [MEDIUM] — Support Routing ",
        "hard": "Task 3 [HARD] — Phishing / Security ",
    }

    if verbose:
        rule = "─" * 58
        print(f"\n {rule}")
        print(f" {task_labels.get(task, task.upper())}")
        print(f" {rule}")

    while not terminated:
        current_email = email_queue[step]
        action = _classify(current_email)

        _, norm_reward, terminated, _, info = env.step(action)
        cumulative_score += norm_reward

        if verbose:
            ca = info["correct_actions"]
            raw = info["raw_reward"]

            pred_str = (f"{URGENCY_LABELS[action[0]]} | "
                        f"{ROUTING_LABELS[action[1]]} | "
                        f"{RESOLUTION_LABELS[action[2]]}")
            corr_str = (f"{URGENCY_LABELS[ca[0]]} | "
                        f"{ROUTING_LABELS[ca[1]]} | "
                        f"{RESOLUTION_LABELS[ca[2]]}")

            if raw >= 1.0:
                verdict = "✅ EXACT"
            elif raw > 0:
                verdict = "🔶 PARTIAL"
            elif raw < 0:
                verdict = "🚨 SECURITY MISS"
            else:
                verdict = "❌ WRONG"

            print(f" #{step+1:02d} [{current_email['difficulty'].upper():<6}] "
                  f"{current_email['description'][:35]:<35} "
                  f"reward={raw:+.1f} {verdict}")
            if raw < 1.0:
                print(f" Predicted : {pred_str}")
                print(f" Correct : {corr_str}")

        step += 1

    # Clamp to [0.0, 1.0] — penalties can push the sum below 0.
    final_score = max(0.0, min(1.0, cumulative_score))

    env_state = env.state()
    assert env_state.terminated, "Episode should be terminated after all steps"

    return final_score
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# ββ Main ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 133 |
+
|
| 134 |
+
def main():
    """
    Run every task with verbose output and print the reproducibility report.

    Prints a per-task score line, an overall score weighted by the number of
    emails in each task (``len(TASK_SPLITS[task_id])``), and a summary table.

    Returns:
        dict with "task_scores" (task id -> score), "overall" (weighted
        average rounded to 4 places) and "total_emails", for programmatic
        use such as CI pipelines.
    """
    banner = "═" * 62
    print(f"\n{banner}")
    print(" EMAIL GATEKEEPER — OpenEnv Baseline Inference")
    print(" Meta x PyTorch Hackathon | Reproducibility Report")
    print(f"{banner}")

    tasks = [
        ("easy", "Task 1 [EASY] — Spam Detection "),
        ("medium", "Task 2 [MEDIUM] — Support Routing "),
        ("hard", "Task 3 [HARD] — Phishing / Security "),
    ]

    scores = {}
    for task_id, label in tasks:
        score = run_task(task_id, verbose=True)
        scores[task_id] = score

        icon = "✅" if score >= 0.8 else ("⚠️ " if score >= 0.5 else "❌")
        print(f"\n {label}: {score:.3f} {icon}")

    # Overall score = weighted average by number of emails per task.
    weights = {t: len(TASK_SPLITS[t]) for t in scores}
    total_weight = sum(weights.values())
    overall = sum(scores[t] * weights[t] / total_weight for t in scores)

    print(f"\n{banner}")
    print(f" {'Overall Score (weighted avg)':<42}: {overall:.3f}")
    print(f" {'Total Emails Evaluated':<42}: {total_weight}")

    # Per-task summary table.
    print(f"\n {'Task':<10} {'Emails':>7} {'Score':>8} {'Status':>10}")
    print(f" {'─'*10} {'─'*7} {'─'*8} {'─'*10}")
    for task_id, label in tasks:
        n = len(TASK_SPLITS[task_id])
        s = scores[task_id]
        status = "PASS ✅" if s >= 0.8 else ("WARN ⚠️ " if s >= 0.5 else "FAIL ❌")
        print(f" {task_id:<10} {n:>7} {s:>8.3f} {status:>10}")

    print(f"\n{banner}\n")

    # Return scores dict for programmatic use (e.g. CI pipelines).
    return {
        "task_scores": scores,
        "overall": round(overall, 4),
        "total_emails": total_weight,
    }
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
if __name__ == "__main__":
|
| 189 |
+
results = main()
|
lambda/classifier.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
classifier.py β Portable rule-based email classifier for AWS Lambda.
|
| 3 |
+
Extracted from inference.py with zero heavy dependencies (no numpy/gymnasium).
|
| 4 |
+
All logic is identical to the local rule engine.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
# ββ Label maps (mirrors environment.py) βββββββββββββββββββββββββββββββββββββ
|
| 8 |
+
URGENCY_LABELS = {0: "General", 1: "Billing", 2: "Security Breach"}
ROUTING_LABELS = {0: "AI Auto-Reply", 1: "Tech Support", 2: "Legal"}
RESOLUTION_LABELS = {0: "Archive", 1: "Draft Reply", 2: "Escalate"}

# Keywords that push a security email to Legal (ransomware / extortion level).
# The bare word "legal" is intentionally not listed: Legal routing is decided
# by the context field or by the lawsuit/attorney/sue keywords, not by the
# word "legal" appearing in the text.
_LEGAL_SECURITY_KW = {"lawsuit", "attorney", "sue", "ransomware", "extortion"}

# Only "refund" escalates a billing email to Legal — "overdue" stays routine.
_BILLING_ESCALATE_KW = {"refund"}

# Full keyword vocabulary used for feature extraction from raw email text.
KEYWORD_VOCAB = [
    "invoice", "payment", "overdue", "refund",
    "hacked", "breach", "unauthorized", "password",
    "crash", "error", "bug", "slow",
    "lawsuit", "legal", "attorney", "sue",
    "spam", "offer", "win", "free",
    "urgent", "critical", "angry", "threat",
]


def extract_features(subject: str, body: str) -> dict:
    """
    Parse raw email text into the feature dict expected by classify().

    Args:
        subject: Email subject line.
        body: Plain-text email body.

    Returns:
        {"keywords": list[str], "sentiment": str, "context": str} where
        keywords are the KEYWORD_VOCAB entries found in the text (in
        vocabulary order), sentiment is "negative" / "positive" / "neutral",
        and context is one of "security", "legal", "billing", "tech",
        "spam" or "general".
    """
    import string

    text = (subject + " " + body).lower()
    # Strip surrounding punctuation so "refund." or "hacked!" still match;
    # inner punctuation (e.g. "ad-free", e-mail addresses) is left untouched.
    tokens = {word.strip(string.punctuation) for word in text.split()}
    tokens.discard("")

    keywords = [kw for kw in KEYWORD_VOCAB if kw in tokens]

    # Simple sentiment: negative words outweigh positive.
    neg_words = {"angry", "threat", "hacked", "breach", "lawsuit", "overdue",
                 "unauthorized", "ransomware", "critical", "urgent", "error",
                 "crash", "bug", "refund"}
    pos_words = {"win", "free", "offer", "congratulations", "prize"}

    neg_hits = len(tokens & neg_words)
    pos_hits = len(tokens & pos_words)

    if neg_hits > pos_hits:
        sentiment = "negative"
    elif pos_hits > 0:
        sentiment = "positive"
    else:
        sentiment = "neutral"

    # Context: first strong signal wins.
    kw_set = set(keywords)
    # "ransomware" is not in KEYWORD_VOCAB, so it must be looked up in the
    # raw tokens; checking kw_set for it could never match.
    if kw_set & {"hacked", "breach", "unauthorized"} or "ransomware" in tokens:
        context = "security"
    elif kw_set & {"lawsuit", "attorney", "sue"}:
        context = "legal"
    elif kw_set & {"invoice", "payment", "overdue", "refund"}:
        context = "billing"
    elif kw_set & {"crash", "error", "bug", "slow", "password"}:
        context = "tech"
    elif kw_set & {"spam", "offer", "win", "free"}:
        context = "spam"
    else:
        context = "general"

    return {"keywords": keywords, "sentiment": sentiment, "context": context}


def classify(email: dict) -> tuple[int, int, int]:
    """
    Rule-based classifier. Accepts a feature dict (keywords, context).
    Returns (urgency, routing, resolution) as plain ints.

    Priority order (first match wins):
      1.  Legal context / lawsuit-attorney-sue keywords -> (2, 2, 2)
      2a. Security + legal signal, or hacked + breach    -> (2, 2, 2)
      2b. Security, account-level                        -> (2, 1, 2)
      3.  Billing dispute (refund)                       -> (1, 2, 2)
      4.  Billing routine                                -> (1, 0, 1)
      5.  Tech support                                   -> (0, 1, 1)
      6.  Spam / default                                 -> (0, 0, 0)
    """
    kw = set(email.get("keywords", []))
    context = email.get("context", "").lower()

    if context == "legal" or kw & {"lawsuit", "attorney", "sue"}:
        return (2, 2, 2)

    if context == "security":
        if kw & _LEGAL_SECURITY_KW or ("hacked" in kw and "breach" in kw):
            return (2, 2, 2)
        return (2, 1, 2)

    if context == "billing":
        if kw & _BILLING_ESCALATE_KW:
            return (1, 2, 2)
        return (1, 0, 1)

    if context == "tech" or kw & {"crash", "error", "bug", "slow"}:
        return (0, 1, 1)

    return (0, 0, 0)


def decode(urgency: int, routing: int, resolution: int) -> dict:
    """Convert an integer action triple to a dict of codes and readable labels.

    Raises:
        KeyError: if any code is not a key of the corresponding label map.
    """
    return {
        "urgency_code": urgency,
        "routing_code": routing,
        "resolution_code": resolution,
        "urgency": URGENCY_LABELS[urgency],
        "routing": ROUTING_LABELS[routing],
        "resolution": RESOLUTION_LABELS[resolution],
    }
|
lambda/handler.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
handler.py β AWS Lambda entry point for the Email Gatekeeper.
|
| 3 |
+
|
| 4 |
+
Trigger paths:
|
| 5 |
+
A) S3 Event : SES stores raw .eml β S3 β Lambda (s3:ObjectCreated)
|
| 6 |
+
B) Direct JSON : {"subject": "...", "body": "..."} for testing / API Gateway
|
| 7 |
+
|
| 8 |
+
On each invocation:
|
| 9 |
+
1. Parse the email (S3 object or direct payload)
|
| 10 |
+
2. Extract features (classifier.extract_features)
|
| 11 |
+
3. Classify (classifier.classify)
|
| 12 |
+
4. Persist result β DynamoDB table (EMAIL_RESULTS_TABLE env var)
|
| 13 |
+
5. Alert on breach β SNS topic (SECURITY_ALERT_TOPIC_ARN env var)
|
| 14 |
+
6. Return JSON result
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import os
|
| 19 |
+
import email
|
| 20 |
+
import uuid
|
| 21 |
+
import logging
|
| 22 |
+
from datetime import datetime, timezone
|
| 23 |
+
|
| 24 |
+
import boto3
|
| 25 |
+
|
| 26 |
+
from classifier import classify, decode, extract_features
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger()
|
| 29 |
+
logger.setLevel(logging.INFO)
|
| 30 |
+
|
| 31 |
+
# AWS clients β initialised once at cold-start for connection reuse
|
| 32 |
+
_s3 = boto3.client("s3")
|
| 33 |
+
_dynamodb = boto3.resource("dynamodb")
|
| 34 |
+
_sns = boto3.client("sns")
|
| 35 |
+
|
| 36 |
+
# Environment variables injected by CDK
|
| 37 |
+
_TABLE_NAME = os.environ.get("EMAIL_RESULTS_TABLE", "")
|
| 38 |
+
_TOPIC_ARN = os.environ.get("SECURITY_ALERT_TOPIC_ARN", "")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# ββ Helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 42 |
+
|
| 43 |
+
def _parse_eml(raw_bytes: bytes) -> tuple[str, str]:
|
| 44 |
+
"""Extract subject and plain-text body from a raw .eml byte string."""
|
| 45 |
+
msg = email.message_from_bytes(raw_bytes)
|
| 46 |
+
subject = msg.get("Subject", "")
|
| 47 |
+
|
| 48 |
+
body = ""
|
| 49 |
+
if msg.is_multipart():
|
| 50 |
+
for part in msg.walk():
|
| 51 |
+
if part.get_content_type() == "text/plain":
|
| 52 |
+
body = part.get_payload(decode=True).decode("utf-8", errors="replace")
|
| 53 |
+
break
|
| 54 |
+
else:
|
| 55 |
+
body = msg.get_payload(decode=True).decode("utf-8", errors="replace")
|
| 56 |
+
|
| 57 |
+
return subject, body
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _fetch_from_s3(bucket: str, key: str) -> tuple[str, str]:
    """Read the raw .eml object stored at s3://bucket/key and return (subject, body)."""
    logger.info("Fetching s3://%s/%s", bucket, key)
    response = _s3.get_object(Bucket=bucket, Key=key)
    return _parse_eml(response["Body"].read())
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _save_to_dynamodb(record: dict) -> None:
    """
    Write the triage result to the DynamoDB table named by ``_TABLE_NAME``.

    Best-effort: does nothing when no table name is configured, and any
    exception raised by the write is logged rather than propagated.
    """
    if _TABLE_NAME:
        try:
            _dynamodb.Table(_TABLE_NAME).put_item(Item=record)
        except Exception as exc:
            logger.error("DynamoDB write failed: %s", exc)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _alert_security(record: dict) -> None:
    """
    Publish an SNS alert when a Security Breach is detected.

    Best-effort: does nothing when ``_TOPIC_ARN`` is empty, and any exception
    raised by the publish call is logged rather than propagated.

    Args:
        record: The triage record; sent as the message body, pretty-printed
            as JSON.
    """
    if not _TOPIC_ARN:
        return
    try:
        _sns.publish(
            TopicArn=_TOPIC_ARN,
            # Subject restored from a mis-encoded literal; it must stay free
            # of control characters and line breaks.
            Subject="🚨 Security Breach Email Detected",
            Message=json.dumps(record, indent=2),
        )
        logger.info("SNS alert published for email_id=%s", record.get("email_id"))
    except Exception as exc:
        logger.error("SNS publish failed: %s", exc)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# ββ Main handler βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
+
|
| 96 |
+
def lambda_handler(event: dict, context) -> dict:
    """
    Unified entry point for S3-triggered and direct-invocation events.

    S3 event shape (from SES -> S3 -> Lambda notification):
        {"Records": [{"eventSource": "aws:s3",
                      "s3": {"bucket": {"name": "..."}, "object": {"key": "..."}}}]}

    Direct invocation shape (for testing or API Gateway):
        {"subject": "Your invoice is overdue", "body": "Please pay immediately."}

    Args:
        event: The Lambda event payload in one of the two shapes above.
        context: Lambda context object; not used.

    Returns:
        {"statusCode": 200, "body": <JSON string of the triage record>} on
        success, or {"statusCode": 400, "body": <message>} when neither a
        subject nor a body could be obtained.
    """
    from urllib.parse import unquote_plus

    logger.info("Event received: %s", json.dumps(event)[:500])

    # ── Determine input source ──────────────────────────────────────────────
    records = event.get("Records", [])

    if records and records[0].get("eventSource") == "aws:s3":
        # Path A: triggered by S3 object creation (SES-delivered email).
        # Only the first record of the event is processed.
        s3_info = records[0]["s3"]
        bucket = s3_info["bucket"]["name"]
        # Object keys arrive URL-encoded in S3 notifications (space -> "+",
        # other characters percent-encoded); decode before calling S3.
        key = unquote_plus(s3_info["object"]["key"])
        subject, body = _fetch_from_s3(bucket, key)
        source_ref = f"s3://{bucket}/{key}"
    else:
        # Path B: direct JSON invocation (testing / API Gateway).
        subject = event.get("subject", "")
        body = event.get("body", "")
        source_ref = "direct-invocation"

    if not subject and not body:
        return {"statusCode": 400, "body": "No email content found in event."}

    # ── Classify ────────────────────────────────────────────────────────────
    features = extract_features(subject, body)
    urgency, routing, res = classify(features)
    result = decode(urgency, routing, res)

    # ── Build persistence record ────────────────────────────────────────────
    email_id = str(uuid.uuid4())
    record = {
        "email_id": email_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "source": source_ref,
        "subject": subject[:500],  # cap to avoid DDB item size issues
        "detected_keywords": features["keywords"],
        "sentiment": features["sentiment"],
        "context": features["context"],
        **result,  # urgency/routing/resolution labels + codes
    }

    logger.info("Classification result: %s", json.dumps(result))

    # ── Persist & alert ─────────────────────────────────────────────────────
    _save_to_dynamodb(record)

    if urgency == 2:  # Security Breach
        _alert_security(record)

    return {
        "statusCode": 200,
        "body": json.dumps(record),
    }
|
openenv.yaml
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# openenv.yaml β Email Gatekeeper RL Environment
|
| 2 |
+
# OpenEnv Specification v1.0
|
| 3 |
+
# Meta x PyTorch Hackathon Submission
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
name: email-gatekeeper
|
| 7 |
+
version: "1.0.0"
|
| 8 |
+
description: >
|
| 9 |
+
Intelligent Email Gatekeeper β a Gymnasium-based Reinforcement Learning
|
| 10 |
+
environment where an agent learns to triage emails by simultaneously
|
| 11 |
+
predicting three dimensions: Urgency Category, Department Routing,
|
| 12 |
+
and Resolution Action. Covers 32 scenarios across spam detection,
|
| 13 |
+
support routing, and phishing/security threat identification.
|
| 14 |
+
|
| 15 |
+
author: zerogravity
|
| 16 |
+
license: MIT
|
| 17 |
+
framework: gymnasium
|
| 18 |
+
python_requires: ">=3.10"
|
| 19 |
+
|
| 20 |
+
# ββ Entry point βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
entry_point: "env:EmailTriageEnv"
|
| 22 |
+
|
| 23 |
+
# ββ Observation space βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
+
observation_space:
|
| 25 |
+
type: Box
|
| 26 |
+
shape: [32]
|
| 27 |
+
dtype: float32
|
| 28 |
+
low: 0.0
|
| 29 |
+
high: 1.0
|
| 30 |
+
description: >
|
| 31 |
+
Flat vector of 32 floats encoding:
|
| 32 |
+
[0:24] Binary keyword flags (24 vocab words)
|
| 33 |
+
[24:27] One-hot sentiment (positive / neutral / negative)
|
| 34 |
+
[27:32] One-hot context (spam / billing / tech / security / legal)
|
| 35 |
+
|
| 36 |
+
# ββ Action space ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
+
action_space:
|
| 38 |
+
type: MultiDiscrete
|
| 39 |
+
nvec: [3, 3, 3]
|
| 40 |
+
dimensions:
|
| 41 |
+
- name: urgency
|
| 42 |
+
index: 0
|
| 43 |
+
values:
|
| 44 |
+
0: General
|
| 45 |
+
1: Billing
|
| 46 |
+
2: Security Breach
|
| 47 |
+
- name: routing
|
| 48 |
+
index: 1
|
| 49 |
+
values:
|
| 50 |
+
0: AI Auto-Reply
|
| 51 |
+
1: Tech Support
|
| 52 |
+
2: Legal
|
| 53 |
+
- name: resolution
|
| 54 |
+
index: 2
|
| 55 |
+
values:
|
| 56 |
+
0: Archive
|
| 57 |
+
1: Draft Reply
|
| 58 |
+
2: Escalate
|
| 59 |
+
|
| 60 |
+
# ββ Reward function βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
+
reward:
|
| 62 |
+
description: >
|
| 63 |
+
Strict penalty-based reward. Security breach misses are penalised
|
| 64 |
+
at 2x the magnitude of a correct answer to reflect real-world risk.
|
| 65 |
+
rules:
|
| 66 |
+
- condition: "correct urgency=2 but predicted urgency != 2"
|
| 67 |
+
reward: -2.0
|
| 68 |
+
label: SECURITY_MISS
|
| 69 |
+
- condition: "all three dimensions exactly correct"
|
| 70 |
+
reward: +1.0
|
| 71 |
+
label: EXACT
|
| 72 |
+
- condition: "urgency correct, exactly one other dimension wrong"
|
| 73 |
+
reward: +0.2
|
| 74 |
+
label: PARTIAL_1
|
| 75 |
+
- condition: "urgency correct, both other dimensions wrong"
|
| 76 |
+
reward: +0.1
|
| 77 |
+
label: PARTIAL_2
|
| 78 |
+
- condition: "urgency wrong on non-security email"
|
| 79 |
+
reward: 0.0
|
| 80 |
+
label: WRONG
|
| 81 |
+
normalisation: >
|
| 82 |
+
Each raw reward is divided by (num_emails * 1.0) so the ideal
|
| 83 |
+
cumulative episode score = 1.0
|
| 84 |
+
|
| 85 |
+
# ββ Tasks βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 86 |
+
tasks:
|
| 87 |
+
|
| 88 |
+
- id: easy
|
| 89 |
+
name: "Task 1 β Spam vs Real Email Detection"
|
| 90 |
+
difficulty: easy
|
| 91 |
+
description: >
|
| 92 |
+
Agent must distinguish promotional spam from legitimate emails
|
| 93 |
+
and assign correct General/Billing urgency with appropriate routing.
|
| 94 |
+
num_emails: 4
|
| 95 |
+
email_types:
|
| 96 |
+
- Spam promotional
|
| 97 |
+
- Spam lottery
|
| 98 |
+
- Routine tech support
|
| 99 |
+
- General billing inquiry
|
| 100 |
+
target_score: 1.0
|
| 101 |
+
baseline_score: 1.0
|
| 102 |
+
success_threshold: 0.8
|
| 103 |
+
|
| 104 |
+
- id: medium
|
| 105 |
+
name: "Task 2 β Support Routing & Passive-Aggressive Legal Threats"
|
| 106 |
+
difficulty: medium
|
| 107 |
+
description: >
|
| 108 |
+
Agent must correctly route billing disputes, tech issues, and
|
| 109 |
+
passive-aggressive legal threats that use polite language to
|
| 110 |
+
disguise escalation intent.
|
| 111 |
+
num_emails: 8
|
| 112 |
+
email_types:
|
| 113 |
+
- Overdue invoice complaint
|
| 114 |
+
- Refund dispute
|
| 115 |
+
- App crash report
|
| 116 |
+
- Persistent login bug
|
| 117 |
+
- Polite legal ultimatum
|
| 118 |
+
- Attorney CC warning
|
| 119 |
+
- Regulatory complaint
|
| 120 |
+
- SLA breach legal notice
|
| 121 |
+
target_score: 1.0
|
| 122 |
+
baseline_score: 0.95
|
| 123 |
+
success_threshold: 0.75
|
| 124 |
+
|
| 125 |
+
- id: hard
|
| 126 |
+
name: "Task 3 β Phishing Detection & Security Threat Classification"
|
| 127 |
+
difficulty: hard
|
| 128 |
+
description: >
|
| 129 |
+
Agent must identify subtle phishing attempts disguised as IT notices,
|
| 130 |
+
HR emails, and vendor requests, plus classify ransomware and extortion
|
| 131 |
+
threats that combine security and legal signals.
|
| 132 |
+
num_emails: 16
|
| 133 |
+
email_types:
|
| 134 |
+
- IT audit phishing
|
| 135 |
+
- Fake invoice portal redirect
|
| 136 |
+
- HR credential capture
|
| 137 |
+
- Fake account suspension
|
| 138 |
+
- Business Email Compromise (BEC)
|
| 139 |
+
- Sign-in alert phishing
|
| 140 |
+
- Payroll migration phish
|
| 141 |
+
- License renewal BEC
|
| 142 |
+
- GDPR phishing with legal overlay
|
| 143 |
+
- Ransomware disguised as audit
|
| 144 |
+
- Data extortion threat
|
| 145 |
+
- Fake law firm letter
|
| 146 |
+
- Account hacked urgent
|
| 147 |
+
- Data breach notification
|
| 148 |
+
- Legal lawsuit threat
|
| 149 |
+
- Ransomware extortion
|
| 150 |
+
target_score: 1.0
|
| 151 |
+
baseline_score: 0.90
|
| 152 |
+
success_threshold: 0.70
|
| 153 |
+
|
| 154 |
+
# ββ Environment parameters ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 155 |
+
parameters:
|
| 156 |
+
shuffle:
|
| 157 |
+
type: bool
|
| 158 |
+
default: true
|
| 159 |
+
description: Shuffle email order on each reset for training variety
|
| 160 |
+
task:
|
| 161 |
+
type: str
|
| 162 |
+
default: all
|
| 163 |
+
choices: [easy, medium, hard, all]
|
| 164 |
+
description: Which difficulty subset to load
|
| 165 |
+
|
| 166 |
+
# ββ Dependencies ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 167 |
+
dependencies:
|
| 168 |
+
- gymnasium>=0.29.0
|
| 169 |
+
- numpy>=1.24.0
|
| 170 |
+
- pydantic>=2.0.0
|
| 171 |
+
|
| 172 |
+
# ββ Reproducibility βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 173 |
+
reproducibility:
|
| 174 |
+
seed: 42
|
| 175 |
+
deterministic: true
|
| 176 |
+
baseline_script: inference.py
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
gymnasium
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
pydantic
|
| 6 |
+
gradio
|
| 7 |
+
pyyaml
|
| 8 |
+
google-generativeai
|
sagemaker/classifier.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
classifier.py β Production Rule-Based Email Classifier
|
| 3 |
+
=======================================================
|
| 4 |
+
Shared by SageMaker inference.py and Lambda handler.py.
|
| 5 |
+
Zero heavy dependencies β no numpy, no gymnasium.
|
| 6 |
+
|
| 7 |
+
Key fix vs lambda/classifier.py:
|
| 8 |
+
"legal" removed from _LEGAL_SECURITY_KW β it is a deception keyword
|
| 9 |
+
in phishing emails (TC-H-09), not a routing signal. Context field
|
| 10 |
+
is the authoritative source for legal routing.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
# ββ Label maps ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
+
URGENCY_LABELS = {0: "General", 1: "Billing", 2: "Security Breach"}
|
| 15 |
+
ROUTING_LABELS = {0: "AI Auto-Reply", 1: "Tech Support", 2: "Legal"}
|
| 16 |
+
RESOLUTION_LABELS = {0: "Archive", 1: "Draft Reply", 2: "Escalate"}
|
| 17 |
+
|
| 18 |
+
# Security emails that need Legal routing (ransomware / extortion / IP theft).
|
| 19 |
+
# NOTE: "legal" intentionally excluded β it appears in phishing deception text.
|
| 20 |
+
_LEGAL_SECURITY_KW = {"lawsuit", "attorney", "sue", "ransomware", "extortion"}
|
| 21 |
+
|
| 22 |
+
# Only "refund" escalates billing to Legal β "overdue" stays routine.
|
| 23 |
+
_BILLING_ESCALATE_KW = {"refund"}
|
| 24 |
+
|
| 25 |
+
# Canonical keyword vocabulary (must match environment.py KEYWORD_VOCAB)
|
| 26 |
+
KEYWORD_VOCAB = [
|
| 27 |
+
"invoice", "payment", "overdue", "refund",
|
| 28 |
+
"hacked", "breach", "unauthorized", "password",
|
| 29 |
+
"crash", "error", "bug", "slow",
|
| 30 |
+
"lawsuit", "legal", "attorney", "sue",
|
| 31 |
+
"spam", "offer", "win", "free",
|
| 32 |
+
"urgent", "critical", "angry", "threat",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
# Words used for sentiment scoring
|
| 36 |
+
_NEG_WORDS = {
|
| 37 |
+
"angry", "threat", "hacked", "breach", "lawsuit", "overdue",
|
| 38 |
+
"unauthorized", "ransomware", "critical", "urgent", "error",
|
| 39 |
+
"crash", "bug", "refund",
|
| 40 |
+
}
|
| 41 |
+
_POS_WORDS = {"win", "free", "offer", "congratulations", "prize"}
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# ββ Feature extraction ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 45 |
+
|
| 46 |
+
def extract_features(subject: str, body: str) -> dict:
    """
    Parse raw email text into the feature dict {keywords, sentiment, context}.

    Used when the caller does not supply pre-computed features.

    Args:
        subject: Email subject line. ``None`` is treated as empty.
        body:    Email body text. ``None`` is treated as empty.

    Returns:
        dict with:
          keywords  - KEYWORD_VOCAB entries found in the text, in vocabulary
                      order, followed by any _LEGAL_SECURITY_KW terms found
                      that are not part of the vocabulary (sorted).
          sentiment - "negative", "positive" or "neutral".
          context   - "security", "legal", "billing", "tech", "spam" or
                      "general" (first matching group wins).
    """
    import re  # local import keeps this function self-contained

    text = f"{subject or ''} {body or ''}".lower()

    # Tokenise on runs of letters/digits so that trailing punctuation does not
    # hide a keyword ("hacked!" and "refund." must still match).
    tokens = set(re.findall(r"[a-z0-9]+", text))

    keywords = [kw for kw in KEYWORD_VOCAB if kw in tokens]
    # classify() looks for "ransomware"/"extortion" in the keyword list, but
    # those terms are not in KEYWORD_VOCAB; surface them explicitly so the
    # Legal-routing rule for security emails can fire on raw text.
    keywords.extend(sorted((tokens & _LEGAL_SECURITY_KW) - set(keywords)))

    # Sentiment: negative wins only when it strictly outnumbers positive hits.
    neg_hits = len(tokens & _NEG_WORDS)
    pos_hits = len(tokens & _POS_WORDS)
    if neg_hits > pos_hits:
        sentiment = "negative"
    elif pos_hits > 0:
        sentiment = "positive"
    else:
        sentiment = "neutral"

    # Context — priority order matches the classifier decision tree.
    # Matched against the raw tokens (not the vocab-filtered keywords) so the
    # out-of-vocabulary trigger "ransomware" is actually reachable.
    context_rules = (
        ("security", {"hacked", "breach", "unauthorized", "ransomware"}),
        ("legal", {"lawsuit", "attorney", "sue"}),
        ("billing", {"invoice", "payment", "overdue", "refund"}),
        ("tech", {"crash", "error", "bug", "slow", "password"}),
        ("spam", {"spam", "offer", "win", "free"}),
    )
    context = next(
        (name for name, triggers in context_rules if tokens & triggers),
        "general",
    )

    return {"keywords": keywords, "sentiment": sentiment, "context": context}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ββ Classifier ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 85 |
+
|
| 86 |
+
def classify(email: dict) -> tuple[int, int, int]:
    """
    Deterministic rule-based classifier.

    Args:
        email: Feature dict. Reads "keywords" (iterable of strings, or a
               single string) and "context" (string). Missing or ``None``
               values are treated as empty; matching is case-insensitive.

    Returns:
        (urgency, routing, resolution) as plain ints.

    Decision tree — first match wins:
        Rule 1   legal context OR lawsuit/attorney/sue keywords → (2, 2, 2)
        Rule 2a  security + ransomware/extortion/hacked+breach  → (2, 2, 2)
        Rule 2b  security (account-level attack)                → (2, 1, 2)
        Rule 3   billing + refund keyword                       → (1, 2, 2)
        Rule 4   billing routine                                → (1, 0, 1)
        Rule 5   tech context or crash/error/bug/slow           → (0, 1, 1)
        Rule 6   spam / default                                 → (0, 0, 0)
    """
    # `or` (rather than a .get default) also covers an explicit JSON null.
    raw_keywords = email.get("keywords") or []
    if isinstance(raw_keywords, str):
        # A bare string would otherwise be iterated character by character.
        raw_keywords = [raw_keywords]
    kw = {str(item).strip().lower() for item in raw_keywords}
    context = str(email.get("context") or "").strip().lower()

    # Rule 1 — Legal
    if context == "legal" or kw & {"lawsuit", "attorney", "sue"}:
        return (2, 2, 2)

    # Rule 2 — Security
    if context == "security":
        if kw & _LEGAL_SECURITY_KW or {"hacked", "breach"} <= kw:
            return (2, 2, 2)   # ransomware / extortion → Legal
        return (2, 1, 2)       # account-level attack → Tech Support

    # Rules 3 & 4 — Billing
    if context == "billing":
        return (1, 2, 2) if kw & _BILLING_ESCALATE_KW else (1, 0, 1)

    # Rule 5 — Tech
    if context == "tech" or kw & {"crash", "error", "bug", "slow"}:
        return (0, 1, 1)

    # Rule 6 — Spam / default
    return (0, 0, 0)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# ββ Decoder βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 126 |
+
|
| 127 |
+
def decode(urgency: int, routing: int, resolution: int) -> dict:
    """
    Translate the three integer action codes into their display labels.

    Returns a dict with the keys "urgency", "routing" and "resolution".
    Raises KeyError when a code is missing from its label table.
    """
    urgency_label = URGENCY_LABELS[urgency]
    routing_label = ROUTING_LABELS[routing]
    resolution_label = RESOLUTION_LABELS[resolution]
    return dict(
        urgency=urgency_label,
        routing=routing_label,
        resolution=resolution_label,
    )
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# ββ Batch helper βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 137 |
+
|
| 138 |
+
def classify_batch(emails: list[dict]) -> list[dict]:
    """
    Classify several email dicts in a single call.

    An entry with a truthy "context" is treated as pre-computed features and
    classified as-is; any other entry has its features extracted from the
    "subject" and "body" fields (each defaulting to "").

    Returns one dict per input, in order: the decode() labels plus
    "urgency_code", "routing_code" and "resolution_code".
    """

    def _triage(message: dict) -> dict:
        # Pre-computed features win over raw text.
        if message.get("context"):
            feats = message
        else:
            feats = extract_features(
                message.get("subject", ""),
                message.get("body", ""),
            )
        urgency_code, routing_code, resolution_code = classify(feats)
        row = decode(urgency_code, routing_code, resolution_code)
        row["urgency_code"] = urgency_code
        row["routing_code"] = routing_code
        row["resolution_code"] = resolution_code
        return row

    return [_triage(message) for message in emails]
|
sagemaker/deploy.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
deploy.py — SageMaker Endpoint Deployment Script
|
| 3 |
+
=================================================
|
| 4 |
+
Packages the rule-based classifier and deploys it as a real-time
|
| 5 |
+
SageMaker endpoint with full CloudWatch monitoring support.
|
| 6 |
+
|
| 7 |
+
Prerequisites:
|
| 8 |
+
pip install boto3 sagemaker
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python deploy.py # deploy only
|
| 12 |
+
python deploy.py --test # deploy + smoke tests
|
| 13 |
+
python deploy.py --monitor # deploy + smoke tests + CW dashboard
|
| 14 |
+
python deploy.py --delete # tear down endpoint
|
| 15 |
+
|
| 16 |
+
AWS permissions required on your IAM user/role:
|
| 17 |
+
sagemaker:CreateModel, CreateEndpointConfig, CreateEndpoint
|
| 18 |
+
sagemaker:InvokeEndpoint, DeleteEndpoint
|
| 19 |
+
s3:PutObject (SageMaker default bucket)
|
| 20 |
+
iam:PassRole (SageMaker execution role)
|
| 21 |
+
iam:PutRolePolicy (to attach CW policy to execution role)
|
| 22 |
+
cloudwatch:PutDashboard
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import argparse
|
| 26 |
+
import json
|
| 27 |
+
import os
|
| 28 |
+
import tarfile
|
| 29 |
+
import tempfile
|
| 30 |
+
|
| 31 |
+
import boto3
|
| 32 |
+
import sagemaker
|
| 33 |
+
from sagemaker.sklearn.model import SKLearnModel
|
| 34 |
+
|
| 35 |
+
# ── Configuration — edit these ────────────────────────────────────────────────
ENDPOINT_NAME = "email-gatekeeper-v1"
INSTANCE_TYPE = "ml.t2.medium"  # cheapest real-time; upgrade for prod
SKLEARN_VERSION = "1.2-1"
CW_NAMESPACE = "EmailGatekeeper/Inference"  # must match inference.py
# Region of the default boto3 session, falling back to us-east-1 when unset.
REGION = boto3.session.Session().region_name or "us-east-1"

# Source files bundled into model.tar.gz; resolved next to this script.
_MODEL_FILES = [
    os.path.join(os.path.dirname(__file__), source_name)
    for source_name in ("inference.py", "classifier.py")
]
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
+
|
| 50 |
+
def _build_model_tar(s3_client, bucket: str, prefix: str) -> str:
    """
    Bundle inference.py + classifier.py + config.json into model.tar.gz,
    upload it to S3 and return the resulting URI.

    Args:
        s3_client: Object exposing ``upload_file(path, bucket, key)``
                   (a boto3 S3 client in this script).
        bucket:    Destination bucket name.
        prefix:    Key prefix; the archive is stored as ``<prefix>/model.tar.gz``.

    Returns:
        The ``s3://bucket/key`` URI of the uploaded archive.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # config.json lets model_fn read runtime overrides without redeploying
        config_path = os.path.join(tmpdir, "config.json")
        with open(config_path, "w") as f:
            json.dump({"version": "1.0.0", "cw_namespace": CW_NAMESPACE}, f)

        tar_path = os.path.join(tmpdir, "model.tar.gz")
        with tarfile.open(tar_path, "w:gz") as tar:
            # Store every file flat at the archive root.
            for fpath in _MODEL_FILES:
                tar.add(fpath, arcname=os.path.basename(fpath))
            tar.add(config_path, arcname="config.json")

        # Upload while the temporary directory (and the archive) still exist.
        s3_key = f"{prefix}/model.tar.gz"
        s3_client.upload_file(tar_path, bucket, s3_key)

    s3_uri = f"s3://{bucket}/{s3_key}"
    print(f" ✅ model.tar.gz → {s3_uri}")
    return s3_uri
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _ensure_cloudwatch_policy(role_name: str) -> None:
    """
    Attach an inline IAM policy to the SageMaker execution role so the
    container can call cloudwatch:PutMetricData.

    The policy is written with put_role_policy under a fixed policy name, so
    re-running overwrites the same inline policy. The statement is restricted
    to CW_NAMESPACE through a ``cloudwatch:namespace`` condition.

    Args:
        role_name: Name (not ARN) of the execution role to modify.
    """
    iam = boto3.client("iam", region_name=REGION)
    statement = {
        "Sid": "EmailGatekeeperCWMetrics",
        "Effect": "Allow",
        "Action": ["cloudwatch:PutMetricData"],
        "Resource": "*",
        "Condition": {
            "StringEquals": {"cloudwatch:namespace": CW_NAMESPACE}
        },
    }
    policy = {"Version": "2012-10-17", "Statement": [statement]}
    iam.put_role_policy(
        RoleName=role_name,
        PolicyName="EmailGatekeeperCloudWatchMetrics",
        PolicyDocument=json.dumps(policy),
    )
    print(f" ✅ CloudWatch IAM policy attached → role: {role_name}")
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _create_cloudwatch_dashboard() -> None:
    """
    Create (or overwrite) a 6-widget CloudWatch dashboard:
        Row 1 — ExactMatch rate    | PartialMatch rate
        Row 2 — SecurityMiss count | WrongClassification count
        Row 3 — Avg RewardScore    | SecurityBreachFlag count

    Every widget plots one CW_NAMESPACE metric for ENDPOINT_NAME over
    5-minute periods. Prints the console URL of the dashboard when done.
    """
    cw = boto3.client("cloudwatch", region_name=REGION)
    dashboard_name = "EmailGatekeeper-Inference"

    def _widget(title, metric, stat="Sum", color="#1f77b4"):
        # One time-series panel, half the dashboard width (12 of 24 columns).
        return {
            "type": "metric",
            "width": 12,
            "height": 6,
            "properties": {
                "title": title,
                "metrics": [[CW_NAMESPACE, metric,
                             "EndpointName", ENDPOINT_NAME]],
                "stat": stat,
                "period": 300,
                "view": "timeSeries",
                "color": color,
                "region": REGION,
            },
        }

    dashboard_body = {
        "widgets": [
            _widget("✅ Exact Matches (5-min)", "ExactMatch", color="#2ca02c"),
            _widget("🔶 Partial Matches (5-min)", "PartialMatch", color="#ff7f0e"),
            _widget("🚨 Security Misses (5-min)", "SecurityMiss", color="#d62728"),
            _widget("❌ Wrong Classifications", "WrongClassification", color="#9467bd"),
            _widget("📊 Avg Reward Score", "RewardScore",
                    stat="Average", color="#8c564b"),
            _widget("🔒 Security Breach Flags", "SecurityBreachFlag", color="#e377c2"),
        ]
    }

    cw.put_dashboard(
        DashboardName=dashboard_name,
        DashboardBody=json.dumps(dashboard_body),
    )
    print(" ✅ CloudWatch dashboard created: EmailGatekeeper-Inference")
    print(f" https://{REGION}.console.aws.amazon.com/cloudwatch/home"
          f"?region={REGION}#dashboards:name=EmailGatekeeper-Inference")
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def _smoke_test(sm_runtime) -> None:
    """
    Run 3 labelled test cases — one per urgency level — against the live endpoint.

    Each case is sent as JSON to ENDPOINT_NAME and passes when the returned
    ``triage.category`` equals the expected label. A line is printed per case,
    followed by an overall summary.

    Args:
        sm_runtime: Client exposing ``invoke_endpoint`` (a boto3
                    "sagemaker-runtime" client in this script).
    """
    test_cases = [
        {
            "name": "Security Breach",
            "payload": {
                "subject": "Your account has been hacked",
                "body": "Unauthorized access detected. Reset your password immediately.",
                # ground_truth triggers CW metric emission during smoke test
                "ground_truth": {"urgency": 2, "routing": 1, "resolution": 2},
            },
            "expected_category": "Security Breach",
        },
        {
            "name": "Billing Dispute",
            "payload": {
                "subject": "Refund not received",
                "body": "I requested a refund 3 weeks ago and have not received it.",
                "ground_truth": {"urgency": 1, "routing": 2, "resolution": 2},
            },
            "expected_category": "Billing",
        },
        {
            "name": "Spam",
            "payload": {
                "subject": "You won a free prize!",
                "body": "Claim your free offer now. Win big today!",
                "ground_truth": {"urgency": 0, "routing": 0, "resolution": 0},
            },
            "expected_category": "General",
        },
    ]

    print("\n Running smoke tests...")
    all_passed = True

    for tc in test_cases:
        response = sm_runtime.invoke_endpoint(
            EndpointName=ENDPOINT_NAME,
            ContentType="application/json",
            Accept="application/json",
            Body=json.dumps(tc["payload"]),
        )
        result = json.loads(response["Body"].read())
        category = result["triage"]["category"]
        # match_result is informational only; pass/fail is decided by category.
        match = result.get("match_result", {})
        passed = category == tc["expected_category"]
        icon = "✅" if passed else "❌"
        all_passed = all_passed and passed

        print(f" {icon} [{tc['name']}] "
              f"category='{category}' "
              f"match={match.get('status','?')} "
              f"reward={match.get('reward','?')}")

    print(f"\n Smoke tests: {'ALL PASSED ✅' if all_passed else 'SOME FAILED ❌'}")
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
# ββ Main ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 203 |
+
|
| 204 |
+
def deploy(create_dashboard: bool = False) -> object:
    """
    Package the classifier and deploy it as a real-time SageMaker endpoint.

    Steps: attach the CloudWatch inline policy to the execution role, upload
    model.tar.gz to the session's default bucket, create the SKLearn model,
    deploy one INSTANCE_TYPE instance as ENDPOINT_NAME, and optionally create
    the CloudWatch dashboard.

    Args:
        create_dashboard: When true, also create the CloudWatch dashboard.

    Returns:
        The predictor object returned by ``model.deploy``.
    """
    sess = sagemaker.Session()
    bucket = sess.default_bucket()
    role = sagemaker.get_execution_role()
    # The IAM API wants the bare role name: the last path segment of the ARN.
    role_name = role.split("/")[-1]
    s3_client = boto3.client("s3", region_name=REGION)

    print(f"\n{'─' * 62}")
    print(" Email Gatekeeper — SageMaker + CloudWatch Deployment")
    print(f" Endpoint : {ENDPOINT_NAME}")
    print(f" Instance : {INSTANCE_TYPE}")
    print(f" Region : {REGION}")
    print(f" CW Namespace : {CW_NAMESPACE}")
    print(f"{'─' * 62}\n")

    # 1. Attach CloudWatch IAM policy to execution role
    print(" Attaching CloudWatch IAM policy...")
    _ensure_cloudwatch_policy(role_name)

    # 2. Package and upload model artifacts
    print(" Packaging model artifacts...")
    model_uri = _build_model_tar(s3_client, bucket, "email-gatekeeper/model")

    # 3. Create SageMaker SKLearn model
    #    env passes ENDPOINT_NAME into the container so model_fn can read it
    model = SKLearnModel(
        model_data=model_uri,
        role=role,
        entry_point="inference.py",
        framework_version=SKLEARN_VERSION,
        sagemaker_session=sess,
        name=f"{ENDPOINT_NAME}-model",
        env={"SAGEMAKER_ENDPOINT_NAME": ENDPOINT_NAME},
    )

    # 4. Deploy real-time endpoint
    print(" Deploying endpoint (~5 min)...")
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type=INSTANCE_TYPE,
        endpoint_name=ENDPOINT_NAME,
    )
    print(f"\n ✅ Endpoint live: {ENDPOINT_NAME}")

    # 5. Optional CloudWatch dashboard
    if create_dashboard:
        print(" Creating CloudWatch dashboard...")
        _create_cloudwatch_dashboard()

    return predictor
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def delete_endpoint() -> None:
    """
    Tear down the endpoint and the resources deploy() created alongside it.

    Deletes the endpoint ENDPOINT_NAME; a failure here propagates to the
    caller. Then removes, best-effort, the endpoint configuration of the same
    name and the model ``<ENDPOINT_NAME>-model``. Leaving those behind would
    block a later redeploy under the same names. A failure in the best-effort
    step (e.g. missing resource or permission) is reported and skipped.
    """
    sm = boto3.client("sagemaker", region_name=REGION)
    print(f" Deleting endpoint: {ENDPOINT_NAME}")
    sm.delete_endpoint(EndpointName=ENDPOINT_NAME)
    print(" ✅ Endpoint deleted")

    # NOTE(review): assumes the SageMaker SDK named the endpoint config after
    # the endpoint, as Model.deploy does by default — confirm if that changes.
    leftovers = (
        ("endpoint config", sm.delete_endpoint_config,
         {"EndpointConfigName": ENDPOINT_NAME}),
        ("model", sm.delete_model,
         {"ModelName": f"{ENDPOINT_NAME}-model"}),
    )
    for label, delete_call, kwargs in leftovers:
        try:
            delete_call(**kwargs)
            print(f" ✅ Deleted {label}")
        except sm.exceptions.ClientError as exc:
            print(f" WARNING: could not delete {label}: {exc}")
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
if __name__ == "__main__":
    # CLI entry point: --delete tears the endpoint down; anything else deploys.
    parser = argparse.ArgumentParser()
    for flag, help_text in (
        ("--delete", "Delete the endpoint"),
        ("--test", "Run smoke tests after deploy"),
        ("--monitor", "Create CloudWatch dashboard"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    if not args.delete:
        predictor = deploy(create_dashboard=args.monitor)
        sm_runtime = boto3.client("sagemaker-runtime", region_name=REGION)

        # --monitor implies the smoke tests as well.
        run_smoke_tests = args.test or args.monitor
        if run_smoke_tests:
            _smoke_test(sm_runtime)

        invoke_hint = (
            f"\n Invoke example (with ground_truth for CW metrics):\n"
            f" aws sagemaker-runtime invoke-endpoint \\\n"
            f" --endpoint-name {ENDPOINT_NAME} \\\n"
            f" --content-type application/json \\\n"
            f" --body '{{\"subject\":\"hacked\",\"body\":\"unauthorized access\","
            f"\"ground_truth\":{{\"urgency\":2,\"routing\":1,\"resolution\":2}}}}' \\\n"
            f" response.json && cat response.json\n"
        )
        print(invoke_hint)
    else:
        delete_endpoint()
|
sagemaker/inference.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
inference.py — SageMaker Entry Point | Email Gatekeeper | Phase 1
|
| 3 |
+
========================================================================
|
| 4 |
+
|
| 5 |
+
Think of this file like a circuit board with 4 connectors.
|
| 6 |
+
SageMaker plugs into each one in order, every time a request arrives:
|
| 7 |
+
|
| 8 |
+
[1] model_fn β Power-on. Runs ONCE when the server starts.
|
| 9 |
+
[2] input_fn β Input pin. Reads the raw HTTP request bytes.
|
| 10 |
+
[3] predict_fn β Logic gate. Runs your classifier, scores the result.
|
| 11 |
+
[4] output_fn β Output pin. Sends the JSON response back.
|
| 12 |
+
|
| 13 |
+
Your classifier lives in classifier.py (same folder).
|
| 14 |
+
No GPU, no heavy ML libraries needed β pure Python logic.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import os
|
| 19 |
+
import uuid
|
| 20 |
+
import logging
|
| 21 |
+
from datetime import datetime, timezone
|
| 22 |
+
|
| 23 |
+
# classifier.py must be in the same folder as this file
|
| 24 |
+
from classifier import classify, decode, extract_features
|
| 25 |
+
|
| 26 |
+
# SageMaker streams all logger.info() calls to CloudWatch Logs automatically
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# ── Reward weights (must match environment.py exactly) ────────────────────────
# These are the scores your RL agent learned against.
# They are used here only for logging — not for routing decisions.
_REWARDS = dict(
    EXACT=1.0,            # all 3 dimensions correct
    PARTIAL_1=0.2,        # urgency correct, 1 other dimension wrong
    PARTIAL_2=0.1,        # urgency correct, both other dimensions wrong
    SECURITY_MISS=-2.0,   # security email but urgency was NOT flagged as 2
    WRONG=0.0,            # urgency wrong on a non-security email
)

# ── SLA table: urgency code → response time target ───────────────────────────
_SLA = {
    code: {"priority": priority, "respond_within_minutes": minutes}
    for code, priority, minutes in (
        (0, "P3", 1440),   # General  → 24 h
        (1, "P2", 240),    # Billing  → 4 h
        (2, "P1", 15),     # Security → 15 min
    )
}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
# HELPER: Partial-match scorer
|
| 51 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
|
| 53 |
+
def _score_match(predicted: tuple, ground_truth: dict) -> dict:
    """
    Grade a (urgency, routing, resolution) prediction against the known answer.

    Only called when the request includes a "ground_truth" field, e.g. for
    offline evaluation or accuracy logging.

    Args:
        predicted:    3-tuple of predicted codes (urgency, routing, resolution).
        ground_truth: dict with "urgency", "routing" and "resolution"; each
                      value is passed through int().

    Returns a dict with:
        status       — one of EXACT / PARTIAL_1 / PARTIAL_2 / SECURITY_MISS / WRONG
        reward       — the _REWARDS score for that status
        correct_dims — per-dimension booleans
        wrong_fields — names of the dimensions predicted incorrectly
    """
    p_urgency, p_routing, p_resolution = predicted

    expected = {
        dim: int(ground_truth[dim])
        for dim in ("urgency", "routing", "resolution")
    }

    # Per-dimension verdicts, always in urgency / routing / resolution order.
    correct = {
        "urgency": p_urgency == expected["urgency"],
        "routing": p_routing == expected["routing"],
        "resolution": p_resolution == expected["resolution"],
    }
    wrong = [dim for dim in correct if not correct[dim]]

    # ── Decision tree (same priority order as environment.py) ─────────────────
    if expected["urgency"] == 2 and p_urgency != 2:
        # A security email that was not flagged as security: worst outcome.
        status = "SECURITY_MISS"
    elif len(wrong) == 0:
        # All three dimensions match.
        status = "EXACT"
    elif correct["urgency"]:
        # Urgency is right, so one or both of the other dimensions are wrong.
        status = "PARTIAL_1" if len(wrong) == 1 else "PARTIAL_2"
    else:
        # Urgency itself is wrong: no credit.
        status = "WRONG"

    reward = _REWARDS[status]
    logger.info(
        "MATCH_EVAL | status=%s reward=%.1f wrong_fields=%s",
        status, reward, wrong
    )

    return {
        "status": status,
        "reward": reward,
        "correct_dims": correct,
        "wrong_fields": wrong,
    }
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 116 |
+
# [1] model_fn β Power-on. Runs ONCE at container start.
|
| 117 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 118 |
+
|
| 119 |
+
def model_fn(model_dir: str) -> dict:
    """
    SageMaker calls this once when the container boots up.

    For a rule-based classifier there are no weights to load, so this only
    builds the config dict that predict_fn will use.

    Args:
        model_dir: Folder where SageMaker unpacks model.tar.gz. An optional
                   ``config.json`` in it may override "version" and "sla".

    Returns:
        dict with "version", "sla" (keyed by int urgency code) and
        "endpoint_name".

    Raises:
        ValueError: if an "sla" override contains a key that is not an
                    integer urgency code.
    """
    logger.info("model_fn | model_dir=%s", model_dir)

    # Optional: load a config.json from model.tar.gz to override defaults
    # (e.g. change SLA targets).
    config_path = os.path.join(model_dir, "config.json")
    config = {}
    if os.path.exists(config_path):
        with open(config_path) as f:
            config = json.load(f)
        logger.info("Config loaded: %s", config)

    # JSON object keys are always strings, but predict_fn indexes the table
    # with the int urgency code; normalise the keys here. Start from the
    # defaults so a partial override cannot leave an urgency level without
    # an SLA entry.
    sla = dict(_SLA)
    for code, target in (config.get("sla") or {}).items():
        sla[int(code)] = target

    model = {
        "version": config.get("version", "1.0.0"),
        "sla": sla,
        # Endpoint name is read from the environment; "local" when unset.
        "endpoint_name": os.environ.get("SAGEMAKER_ENDPOINT_NAME", "local"),
    }

    logger.info("Model ready | version=%s endpoint=%s",
                model["version"], model["endpoint_name"])
    return model
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
+
# [2] input_fn β Input pin. Deserialise the raw HTTP request.
|
| 152 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
+
|
| 154 |
+
def input_fn(request_body: str | bytes, content_type: str) -> dict:
    """
    Converts the raw HTTP POST body into a Python dict.

    Accepted request formats:

      Format A — JSON with raw email text (most common):
        {
          "subject": "Your account was hacked",
          "body":    "We detected unauthorized access..."
        }

      Format B — JSON with pre-extracted features (skips feature extraction):
        {
          "keywords":  ["hacked", "password"],
          "sentiment": "negative",
          "context":   "security"
        }

      Format C — Add ground_truth to either format above for accuracy scoring:
        {
          "subject": "...",
          "body":    "...",
          "ground_truth": {"urgency": 2, "routing": 1, "resolution": 2}
        }

    A ``text/plain`` body is accepted too and treated as the email body with
    an empty subject.

    Raises:
        ValueError: unsupported content type, malformed JSON (json.loads
                    raises a ValueError subclass), a JSON body that is not an
                    object, or a payload with nothing to classify.
    """
    logger.info("input_fn | content_type=%s", content_type)

    def _as_text(raw):
        # SageMaker may hand over bytes; everything downstream works on str.
        return raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw

    # Ignore parameters such as "; charset=utf-8". A missing content type
    # becomes "" and is rejected below instead of crashing on .lower().
    ct = (content_type or "").lower().split(";")[0].strip()

    if ct == "application/json":
        payload = json.loads(_as_text(request_body))
        # A JSON list/string/number has no .get(); reject it with the same
        # exception type used for every other bad request.
        if not isinstance(payload, dict):
            raise ValueError(
                "JSON request body must be an object, "
                f"got {type(payload).__name__}."
            )

    elif ct == "text/plain":
        # Accept raw email text directly — treat entire body as email body
        payload = {"subject": "", "body": _as_text(request_body)}

    else:
        raise ValueError(
            f"Unsupported content type: '{content_type}'. "
            "Send 'application/json' or 'text/plain'."
        )

    # Must have at least something to classify
    if not any([payload.get("subject"), payload.get("body"),
                payload.get("keywords"), payload.get("context")]):
        raise ValueError(
            "Request must include 'subject', 'body', 'keywords', or 'context'."
        )

    return payload
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 211 |
+
# [3] predict_fn β Logic gate. Run the classifier.
|
| 212 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 213 |
+
|
| 214 |
+
def predict_fn(data: dict, model: dict) -> dict:
    """
    The main classification step; runs on every request.

    1. Take the caller's features when "context" is present, otherwise
       extract them from the raw subject + body.
    2. Classify into three integer codes (urgency, routing, resolution).
    3. Decode the codes into human-readable labels.
    4. Score against "ground_truth" when the request carries one.
    5. Return everything as a dict for output_fn to serialise.

    Args:
        data:  Payload produced by input_fn.
        model: Dict produced by model_fn; "sla" and "endpoint_name" are read.
    """
    logger.info("predict_fn | keys=%s", list(data.keys()))

    # ── Step 1: Feature extraction ────────────────────────────────────────────
    if not data.get("context"):
        # No pre-computed context: derive features from the raw text.
        features = extract_features(
            subject=data.get("subject", ""),
            body=data.get("body", ""),
        )
    else:
        # Fast path: caller already extracted features.
        features = dict(
            keywords=data.get("keywords", []),
            sentiment=data.get("sentiment", "neutral"),
            context=data["context"],
        )

    # ── Step 2: Classify into 3 codes ─────────────────────────────────────────
    urgency, routing, resolution = classify(features)

    # ── Step 3: Decode to human-readable labels ───────────────────────────────
    labels = decode(urgency, routing, resolution)

    logger.info(
        "CLASSIFIED | category=%s dept=%s action=%s | context=%s keywords=%s",
        labels["urgency"], labels["routing"], labels["resolution"],
        features["context"], features["keywords"],
    )

    # ── Step 4: Score against ground_truth (optional) ─────────────────────────
    ground_truth = data.get("ground_truth")
    if not ground_truth:
        # No ground_truth supplied: nothing to score against.
        match = {"status": "UNVERIFIED", "reward": None,
                 "correct_dims": {}, "wrong_fields": []}
    else:
        match = _score_match((urgency, routing, resolution), ground_truth)

    # ── Step 5: Return raw prediction dict ────────────────────────────────────
    prediction = {
        "urgency_code": urgency,
        "routing_code": routing,
        "resolution_code": resolution,
        "labels": labels,
        "features": features,
        "match": match,
        "sla": model["sla"][urgency],
        "endpoint": model["endpoint_name"],
    }
    return prediction
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 276 |
+
# [4] output_fn β Output pin. Format and send the response.
|
| 277 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 278 |
+
|
| 279 |
+
def output_fn(prediction: dict, accept: str) -> tuple[str, str]:
    """
    Convert the prediction dict into the final HTTP response body.

    Args:
        prediction: dict holding the keys read below: "urgency_code",
            "routing_code", "resolution_code", "labels" (urgency / routing /
            resolution), "features" (keywords / sentiment / context),
            "match" (status / reward / wrong_fields) and "sla" (priority, ...).
        accept: value of the Accept header. Empty or None falls back to
            "application/json"; parameters after ";" are ignored and the
            comparison is case-insensitive.

    Returns:
        (body, content_type). content_type is "text/csv" when CSV was
        requested, otherwise "application/json".

    Default response format: application/json
    Optional CSV format:     text/csv (useful for batch jobs writing to S3)

    JSON response shape:
        {
            "request_id": "uuid",
            "timestamp":  "2024-01-15T10:30:00Z",
            "triage": {
                "category":   "Security Breach",   # urgency label
                "department": "Tech Support",      # routing label
                "action":     "Escalate"           # resolution label
            },
            "codes": {"urgency": 2, "routing": 1, "resolution": 2},
            "features": {"keywords": [...], "sentiment": "...", "context": "..."},
            "match_result": {
                "status": "EXACT",     # or PARTIAL_1 / PARTIAL_2 / SECURITY_MISS / WRONG
                "reward": 1.0,         # RL reward score (null when unverified)
                "wrong_fields": []     # which dimensions were wrong
            },
            "sla": {"priority": "P1", "respond_within_minutes": 15}
        }

    CSV column order:
        request_id, category, department, action, urgency code, routing code,
        resolution code, match status, reward, SLA priority
    """
    # Function-scope imports: only the CSV branch needs them, and this keeps
    # the block independent of the module's top-level import list.
    import csv
    import io

    accept_type = (accept or "application/json").lower().split(";")[0].strip()

    response = {
        "request_id": str(uuid.uuid4()),
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "triage": {
            "category": prediction["labels"]["urgency"],
            "department": prediction["labels"]["routing"],
            "action": prediction["labels"]["resolution"],
        },
        "codes": {
            "urgency": prediction["urgency_code"],
            "routing": prediction["routing_code"],
            "resolution": prediction["resolution_code"],
        },
        "features": {
            "keywords": prediction["features"]["keywords"],
            "sentiment": prediction["features"]["sentiment"],
            "context": prediction["features"]["context"],
        },
        "match_result": {
            "status": prediction["match"]["status"],
            "reward": prediction["match"]["reward"],
            "wrong_fields": prediction["match"]["wrong_fields"],
        },
        "sla": prediction["sla"],
    }

    # CSV output (for SageMaker Batch Transform jobs)
    if accept_type == "text/csv":
        reward = response["match_result"]["reward"]
        buffer = io.StringIO()
        # csv.writer quotes fields containing commas/quotes/newlines, which a
        # bare ",".join() would silently turn into extra or broken columns.
        # lineterminator="" keeps the historical "single row, no trailing
        # newline" output.
        csv.writer(buffer, lineterminator="").writerow([
            response["request_id"],
            response["triage"]["category"],
            response["triage"]["department"],
            response["triage"]["action"],
            str(response["codes"]["urgency"]),
            str(response["codes"]["routing"]),
            str(response["codes"]["resolution"]),
            str(response["match_result"]["status"]),
            # Only a missing reward becomes an empty field; `reward or ""`
            # would also blank out a legitimate score of 0.0.
            "" if reward is None else str(reward),
            response["sla"]["priority"],
        ])
        return buffer.getvalue(), "text/csv"

    return json.dumps(response, ensure_ascii=False), "application/json"
|
sagemaker/model.tar.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1b5f5f36441ff75a3014c21ff9e85dcbada8f33fc888d8458459ae3b5cc1fdc
|
| 3 |
+
size 6319
|
sagemaker/package.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
package.py — Build model.tar.gz for SageMaker
|
| 3 |
+
================================================
|
| 4 |
+
Run this script once before deploying.
|
| 5 |
+
It bundles your code/ folder into the exact archive structure SageMaker expects.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
cd "RL Envir"
|
| 9 |
+
python sagemaker/package.py
|
| 10 |
+
|
| 11 |
+
Output:
|
| 12 |
+
sagemaker/model.tar.gz ← upload this to S3, then point SageMaker at it
|
| 13 |
+
|
| 14 |
+
What goes inside model.tar.gz:
|
| 15 |
+
code/
|
| 16 |
+
├── inference.py ← SageMaker entry point (the 4 handlers)
|
| 17 |
+
└── classifier.py ← your rule-based classifier logic
|
| 18 |
+
|
| 19 |
+
SageMaker unpacks the archive and looks for code/inference.py automatically
|
| 20 |
+
when you use the SKLearn or generic Python containers.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import os
|
| 24 |
+
import tarfile
|
| 25 |
+
|
| 26 |
+
# ββ Paths β all relative to this script's location βββββββββββββββββββββββββββ
|
| 27 |
+
# Directory containing this script; every default path below is relative to it.
HERE = os.path.dirname(os.path.abspath(__file__))
OUTPUT_TAR = os.path.join(HERE, "model.tar.gz")

# Files to include — add more here if you create extra helper modules.
# Maps the path inside the archive to the source path on disk.
FILES_TO_PACK = {
    "code/inference.py": os.path.join(HERE, "inference.py"),
    "code/classifier.py": os.path.join(HERE, "classifier.py"),
}


def build(files=None, output_tar=None):
    """
    Bundle the inference code into a gzip-compressed tar archive.

    Args:
        files: optional mapping {archive_name: source_path}. Defaults to
            FILES_TO_PACK.
        output_tar: optional destination path of the archive. Defaults to
            OUTPUT_TAR.

    Returns:
        The path of the archive that was written.

    Raises:
        FileNotFoundError: if any source path does not exist. Nothing is
            written in that case because the check runs before the archive
            is opened.
    """
    files = FILES_TO_PACK if files is None else files
    output_tar = OUTPUT_TAR if output_tar is None else output_tar
    tar_name = os.path.basename(output_tar)

    print(f"\n Building {tar_name} ...")
    print(f" Output → {output_tar}\n")

    # Verify all source files exist before starting
    missing = [src for src in files.values() if not os.path.exists(src)]
    if missing:
        print(" ✗ Missing files:")
        for path in missing:
            print(f"   {path}")
        raise FileNotFoundError("Fix missing files then re-run.")

    # Build the archive
    with tarfile.open(output_tar, "w:gz") as tar:
        for archive_name, source_path in files.items():
            tar.add(source_path, arcname=archive_name)
            size_kb = os.path.getsize(source_path) / 1024
            print(f" + {archive_name:<30} ({size_kb:.1f} KB)")

    # Verify and report
    tar_size_kb = os.path.getsize(output_tar) / 1024
    print(f"\n ✅ Done! {tar_name} = {tar_size_kb:.1f} KB")

    # Show contents as confirmation
    print(f"\n Contents of {tar_name}:")
    with tarfile.open(output_tar, "r:gz") as tar:
        for member in tar.getmembers():
            print(f" {member.name:<35} {member.size / 1024:.1f} KB")

    print("\n Next step → upload to S3:")
    print(f" aws s3 cp {output_tar} s3://YOUR-BUCKET/email-gatekeeper/model.tar.gz\n")

    return output_tar


if __name__ == "__main__":
    build()
|
sagemaker/requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SageMaker inference container dependencies
|
| 2 |
+
# The SKLearn container already includes: boto3, numpy, scikit-learn
|
| 3 |
+
# Only add packages your inference.py actually imports beyond stdlib
|
| 4 |
+
|
| 5 |
+
# No additional packages needed for the rule-based classifier.
|
| 6 |
+
# Uncomment below if you switch to an ML model later:
|
| 7 |
+
|
| 8 |
+
# torch==2.1.0
|
| 9 |
+
# transformers==4.35.0
|
| 10 |
+
# huggingface_hub==0.19.0
|
sagemaker/upload_to_hf.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
upload_to_hf.py — Upload model.tar.gz to Hugging Face
|
| 3 |
+
=========================================================
|
| 4 |
+
Handles large files (200MB+) using the huggingface_hub library,
|
| 5 |
+
which automatically uses Git LFS for files over 10MB.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
python sagemaker/upload_to_hf.py
|
| 9 |
+
|
| 10 |
+
Prerequisites:
|
| 11 |
+
pip install huggingface_hub
|
| 12 |
+
huggingface-cli login ← run once, saves token to ~/.cache/huggingface
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
from huggingface_hub import HfApi, login
|
| 17 |
+
|
| 18 |
+
# ββ Configuration β edit these 3 lines βββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
# Placeholder shipped with the script; upload() refuses to run while it is unchanged.
_PLACEHOLDER_REPO_ID = "YOUR-USERNAME/YOUR-REPO-NAME"

# Edit this line or set the HF_REPO_ID environment variable.
HF_REPO_ID = os.getenv("HF_REPO_ID", _PLACEHOLDER_REPO_ID)  # e.g. "zerogravity/email-gatekeeper"
HF_TOKEN = os.getenv("HF_TOKEN")  # set env var OR paste token below
# HF_TOKEN = "hf_xxxxxxxxxxxxxxxxxxxx"  # dev only, never commit this

LOCAL_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "model.tar.gz"
)
REPO_FILE = "model.tar.gz"  # path inside the HF repo
REPO_TYPE = "model"  # "model" | "dataset" | "space"

# huggingface.co serves dataset and space repos under a path prefix; model
# repos live at the root. Used when building the URLs printed after upload.
_REPO_URL_PREFIX = {"model": "", "dataset": "datasets/", "space": "spaces/"}


def upload():
    """
    Upload LOCAL_FILE to the Hugging Face repo HF_REPO_ID as REPO_FILE.

    Authenticates with HF_TOKEN when it is set; otherwise relies on whatever
    credentials huggingface_hub already has cached. After the upload, prints
    the repo page and a direct download URL matching REPO_TYPE.

    Raises:
        FileNotFoundError: if LOCAL_FILE does not exist.
        ValueError: if HF_REPO_ID is still the shipped placeholder.
    """
    # Validate local file exists
    if not os.path.exists(LOCAL_FILE):
        raise FileNotFoundError(
            f"model.tar.gz not found at:\n {LOCAL_FILE}\n"
            "Run python sagemaker/package.py first to build it."
        )

    # Fail early with a clear message instead of a remote "repo not found".
    if HF_REPO_ID == _PLACEHOLDER_REPO_ID:
        raise ValueError(
            "HF_REPO_ID is still the placeholder value. Edit HF_REPO_ID in "
            "this script or set the HF_REPO_ID environment variable."
        )

    size_mb = os.path.getsize(LOCAL_FILE) / (1024 * 1024)
    print(f"\n File : {LOCAL_FILE}")
    print(f" Size : {size_mb:.1f} MB")
    print(f" Repo : {HF_REPO_ID}")
    print(f" Dest : {REPO_FILE}\n")

    # Authenticate
    if HF_TOKEN:
        login(token=HF_TOKEN, add_to_git_credential=False)
    else:
        # Falls back to cached token from huggingface-cli login
        print(" No HF_TOKEN env var found — using cached login credentials.")

    # Upload
    api = HfApi()

    print(" Uploading... (large files use Git LFS automatically)\n")

    url = api.upload_file(
        path_or_fileobj=LOCAL_FILE,
        path_in_repo=REPO_FILE,
        repo_id=HF_REPO_ID,
        repo_type=REPO_TYPE,
        commit_message="Upload model.tar.gz — Email Gatekeeper RL Agent",
    )

    # Dataset/space repos need a path prefix; unknown types fall back to the
    # model-style URL, which is what was always printed before.
    repo_base = f"https://huggingface.co/{_REPO_URL_PREFIX.get(REPO_TYPE, '')}{HF_REPO_ID}"

    print("\n ✅ Upload complete!")
    print(f" View : {repo_base}")
    print(f" File : {url}\n")

    # Print the download URL for use in deploy.py
    download_url = f"{repo_base}/resolve/main/{REPO_FILE}"
    print(" Direct download URL (use in deploy.py):")
    print(f" {download_url}\n")


if __name__ == "__main__":
    upload()
|
test_cases_advanced.json
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"_comment": "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ",
|
| 4 |
+
"_section": "MEDIUM β Passive-Aggressive Legal Threats (polite tone, legal intent)",
|
| 5 |
+
"_comment_end": "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ"
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"id": "TC-M-01",
|
| 9 |
+
"difficulty": "medium",
|
| 10 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 11 |
+
"description": "Polite refund ultimatum with implied legal action",
|
| 12 |
+
"raw_email": {
|
| 13 |
+
"subject": "Following Up on My Refund Request — Third Attempt",
|
| 14 |
+
"body": "Dear Support Team, I have now contacted you three times regarding my refund of $340. I have been patient, but I want to make it clear that I am aware of my consumer rights and have already spoken with a legal advisor about my options. I would strongly prefer to resolve this amicably before I am forced to take further steps. Please process my refund within 48 hours. Regards, A. Chen"
|
| 15 |
+
},
|
| 16 |
+
"keywords": ["refund", "legal", "angry", "threat"],
|
| 17 |
+
"sentiment": "negative",
|
| 18 |
+
"context": "legal",
|
| 19 |
+
"deception_tactic": "Polite tone masks explicit legal advisor mention and ultimatum deadline",
|
| 20 |
+
"classifier_trap": "Reads like a billing complaint — 'refund' + 'legal advisor' pushes it to legal escalation",
|
| 21 |
+
"correct_actions": [2, 2, 2],
|
| 22 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 23 |
+
"reward_note": "urgency=2 because legal advisor + threat combo; routing=Legal; resolution=Escalate"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"id": "TC-M-02",
|
| 27 |
+
"difficulty": "medium",
|
| 28 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 29 |
+
"description": "Overdue invoice with attorney CC warning",
|
| 30 |
+
"raw_email": {
|
| 31 |
+
"subject": "Invoice #4821 — Final Notice Before Referral",
|
| 32 |
+
"body": "Hi, This is a courtesy reminder that Invoice #4821 for $1,200 remains overdue by 45 days. We have CC'd our attorney on this correspondence as a precautionary measure. We remain open to resolving this without formal proceedings, but require payment or a written payment plan by Friday. Thank you."
|
| 33 |
+
},
|
| 34 |
+
"keywords": ["invoice", "overdue", "attorney", "legal", "payment", "threat"],
|
| 35 |
+
"sentiment": "negative",
|
| 36 |
+
"context": "legal",
|
| 37 |
+
"deception_tactic": "Framed as a 'courtesy reminder' — attorney CC buried mid-sentence",
|
| 38 |
+
"classifier_trap": "Surface keywords look like billing; 'attorney' CC elevates to legal",
|
| 39 |
+
"correct_actions": [2, 2, 2],
|
| 40 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 41 |
+
"reward_note": "attorney keyword present → legal context wins over billing"
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"id": "TC-M-03",
|
| 45 |
+
"difficulty": "medium",
|
| 46 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 47 |
+
"description": "Disappointed customer implying regulatory complaint",
|
| 48 |
+
"raw_email": {
|
| 49 |
+
"subject": "Extremely Disappointed — Will Be Escalating This Matter",
|
| 50 |
+
"body": "To Whom It May Concern, I am writing to express my deep dissatisfaction with how my case has been handled. I have documented every interaction and intend to escalate this to the relevant regulatory authority if I do not receive a satisfactory response within 5 business days. I trust you understand the implications. Sincerely, M. Okafor"
|
| 51 |
+
},
|
| 52 |
+
"keywords": ["angry", "threat", "legal"],
|
| 53 |
+
"sentiment": "negative",
|
| 54 |
+
"context": "legal",
|
| 55 |
+
"deception_tactic": "No explicit 'lawsuit' word — uses 'regulatory authority' and 'implications' as proxies",
|
| 56 |
+
"classifier_trap": "Vague enough to be misclassified as general complaint; 'escalate' + 'legal' intent is clear",
|
| 57 |
+
"correct_actions": [2, 2, 2],
|
| 58 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 59 |
+
"reward_note": "threat + legal context → (2,2,2)"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"id": "TC-M-04",
|
| 63 |
+
"difficulty": "medium",
|
| 64 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 65 |
+
"description": "SLA breach complaint with legal remedy language",
|
| 66 |
+
"raw_email": {
|
| 67 |
+
"subject": "SLA Violation — Formal Notice of Breach",
|
| 68 |
+
"body": "Dear Account Manager, As per our signed service agreement, your platform guarantees 99.9% uptime. Your service has been unavailable for 14 hours this month, constituting a material breach of contract. We are formally notifying you of this breach and reserving all legal remedies available to us under the agreement. Please respond within 2 business days. β Legal Dept, Nexus Corp"
|
| 69 |
+
},
|
| 70 |
+
"keywords": ["breach", "legal", "threat", "angry"],
|
| 71 |
+
"sentiment": "negative",
|
| 72 |
+
"context": "legal",
|
| 73 |
+
"deception_tactic": "Uses contract/SLA language — sounds like a tech complaint but is a formal legal notice",
|
| 74 |
+
"classifier_trap": "Contains 'breach' which could trigger security; context is contractual/legal",
|
| 75 |
+
"correct_actions": [2, 2, 2],
|
| 76 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 77 |
+
"reward_note": "breach + legal dept sender + formal notice → (2,2,2)"
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"id": "TC-M-05",
|
| 81 |
+
"difficulty": "medium",
|
| 82 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 83 |
+
"description": "Billing dispute with small claims court mention",
|
| 84 |
+
"raw_email": {
|
| 85 |
+
"subject": "Re: Disputed Charge — My Next Step",
|
| 86 |
+
"body": "Hello, I have disputed the $89 charge on my account twice now with no resolution. I want to be transparent: I have already filed the paperwork for small claims court as a precaution. I am still willing to resolve this directly if you can process my refund this week. Please treat this as urgent. β T. Vasquez"
|
| 87 |
+
},
|
| 88 |
+
"keywords": ["refund", "payment", "angry", "lawsuit", "urgent"],
|
| 89 |
+
"sentiment": "negative",
|
| 90 |
+
"context": "legal",
|
| 91 |
+
"deception_tactic": "Framed as cooperative ('still willing to resolve') while small claims filing already done",
|
| 92 |
+
"classifier_trap": "Refund + payment looks like billing dispute; 'lawsuit' (small claims) escalates to legal",
|
| 93 |
+
"correct_actions": [2, 2, 2],
|
| 94 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 95 |
+
"reward_note": "lawsuit keyword present → legal routing regardless of billing surface"
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"id": "TC-M-06",
|
| 99 |
+
"difficulty": "medium",
|
| 100 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 101 |
+
"description": "GDPR deletion request with legal consequence warning",
|
| 102 |
+
"raw_email": {
|
| 103 |
+
"subject": "Data Deletion Request β GDPR Article 17",
|
| 104 |
+
"body": "Dear Data Controller, I am formally requesting erasure of all my personal data under GDPR Article 17 (Right to Erasure). Failure to comply within 30 days constitutes a violation subject to regulatory fines of up to 4% of annual turnover. I have retained legal counsel and will file a complaint with the supervisory authority if this is not actioned. Please confirm receipt."
|
| 105 |
+
},
|
| 106 |
+
"keywords": ["legal", "breach", "threat", "angry"],
|
| 107 |
+
"sentiment": "negative",
|
| 108 |
+
"context": "legal",
|
| 109 |
+
"deception_tactic": "Sounds like a routine data request β legal counsel + regulatory fine threat is the real signal",
|
| 110 |
+
"classifier_trap": "Could be misrouted to tech support as a 'data/account request'; it is a legal compliance matter",
|
| 111 |
+
"correct_actions": [2, 2, 2],
|
| 112 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 113 |
+
"reward_note": "legal counsel + regulatory threat β (2,2,2)"
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"id": "TC-M-07",
|
| 117 |
+
"difficulty": "medium",
|
| 118 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 119 |
+
"description": "Subscription cancellation with chargeback threat",
|
| 120 |
+
"raw_email": {
|
| 121 |
+
"subject": "Cancellation and Chargeback Notice",
|
| 122 |
+
"body": "Hi, I cancelled my subscription 3 weeks ago but was charged again this month. I have already contacted my bank to initiate a chargeback and have documented all correspondence. If this is not refunded within 24 hours, I will also be filing a complaint with the Consumer Financial Protection Bureau. I expect a prompt response."
|
| 123 |
+
},
|
| 124 |
+
"keywords": ["payment", "refund", "angry", "threat", "legal"],
|
| 125 |
+
"sentiment": "negative",
|
| 126 |
+
"context": "legal",
|
| 127 |
+
"deception_tactic": "Chargeback + CFPB complaint = regulatory/legal action disguised as billing complaint",
|
| 128 |
+
"classifier_trap": "Payment + refund looks like billing; CFPB filing is a legal/regulatory escalation",
|
| 129 |
+
"correct_actions": [2, 2, 2],
|
| 130 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 131 |
+
"reward_note": "regulatory body complaint + threat β legal routing"
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"id": "TC-M-08",
|
| 135 |
+
"difficulty": "medium",
|
| 136 |
+
"category": "Passive-Aggressive Legal Threat",
|
| 137 |
+
"description": "Vendor threatening IP infringement claim",
|
| 138 |
+
"raw_email": {
|
| 139 |
+
"subject": "Unauthorized Use of Proprietary Content",
|
| 140 |
+
"body": "Dear Sir/Madam, It has come to our attention that content owned by our company is being used on your platform without a valid license. We have documented evidence of this unauthorized use. Our legal team has been instructed to pursue all available remedies, including injunctive relief and damages, unless we receive written confirmation of removal within 72 hours. β IP Counsel, Meridian Media"
|
| 141 |
+
},
|
| 142 |
+
"keywords": ["unauthorized", "legal", "attorney", "threat"],
|
| 143 |
+
"sentiment": "negative",
|
| 144 |
+
"context": "legal",
|
| 145 |
+
"deception_tactic": "Formal legal letter tone β 'injunctive relief' and 'damages' are legal action signals",
|
| 146 |
+
"classifier_trap": "'Unauthorized' could trigger security breach path; this is IP legal, not account security",
|
| 147 |
+
"correct_actions": [2, 2, 2],
|
| 148 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 149 |
+
"reward_note": "attorney + legal team + unauthorized use β (2,2,2)"
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"_comment": "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ",
|
| 153 |
+
"_section": "HARD β Subtle Phishing Attempts (disguised as legitimate IT/billing/HR emails)",
|
| 154 |
+
"_comment_end": "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ"
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"id": "TC-H-01",
|
| 158 |
+
"difficulty": "hard",
|
| 159 |
+
"category": "Subtle Phishing",
|
| 160 |
+
"description": "IT password reset disguised as routine security audit",
|
| 161 |
+
"raw_email": {
|
| 162 |
+
"subject": "Mandatory Annual Security Audit β Action Required",
|
| 163 |
+
"body": "Dear Employee, As part of our annual security compliance audit, all staff are required to verify their account credentials by clicking the link below and re-entering their password. This process is mandatory and must be completed by EOD Friday. Failure to comply may result in temporary account suspension. IT Security Team."
|
| 164 |
+
},
|
| 165 |
+
"keywords": ["password", "unauthorized", "critical", "urgent", "threat"],
|
| 166 |
+
"sentiment": "negative",
|
| 167 |
+
"context": "security",
|
| 168 |
+
"deception_tactic": "Impersonates internal IT; 'mandatory audit' creates urgency; credential harvesting via fake link",
|
| 169 |
+
"classifier_trap": "Reads like a legitimate IT notice β urgency + password request is the phishing signal",
|
| 170 |
+
"correct_actions": [2, 1, 2],
|
| 171 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 172 |
+
"reward_note": "password + unauthorized access attempt β (2,1,2); no legal keywords so Tech Support not Legal"
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"id": "TC-H-02",
|
| 176 |
+
"difficulty": "hard",
|
| 177 |
+
"category": "Subtle Phishing",
|
| 178 |
+
"description": "Fake invoice payment portal redirect",
|
| 179 |
+
"raw_email": {
|
| 180 |
+
"subject": "Your Invoice #9923 is Ready β Secure Payment Required",
|
| 181 |
+
"body": "Hello, Your invoice for services rendered is now available. To avoid a late payment fee, please log in to our secure payment portal using your existing credentials to review and pay. Note: your current session has expired and you will need to re-enter your password to access the portal. Click here: http://pay-invoices-secure.net/login"
|
| 182 |
+
},
|
| 183 |
+
"keywords": ["invoice", "payment", "password", "unauthorized", "urgent"],
|
| 184 |
+
"sentiment": "neutral",
|
| 185 |
+
"context": "security",
|
| 186 |
+
"deception_tactic": "Billing-themed phish β 'session expired' forces credential re-entry on a fake domain",
|
| 187 |
+
"classifier_trap": "Invoice + payment looks like billing; password re-entry request on external URL is the phishing signal",
|
| 188 |
+
"correct_actions": [2, 1, 2],
|
| 189 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 190 |
+
"reward_note": "password harvesting attempt embedded in billing context β security wins"
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"id": "TC-H-03",
|
| 194 |
+
"difficulty": "hard",
|
| 195 |
+
"category": "Subtle Phishing",
|
| 196 |
+
"description": "HR benefits enrollment with credential capture",
|
| 197 |
+
"raw_email": {
|
| 198 |
+
"subject": "Open Enrollment Closes Friday β Update Your Benefits Now",
|
| 199 |
+
"body": "Hi Team, The annual benefits open enrollment window closes this Friday. To update your selections, please log in to the HR portal at the link below. You will be asked to verify your identity by entering your employee ID and current password. This is a one-time verification step. Don't miss the deadline β benefits cannot be changed until next year."
|
| 200 |
+
},
|
| 201 |
+
"keywords": ["password", "urgent", "critical"],
|
| 202 |
+
"sentiment": "neutral",
|
| 203 |
+
"context": "security",
|
| 204 |
+
"deception_tactic": "HR impersonation with artificial deadline; password verification step is credential theft",
|
| 205 |
+
"classifier_trap": "Completely routine-sounding HR email; password verification request is the only red flag",
|
| 206 |
+
"correct_actions": [2, 1, 2],
|
| 207 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 208 |
+
"reward_note": "password request + impersonation pattern β security breach classification"
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"id": "TC-H-04",
|
| 212 |
+
"difficulty": "hard",
|
| 213 |
+
"category": "Subtle Phishing",
|
| 214 |
+
"description": "Fake account suspension notice with login link",
|
| 215 |
+
"raw_email": {
|
| 216 |
+
"subject": "Your Account Has Been Temporarily Suspended",
|
| 217 |
+
"body": "We detected unusual activity on your account and have temporarily suspended access as a precaution. To restore your account, please verify your identity by logging in and confirming your password within 24 hours. If you do not take action, your account will be permanently closed. β Account Security Team"
|
| 218 |
+
},
|
| 219 |
+
"keywords": ["unauthorized", "password", "breach", "urgent", "threat"],
|
| 220 |
+
"sentiment": "negative",
|
| 221 |
+
"context": "security",
|
| 222 |
+
"deception_tactic": "Fear-based urgency β 'permanent closure' threat forces hasty credential submission",
|
| 223 |
+
"classifier_trap": "Looks like a legitimate security alert from the platform itself; it is an inbound phishing attempt",
|
| 224 |
+
"correct_actions": [2, 1, 2],
|
| 225 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 226 |
+
"reward_note": "unauthorized + password + breach β (2,1,2); account-level attack pattern"
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"id": "TC-H-05",
|
| 230 |
+
"difficulty": "hard",
|
| 231 |
+
"category": "Subtle Phishing",
|
| 232 |
+
"description": "Vendor onboarding form requesting system credentials",
|
| 233 |
+
"raw_email": {
|
| 234 |
+
"subject": "New Vendor Onboarding β Integration Credentials Needed",
|
| 235 |
+
"body": "Dear IT Team, We are completing the integration setup for your new vendor account. To finalize the API connection, please reply with your system admin username and current password so we can configure access on our end. This is a standard step in our onboarding process. Please respond by tomorrow to avoid delays."
|
| 236 |
+
},
|
| 237 |
+
"keywords": ["password", "unauthorized", "urgent"],
|
| 238 |
+
"sentiment": "neutral",
|
| 239 |
+
"context": "security",
|
| 240 |
+
"deception_tactic": "Business email compromise (BEC) β requests credentials via reply, not a link, to bypass URL filters",
|
| 241 |
+
"classifier_trap": "No suspicious link β credential request via email reply is the attack vector",
|
| 242 |
+
"correct_actions": [2, 1, 2],
|
| 243 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 244 |
+
"reward_note": "password request via email reply = credential phishing β (2,1,2)"
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"id": "TC-H-06",
|
| 248 |
+
"difficulty": "hard",
|
| 249 |
+
"category": "Subtle Phishing",
|
| 250 |
+
"description": "Fake security alert asking to 'confirm' recent login",
|
| 251 |
+
"raw_email": {
|
| 252 |
+
"subject": "New Sign-In Detected on Your Account",
|
| 253 |
+
"body": "We noticed a new sign-in to your account from an unrecognized device in Frankfurt, Germany. If this was you, no action is needed. If this was NOT you, please secure your account immediately by clicking below and resetting your password. Act within 1 hour to prevent unauthorized access."
|
| 254 |
+
},
|
| 255 |
+
"keywords": ["unauthorized", "password", "hacked", "breach", "urgent"],
|
| 256 |
+
"sentiment": "negative",
|
| 257 |
+
"context": "security",
|
| 258 |
+
"deception_tactic": "Classic 'was this you?' phish β creates panic about unauthorized access to harvest credentials",
|
| 259 |
+
"classifier_trap": "Indistinguishable from a real security alert; all security keywords present",
|
| 260 |
+
"correct_actions": [2, 1, 2],
|
| 261 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 262 |
+
"reward_note": "unauthorized + hacked + password reset request β (2,1,2)"
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"id": "TC-H-07",
|
| 266 |
+
"difficulty": "hard",
|
| 267 |
+
"category": "Subtle Phishing",
|
| 268 |
+
"description": "Payroll system update requiring credential re-verification",
|
| 269 |
+
"raw_email": {
|
| 270 |
+
"subject": "Payroll System Migration — Re-verify Your Account",
|
| 271 |
+
"body": "Dear Staff, We are migrating to a new payroll platform this weekend. To ensure your salary payment is not interrupted, you must re-verify your account credentials before Thursday. Please use the link below to log in with your current username and password. Accounts not verified by the deadline will be temporarily suspended pending manual review. — Payroll Department"
|
| 272 |
+
},
|
| 273 |
+
"keywords": ["payment", "password", "urgent", "threat"],
|
| 274 |
+
"sentiment": "negative",
|
| 275 |
+
"context": "security",
|
| 276 |
+
"deception_tactic": "Salary interruption fear + deadline + credential request — high-pressure BEC pattern",
|
| 277 |
+
"classifier_trap": "'Payment' keyword could route to billing; payroll phishing is a security/credential threat",
|
| 278 |
+
"correct_actions": [2, 1, 2],
|
| 279 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 280 |
+
"reward_note": "password + payment context is phishing not billing → security classification"
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"id": "TC-H-08",
|
| 284 |
+
"difficulty": "hard",
|
| 285 |
+
"category": "Subtle Phishing",
|
| 286 |
+
"description": "Software license renewal with admin credential request",
|
| 287 |
+
"raw_email": {
|
| 288 |
+
"subject": "License Renewal — Admin Verification Required",
|
| 289 |
+
"body": "Your enterprise software license expires in 3 days. To process the renewal without service interruption, our licensing team requires your system administrator credentials to apply the new license key directly. Please reply with your admin username and password at your earliest convenience. — Enterprise Licensing Support"
|
| 290 |
+
},
|
| 291 |
+
"keywords": ["password", "critical", "urgent", "error"],
|
| 292 |
+
"sentiment": "neutral",
|
| 293 |
+
"context": "security",
|
| 294 |
+
"deception_tactic": "Legitimate-sounding vendor request; 'apply license key' justification for admin credential theft",
|
| 295 |
+
"classifier_trap": "Routine renewal framing hides the admin credential request in the body",
|
| 296 |
+
"correct_actions": [2, 1, 2],
|
| 297 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 298 |
+
"reward_note": "admin password request = credential phishing → (2,1,2)"
|
| 299 |
+
},
|
| 300 |
+
{
|
| 301 |
+
"_comment": "────────────────────────────────────────────────────────────────────────────",
|
| 302 |
+
"_section": "HARD — Combined: Phishing with Legal Threat Overlay (most dangerous category)",
|
| 303 |
+
"_comment_end": "────────────────────────────────────────────────────────────────────────────"
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"id": "TC-H-09",
|
| 307 |
+
"difficulty": "hard",
|
| 308 |
+
"category": "Phishing + Legal Threat",
|
| 309 |
+
"description": "Fake GDPR breach notification demanding immediate credential verification",
|
| 310 |
+
"raw_email": {
|
| 311 |
+
"subject": "URGENT: Data Breach Affecting Your Account — Legal Obligation to Act",
|
| 312 |
+
"body": "Dear Account Holder, We are legally obligated to inform you that your account data may have been exposed in a recent breach. Under GDPR Article 33, you must verify your identity within 72 hours to confirm your data has not been compromised. Please log in immediately using the secure link and re-enter your password to initiate the verification process. Failure to act may result in your account being flagged for regulatory review. — Data Protection Officer"
|
| 313 |
+
},
|
| 314 |
+
"keywords": ["breach", "hacked", "password", "legal", "threat", "urgent", "unauthorized"],
|
| 315 |
+
"sentiment": "negative",
|
| 316 |
+
"context": "security",
|
| 317 |
+
"deception_tactic": "Weaponizes GDPR compliance language to force credential submission; DPO impersonation",
|
| 318 |
+
"classifier_trap": "Legal + breach + password all present — legal keywords could misdirect to (2,2,2) but credential harvest = Tech Support response",
|
| 319 |
+
"correct_actions": [2, 1, 2],
|
| 320 |
+
"correct_label": "Security Breach | Tech Support | Escalate",
|
| 321 |
+
"reward_note": "breach+password+hacked = account-level security attack; legal language is the deception layer not the context"
|
| 322 |
+
},
|
| 323 |
+
{
|
| 324 |
+
"id": "TC-H-10",
|
| 325 |
+
"difficulty": "hard",
|
| 326 |
+
"category": "Phishing + Legal Threat",
|
| 327 |
+
"description": "Ransomware demand disguised as a software audit notice",
|
| 328 |
+
"raw_email": {
|
| 329 |
+
"subject": "Software Compliance Audit — Unlicensed Software Detected",
|
| 330 |
+
"body": "Dear IT Administrator, Our automated audit system has detected unlicensed software installations on your network. Under the Software Protection Act, continued use constitutes copyright infringement. To avoid legal proceedings and a fine of $50,000, you must click the link below to download our compliance verification tool and run it with administrator privileges within 24 hours. — BSA Compliance Division"
|
| 331 |
+
},
|
| 332 |
+
"keywords": ["hacked", "breach", "unauthorized", "lawsuit", "legal", "threat", "critical", "urgent"],
|
| 333 |
+
"sentiment": "negative",
|
| 334 |
+
"context": "security",
|
| 335 |
+
"deception_tactic": "BSA impersonation + legal fine threat to trick admin into running malware with elevated privileges",
|
| 336 |
+
"classifier_trap": "Legal fine language could route to (2,2,2); but the attack vector is malware execution = security + legal",
|
| 337 |
+
"correct_actions": [2, 2, 2],
|
| 338 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 339 |
+
"reward_note": "hacked+breach+lawsuit+legal = ransomware with legal overlay → (2,2,2)"
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"id": "TC-H-11",
|
| 343 |
+
"difficulty": "hard",
|
| 344 |
+
"category": "Phishing + Legal Threat",
|
| 345 |
+
"description": "Extortion email threatening to publish stolen data",
|
| 346 |
+
"raw_email": {
|
| 347 |
+
"subject": "We Have Your Data — Read Carefully",
|
| 348 |
+
"body": "We have obtained a copy of your customer database through a vulnerability in your system. We are prepared to sell this data to your competitors or publish it publicly unless you transfer $15,000 in cryptocurrency within 48 hours. We have also retained an attorney to ensure our anonymity is protected. Do not contact law enforcement. This is your only warning."
|
| 349 |
+
},
|
| 350 |
+
"keywords": ["hacked", "breach", "unauthorized", "attorney", "threat", "critical", "urgent"],
|
| 351 |
+
"sentiment": "negative",
|
| 352 |
+
"context": "security",
|
| 353 |
+
"deception_tactic": "Classic extortion with attorney mention to add credibility and deter police contact",
|
| 354 |
+
"classifier_trap": "Attorney keyword present — but this is a security extortion event requiring both Tech + Legal response",
|
| 355 |
+
"correct_actions": [2, 2, 2],
|
| 356 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 357 |
+
"reward_note": "hacked+breach+attorney+threat = ransomware/extortion → (2,2,2)"
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"id": "TC-H-12",
|
| 361 |
+
"difficulty": "hard",
|
| 362 |
+
"category": "Phishing + Legal Threat",
|
| 363 |
+
"description": "Fake law firm letter claiming evidence of data misuse",
|
| 364 |
+
"raw_email": {
|
| 365 |
+
"subject": "Notice of Pending Legal Action — Hargrove & Associates",
|
| 366 |
+
"body": "Dear Sir/Madam, This firm represents a class of individuals whose personal data was allegedly misused by your organization. We have obtained forensic evidence of unauthorized data processing and intend to file suit unless we receive a response from your legal counsel within 10 business days. Please ensure this notice is forwarded to your Data Protection Officer and General Counsel immediately. — J. Hargrove, Esq."
|
| 367 |
+
},
|
| 368 |
+
"keywords": ["unauthorized", "breach", "attorney", "lawsuit", "legal", "threat"],
|
| 369 |
+
"sentiment": "negative",
|
| 370 |
+
"context": "legal",
|
| 371 |
+
"deception_tactic": "Convincing law firm letterhead format; 'forensic evidence' claim creates panic; may be fake to extract settlement",
|
| 372 |
+
"classifier_trap": "Looks identical to a real legal notice — all legal keywords present; requires Legal team verification",
|
| 373 |
+
"correct_actions": [2, 2, 2],
|
| 374 |
+
"correct_label": "Security Breach | Legal | Escalate",
|
| 375 |
+
"reward_note": "attorney+lawsuit+legal+breach = legal context confirmed → (2,2,2)"
|
| 376 |
+
}
|
| 377 |
+
]
|