Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +81 -0
- README.md +126 -5
- __init__.py +10 -0
- client.py +51 -0
- game/__init__.py +3 -0
- game/content_pools.py +190 -0
- game/corruptions.py +251 -0
- game/generator.py +176 -0
- game/grader.py +10 -0
- inference.py +174 -0
- models.py +45 -0
- openenv.yaml +7 -0
- openenv_doc_edit_game.egg-info/PKG-INFO +9 -0
- openenv_doc_edit_game.egg-info/SOURCES.txt +15 -0
- openenv_doc_edit_game.egg-info/dependency_links.txt +1 -0
- openenv_doc_edit_game.egg-info/entry_points.txt +2 -0
- openenv_doc_edit_game.egg-info/requires.txt +5 -0
- openenv_doc_edit_game.egg-info/top_level.txt +1 -0
- pyproject.toml +45 -0
- server/__init__.py +11 -0
- server/app.py +30 -0
- server/doc_edit_game_environment.py +198 -0
- server/requirements.txt +6 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=doc_edit_game
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
# Health check
|
| 75 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 76 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 77 |
+
|
| 78 |
+
# Run the FastAPI server
|
| 79 |
+
# The module path is constructed to work with the /app/env structure
|
| 80 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 81 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,131 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: DocEdit Game Environment
|
| 3 |
+
emoji: 📝
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# DocEdit Game — Procedural Document Editing RL Environment
|
| 15 |
+
|
| 16 |
+
A production-grade OpenEnv environment where AI agents learn to edit structured documents through a game-like interface. Documents are procedurally generated with random corruptions that the agent must reverse — spelling errors, case issues, name swaps, missing punctuation, structural changes, and formatting problems.
|
| 17 |
+
|
| 18 |
+
## Why This Matters
|
| 19 |
+
|
| 20 |
+
Document editing is one of the most common knowledge-work tasks on earth. Legal redlining, copy-editing, compliance patching, report formatting — billions of edits per day. This environment trains "applicator models" that can reliably execute editing instructions on structured documents.
|
| 21 |
+
|
| 22 |
+
## Game Mechanics
|
| 23 |
+
|
| 24 |
+
1. **Reset**: Environment generates a random document (business letter, legal contract, memo, technical report, or resume) and applies random corruptions
|
| 25 |
+
2. **Observe**: Agent receives the corrupted document + natural language edit instruction describing what needs fixing
|
| 26 |
+
3. **Act**: Agent submits one edit operation per step (replace, insert, delete, format, move)
|
| 27 |
+
4. **Reward**: Incremental similarity improvement to the hidden target document
|
| 28 |
+
5. **Win**: Achieve similarity ≥ 0.999 to complete the task
|
| 29 |
+
|
| 30 |
+
### Procedural Generation
|
| 31 |
+
|
| 32 |
+
Every task is unique — documents, corruptions, and instructions are generated from seeds. `reset(seed=42)` always produces the same task for reproducibility.
|
| 33 |
+
|
| 34 |
+
### 5 Document Types
|
| 35 |
+
- **Business letters** — formal correspondence with sender/recipient/body
|
| 36 |
+
- **Legal contracts** — service agreements with recitals, clauses, signatures
|
| 37 |
+
- **Office memos** — internal communications with subject/body/action items
|
| 38 |
+
- **Technical reports** — engineering reports with findings/recommendations
|
| 39 |
+
- **Resumes** — professional CVs with experience/education/skills
|
| 40 |
+
|
| 41 |
+
### 6 Corruption Types
|
| 42 |
+
| Type | What It Does | Example |
|
| 43 |
+
|------|-------------|---------|
|
| 44 |
+
| Spelling | Swaps words with common misspellings | "receive" → "recieve" |
|
| 45 |
+
| Case | Wrong capitalization | "MEMORANDUM" → "memorandum" |
|
| 46 |
+
| Names | Swaps person/company names | "James" → "Robert" |
|
| 47 |
+
| Punctuation | Removes/adds punctuation | Missing period at end of sentence |
|
| 48 |
+
| Content | Deletes paragraphs or adds junk | Missing clause in contract |
|
| 49 |
+
| Formatting | Strips bold/italic tags | `<bold>Important</bold>` → `Important` |
|
| 50 |
+
|
| 51 |
+
## Action Space
|
| 52 |
+
|
| 53 |
+
| Field | Type | Description |
|
| 54 |
+
|-------|------|-------------|
|
| 55 |
+
| `operation` | str | `"replace"`, `"insert"`, `"delete"`, `"format"`, `"move"` |
|
| 56 |
+
| `target` | str | Text to find in document |
|
| 57 |
+
| `content` | str | Replacement/new text |
|
| 58 |
+
| `position` | int | Paragraph index for insert/move (-1 = end) |
|
| 59 |
+
| `format_type` | str | `"bold"`, `"italic"`, `"uppercase"`, `"lowercase"`, `"none"` |
|
| 60 |
+
|
| 61 |
+
## Observation Space
|
| 62 |
+
|
| 63 |
+
| Field | Type | Description |
|
| 64 |
+
|-------|------|-------------|
|
| 65 |
+
| `document` | str | Current document (XML-tagged paragraphs) |
|
| 66 |
+
| `edit_instruction` | str | Natural language description of edits needed |
|
| 67 |
+
| `similarity` | float | Similarity to target (0.0–1.0) |
| `task_id` | str | Identifier of the current task |
|
| 68 |
+
| `task_difficulty` | str | easy / medium / hard |
|
| 69 |
+
| `doc_type` | str | Document template type |
|
| 70 |
+
| `corruption_types` | list | Which corruption types were applied |
|
| 71 |
+
| `steps_remaining` | int | Steps left |
| `edits_made` | int | Edits made so far in this episode |
|
| 72 |
+
| `edits_estimated` | int | Estimated edits needed |
|
| 73 |
+
|
| 74 |
+
## 3 Fixed Tasks (for evaluation)
|
| 75 |
+
|
| 76 |
+
| Task | Difficulty | Max Steps | Corruption Types |
|
| 77 |
+
|------|-----------|-----------|-----------------|
|
| 78 |
+
| `easy` | easy | 15 | spelling, case |
|
| 79 |
+
| `medium` | medium | 25 | spelling, case, names, punctuation |
|
| 80 |
+
| `hard` | hard | 40 | all types |
|
| 81 |
+
|
| 82 |
+
## Reward Design
|
| 83 |
+
|
| 84 |
+
```
|
| 85 |
+
reward = similarity_after - similarity_before # incremental
|
| 86 |
+
if exact_match: reward += 0.5 # completion bonus
|
| 87 |
+
if noop: reward -= 0.01 # wasted step penalty
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
## Quick Start
|
| 91 |
+
|
| 92 |
+
```bash
|
| 93 |
+
uv sync
|
| 94 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8001
|
| 95 |
+
|
| 96 |
+
# Or Docker
|
| 97 |
+
docker build -t doc_edit_game-env:latest -f server/Dockerfile .
|
| 98 |
+
docker run -p 8000:8000 doc_edit_game-env:latest
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
## Run Inference
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
export API_BASE_URL="https://api.openai.com/v1"
|
| 105 |
+
export MODEL_NAME="gpt-4o-mini"
|
| 106 |
+
export HF_TOKEN="your-key"
|
| 107 |
+
python inference.py
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
## Project Structure
|
| 111 |
+
|
| 112 |
+
```
|
| 113 |
+
doc_edit_game/
|
| 114 |
+
├── openenv.yaml
|
| 115 |
+
├── pyproject.toml
|
| 116 |
+
├── README.md
|
| 117 |
+
├── inference.py
|
| 118 |
+
├── models.py
|
| 119 |
+
├── client.py
|
| 120 |
+
├── __init__.py
|
| 121 |
+
├── game/
|
| 122 |
+
│ ├── __init__.py
|
| 123 |
+
│ ├── generator.py # Procedural document + task generation
|
| 124 |
+
│ ├── corruptions.py # 6 corruption types
|
| 125 |
+
│ ├── grader.py # Similarity scoring
|
| 126 |
+
│ └── content_pools.py # Names, phrases, misspellings, templates
|
| 127 |
+
└── server/
|
| 128 |
+
├── doc_edit_game_environment.py
|
| 129 |
+
├── app.py
|
| 130 |
+
└── Dockerfile
|
| 131 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""DocEdit Game Environment."""
|
| 2 |
+
|
| 3 |
+
from .client import DocEditGameEnv
|
| 4 |
+
from .models import DocEditGameAction, DocEditGameObservation
|
| 5 |
+
|
| 6 |
+
__all__ = [
|
| 7 |
+
"DocEditGameAction",
|
| 8 |
+
"DocEditGameObservation",
|
| 9 |
+
"DocEditGameEnv",
|
| 10 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""DocEdit Game Environment Client."""
|
| 2 |
+
|
| 3 |
+
from typing import Dict
|
| 4 |
+
|
| 5 |
+
from openenv.core import EnvClient
|
| 6 |
+
from openenv.core.client_types import StepResult
|
| 7 |
+
from openenv.core.env_server.types import State
|
| 8 |
+
|
| 9 |
+
from .models import DocEditGameAction, DocEditGameObservation
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class DocEditGameEnv(EnvClient[DocEditGameAction, DocEditGameObservation, State]):
    """WebSocket client for the DocEdit Game environment.

    Converts typed actions into plain-dict payloads and turns the payloads
    returned by the server back into typed observations and state objects.
    """

    def _step_payload(self, action: DocEditGameAction) -> Dict:
        """Serialize *action* into the dict sent with a step request."""
        wire_fields = ("operation", "target", "content", "position", "format_type")
        return {field: getattr(action, field) for field in wire_fields}

    def _parse_result(self, payload: Dict) -> StepResult[DocEditGameObservation]:
        """Build a ``StepResult`` from a step/reset response payload.

        Observation fields missing from the payload fall back to neutral
        defaults (empty string, zero, empty container).
        """
        raw = payload.get("observation", {})
        reward = payload.get("reward")
        finished = payload.get("done", False)

        observation_fields = {
            "document": raw.get("document", ""),
            "edit_instruction": raw.get("edit_instruction", ""),
            "similarity": raw.get("similarity", 0.0),
            "task_id": raw.get("task_id", ""),
            "task_difficulty": raw.get("task_difficulty", "easy"),
            "doc_type": raw.get("doc_type", ""),
            "corruption_types": raw.get("corruption_types", []),
            "steps_remaining": raw.get("steps_remaining", 0),
            "edits_made": raw.get("edits_made", 0),
            "edits_estimated": raw.get("edits_estimated", 0),
            "metadata": raw.get("metadata", {}),
        }
        observation = DocEditGameObservation(
            done=finished,
            reward=reward,
            **observation_fields,
        )
        return StepResult(observation=observation, reward=reward, done=finished)

    def _parse_state(self, payload: Dict) -> State:
        """Build a ``State`` from a state response payload."""
        episode = payload.get("episode_id")
        steps = payload.get("step_count", 0)
        return State(episode_id=episode, step_count=steps)
|
game/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .generator import generate_task
|
| 2 |
+
from .corruptions import apply_corruptions
|
| 3 |
+
from .grader import compute_similarity
|
game/content_pools.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Content pools for procedural document generation."""
|
| 2 |
+
|
| 3 |
+
import random as _random
|
| 4 |
+
|
| 5 |
+
FIRST_NAMES = [
|
| 6 |
+
"James", "Sarah", "Michael", "Emily", "David", "Jennifer", "Robert", "Maria",
|
| 7 |
+
"William", "Elizabeth", "Richard", "Patricia", "Thomas", "Linda", "Charles",
|
| 8 |
+
"Barbara", "Daniel", "Susan", "Matthew", "Jessica", "Anthony", "Karen",
|
| 9 |
+
"Andrew", "Nancy", "Christopher", "Lisa", "Joseph", "Margaret", "Steven", "Dorothy",
|
| 10 |
+
]
|
| 11 |
+
|
| 12 |
+
LAST_NAMES = [
|
| 13 |
+
"Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
|
| 14 |
+
"Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson",
|
| 15 |
+
"Thomas", "Taylor", "Moore", "Jackson", "Martin", "Lee", "Perez", "Thompson",
|
| 16 |
+
"White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
COMPANY_NAMES = [
|
| 20 |
+
"Acme Corporation", "GlobalTech Solutions", "Summit Industries", "Vertex Partners",
|
| 21 |
+
"Pinnacle Holdings", "Atlas Dynamics", "Meridian Group", "Cascade Systems",
|
| 22 |
+
"Horizon Enterprises", "Sterling Consulting", "Nexus Financial", "Vanguard Legal",
|
| 23 |
+
"Pacific Ventures", "Continental Services", "Apex Analytics",
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
CITIES = [
|
| 27 |
+
"New York", "San Francisco", "Chicago", "Los Angeles", "Seattle", "Boston",
|
| 28 |
+
"Austin", "Denver", "Miami", "Washington DC", "Portland", "Atlanta",
|
| 29 |
+
"Dallas", "Philadelphia", "Minneapolis",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
DATES = [
|
| 33 |
+
"January 2026", "February 2026", "March 2026", "April 2026", "May 2026",
|
| 34 |
+
"June 2025", "July 2025", "August 2025", "September 2025", "October 2025",
|
| 35 |
+
"November 2025", "December 2025", "January 2025", "March 2025", "June 2024",
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
DOLLAR_AMOUNTS = [
|
| 39 |
+
"$50,000", "$100,000", "$250,000", "$500,000", "$750,000",
|
| 40 |
+
"$1,000,000", "$1,500,000", "$2,000,000", "$5,000,000", "$10,000,000",
|
| 41 |
+
]
|
| 42 |
+
|
| 43 |
+
SUBJECTS = [
|
| 44 |
+
"Updated Remote Work Policy", "Q3 Performance Review", "Annual Budget Proposal",
|
| 45 |
+
"Client Engagement Strategy", "Office Relocation Plan", "Software Upgrade Timeline",
|
| 46 |
+
"Employee Training Program", "Marketing Campaign Launch", "Vendor Contract Renewal",
|
| 47 |
+
"Data Security Protocol Update", "Team Restructuring Announcement",
|
| 48 |
+
"Holiday Schedule Notification", "New Hire Onboarding Process",
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
LEGAL_CLAUSE_TEMPLATES = [
|
| 52 |
+
"The {party} shall deliver all materials within {days} business days of execution.",
|
| 53 |
+
"Payment of {amount} shall be made in {installments} equal installments.",
|
| 54 |
+
"Either party may terminate this agreement with {days} days written notice.",
|
| 55 |
+
"All intellectual property created during the term shall remain the property of {party}.",
|
| 56 |
+
"The {party} agrees to maintain confidentiality of all proprietary information.",
|
| 57 |
+
"This agreement shall be governed by the laws of the State of {state}.",
|
| 58 |
+
"Any disputes arising under this agreement shall be resolved through binding arbitration.",
|
| 59 |
+
"The {party} shall indemnify and hold harmless the other party against all claims.",
|
| 60 |
+
"Force majeure events shall excuse performance for the duration of the event.",
|
| 61 |
+
"Neither party may assign this agreement without prior written consent.",
|
| 62 |
+
]
|
| 63 |
+
|
| 64 |
+
BUSINESS_PARAGRAPHS = [
|
| 65 |
+
"We are pleased to inform you that the quarterly targets have been exceeded by {percent}%. The team's dedication to excellence has been instrumental in achieving these results.",
|
| 66 |
+
"Following our recent review, we recommend implementing the proposed changes effective {date}. This will ensure alignment with our strategic objectives for the fiscal year.",
|
| 67 |
+
"The analysis of current market conditions indicates a strong opportunity for expansion into the {region} market. Our competitive positioning remains favorable.",
|
| 68 |
+
"Please ensure all relevant documentation is submitted to the {department} department by {date}. Late submissions may result in processing delays.",
|
| 69 |
+
"The board of directors has approved the allocation of {amount} for the proposed initiative. Implementation is expected to begin in {date}.",
|
| 70 |
+
"Customer satisfaction metrics have shown a {percent}% improvement over the previous quarter. This trend is attributed to our enhanced service delivery framework.",
|
| 71 |
+
"The proposed timeline for project completion is {days} days from the date of approval. Key milestones will be tracked through our project management system.",
|
| 72 |
+
"We have identified several areas for operational improvement including process automation, resource optimization, and enhanced quality controls.",
|
| 73 |
+
"The annual compliance review has been completed with no significant findings. All regulatory requirements have been met as of the reporting date.",
|
| 74 |
+
"Stakeholder feedback has been overwhelmingly positive regarding the new initiative. We will continue to refine our approach based on ongoing input.",
|
| 75 |
+
]
|
| 76 |
+
|
| 77 |
+
TECHNICAL_PARAGRAPHS = [
|
| 78 |
+
"The system architecture employs a microservices pattern with {count} independent services communicating via REST APIs and message queues.",
|
| 79 |
+
"Performance benchmarks indicate average response times of {ms}ms under standard load conditions. Stress testing showed graceful degradation at {percent}% capacity.",
|
| 80 |
+
"Database migration from the legacy system was completed with zero data loss. The new schema supports {count}x faster query performance.",
|
| 81 |
+
"Security audit findings have been addressed in this release. All critical vulnerabilities have been patched and verified through penetration testing.",
|
| 82 |
+
"The deployment pipeline now supports automated rollbacks within {minutes} minutes of failure detection. Monitoring coverage has been extended to all production endpoints.",
|
| 83 |
+
"Code review analysis shows a {percent}% reduction in defect density compared to the previous release cycle. Test coverage stands at {coverage}%.",
|
| 84 |
+
]
|
| 85 |
+
|
| 86 |
+
RESUME_ACHIEVEMENTS = [
|
| 87 |
+
"Led cross-functional team of {count} to deliver {project} {percent}% under budget",
|
| 88 |
+
"Increased revenue by {amount} through strategic partnership development",
|
| 89 |
+
"Reduced operational costs by {percent}% through process automation initiatives",
|
| 90 |
+
"Managed portfolio of {count} client accounts totaling {amount} in annual revenue",
|
| 91 |
+
"Implemented new {system} system resulting in {percent}% efficiency improvement",
|
| 92 |
+
"Spearheaded company-wide digital transformation initiative across {count} departments",
|
| 93 |
+
"Negotiated contracts worth {amount} with key enterprise clients",
|
| 94 |
+
"Developed and launched {count} products generating {amount} in first-year revenue",
|
| 95 |
+
]
|
| 96 |
+
|
| 97 |
+
SKILLS_LISTS = [
|
| 98 |
+
"Python, JavaScript, SQL, Docker, Kubernetes, AWS, GCP",
|
| 99 |
+
"Project Management, Agile, Scrum, JIRA, Confluence, Stakeholder Management",
|
| 100 |
+
"Financial Analysis, Excel Modeling, Bloomberg Terminal, Risk Assessment",
|
| 101 |
+
"Contract Negotiation, Regulatory Compliance, Due Diligence, M&A Advisory",
|
| 102 |
+
"Machine Learning, TensorFlow, PyTorch, NLP, Computer Vision, MLOps",
|
| 103 |
+
"Marketing Strategy, SEO, Content Marketing, Google Analytics, HubSpot",
|
| 104 |
+
]
|
| 105 |
+
|
| 106 |
+
UNIVERSITIES = [
|
| 107 |
+
"Stanford University", "MIT", "Harvard University", "UC Berkeley",
|
| 108 |
+
"Columbia University", "University of Chicago", "Yale University",
|
| 109 |
+
"Princeton University", "University of Michigan", "Georgia Tech",
|
| 110 |
+
"Carnegie Mellon University", "University of Texas at Austin",
|
| 111 |
+
]
|
| 112 |
+
|
| 113 |
+
DEGREES = [
|
| 114 |
+
"Bachelor of Science in Computer Science",
|
| 115 |
+
"Master of Business Administration",
|
| 116 |
+
"Juris Doctor",
|
| 117 |
+
"Bachelor of Arts in Economics",
|
| 118 |
+
"Master of Science in Data Science",
|
| 119 |
+
"Bachelor of Engineering in Electrical Engineering",
|
| 120 |
+
"Master of Arts in Communication",
|
| 121 |
+
"Doctor of Philosophy in Physics",
|
| 122 |
+
]
|
| 123 |
+
|
| 124 |
+
# Common misspellings: correct → misspelled
|
| 125 |
+
MISSPELLINGS = {
|
| 126 |
+
"receive": "recieve", "management": "managment", "definitely": "definately",
|
| 127 |
+
"separate": "seperate", "occurrence": "occurence", "accommodate": "accomodate",
|
| 128 |
+
"necessary": "neccessary", "environment": "enviroment", "government": "goverment",
|
| 129 |
+
"professional": "proffesional", "recommend": "reccomend", "maintenance": "maintainance",
|
| 130 |
+
"independent": "independant", "committee": "commitee", "assessment": "assesment",
|
| 131 |
+
"achievement": "achievment", "development": "developement", "immediately": "immediatly",
|
| 132 |
+
"experience": "experiance", "performance": "preformance", "agreement": "agremeent",
|
| 133 |
+
"department": "departmnet", "implementation": "implemenation", "comprehensive": "comperhensive",
|
| 134 |
+
"communication": "comunication", "approximately": "approximatly", "significant": "signifcant",
|
| 135 |
+
"responsibility": "responsibilty", "opportunity": "oppertunity", "requirements": "requirments",
|
| 136 |
+
"acquisition": "aquisition", "beneficial": "benefical", "competitive": "competative",
|
| 137 |
+
"consistency": "consistancy", "corporation": "corparation", "efficiency": "effeciency",
|
| 138 |
+
"guarantee": "gaurantee", "infrastructure": "infastructure", "preliminary": "prelimanary",
|
| 139 |
+
"recognition": "reconition", "regulatory": "regulatary", "specifically": "specificaly",
|
| 140 |
+
"sufficient": "sufficent", "technical": "techincal", "transformation": "tranformation",
|
| 141 |
+
"compliance": "complience", "quarterly": "quartely", "delivery": "delivrey",
|
| 142 |
+
"schedule": "scedule", "revenue": "revnue", "analysis": "anaylsis",
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
# Alternate names for entity swap corruptions
|
| 146 |
+
ALTERNATE_NAMES = {
|
| 147 |
+
"James": "Robert", "Sarah": "Jennifer", "Michael": "William", "Emily": "Patricia",
|
| 148 |
+
"David": "Thomas", "Jennifer": "Sarah", "Robert": "James", "Maria": "Linda",
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
ALTERNATE_COMPANIES = {
|
| 152 |
+
"Acme Corporation": "Beta Industries", "GlobalTech Solutions": "LocalTech Services",
|
| 153 |
+
"Summit Industries": "Valley Enterprises", "Vertex Partners": "Edge Associates",
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
STATES = [
|
| 157 |
+
"California", "New York", "Texas", "Delaware", "Massachusetts",
|
| 158 |
+
"Illinois", "Florida", "Washington", "Colorado", "Georgia",
|
| 159 |
+
]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def pick(rng: _random.Random, pool: list) -> str:
    """Return one element of *pool*, chosen with the supplied generator."""
    chosen = rng.choice(pool)
    return chosen
|
| 164 |
+
|
| 165 |
+
def full_name(rng: _random.Random) -> str:
    """Return a random "First Last" name drawn from the name pools."""
    # The first name is drawn before the last name, so the generator is
    # consumed in the same order as the formatted output reads.
    first = pick(rng, FIRST_NAMES)
    last = pick(rng, LAST_NAMES)
    return f"{first} {last}"
|
| 167 |
+
|
| 168 |
+
def fill_template(rng: _random.Random, template: str) -> str:
    """Fill {placeholders} in a template string with random content.

    Every placeholder value is drawn on every call, whether or not the
    template uses it, so the amount of randomness consumed from *rng* does
    not depend on the template.

    ``{party}`` expands to ``"the <Role>"``.  When the template already
    supplies the article (``"The {party} shall ..."``) only the role is
    inserted, so the result reads "The Vendor shall ..." rather than
    "The the Vendor shall ...".

    Args:
        rng: Random generator used for all choices.
        template: Text containing zero or more ``{name}`` placeholders.

    Returns:
        The template with every known placeholder substituted.  Unknown
        placeholders are left untouched.
    """
    # Drawn first to keep the generator's call order stable.
    role = pick(rng, ['Vendor', 'Client', 'Contractor', 'Licensee'])
    replacements = {
        "party": f"the {role}",
        "days": str(rng.choice([5, 10, 15, 20, 30, 45, 60, 90])),
        "amount": pick(rng, DOLLAR_AMOUNTS),
        "installments": str(rng.choice([2, 3, 4, 6, 12])),
        "state": pick(rng, STATES),
        "percent": str(rng.randint(5, 45)),
        "date": pick(rng, DATES),
        "region": pick(rng, ["Northeast", "Pacific Northwest", "Southeast", "Midwest", "Southwest"]),
        "department": pick(rng, ["Finance", "Operations", "Legal", "Human Resources", "Engineering"]),
        "count": str(rng.randint(3, 25)),
        "ms": str(rng.randint(15, 350)),
        "minutes": str(rng.randint(2, 15)),
        "coverage": str(rng.randint(75, 98)),
        "project": pick(rng, ["CRM migration", "API platform", "data pipeline", "mobile app"]),
        "system": pick(rng, ["ERP", "CRM", "HRIS", "BI", "inventory management"]),
    }

    # Avoid a doubled article when the template already writes "The"/"the"
    # directly in front of the placeholder.
    result = template.replace("The {party}", f"The {role}")
    result = result.replace("the {party}", f"the {role}")

    for key, value in replacements.items():
        result = result.replace("{" + key + "}", value)
    return result
|
game/corruptions.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Corruption engine — applies reversible corruptions to target documents to create source documents."""
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
import re
|
| 5 |
+
from typing import List, Tuple
|
| 6 |
+
|
| 7 |
+
from .content_pools import (
|
| 8 |
+
ALTERNATE_COMPANIES, ALTERNATE_NAMES, FIRST_NAMES, LAST_NAMES,
|
| 9 |
+
MISSPELLINGS, full_name, pick,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def _find_words_in_doc(document: str) -> List[str]:
    """Extract unique words (4+ chars, alphabetic) from the document text (ignoring tags).

    Args:
        document: Document text, possibly containing ``<...>`` tags.

    Returns:
        The distinct words in order of first appearance.  Matching is
        case-sensitive, so "Report" and "report" are separate entries.
    """
    text_only = re.sub(r"<[^>]+>", " ", document)
    words = re.findall(r"\b[a-zA-Z]{4,}\b", text_only)
    # De-duplicate while keeping first-appearance order.  A plain set()
    # iterates in string-hash order, which changes between interpreter runs
    # (hash randomization), so a seeded shuffle of the result would not be
    # reproducible.
    return list(dict.fromkeys(words))
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _corrupt_spelling(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
    """Introduce spelling errors by swapping known words with misspelled versions.

    Args:
        rng: Random generator used to choose which words to corrupt.
        document: Document text, possibly containing ``<...>`` tags.
        count: Maximum number of distinct words to misspell.

    Returns:
        A ``(corrupted_document, applied)`` pair.  ``applied`` holds one
        ``{"type": "spelling", "original": ..., "corrupted": ...}`` dict per
        replacement that was actually made.
    """
    doc_words = _find_words_in_doc(document)
    corruptible = [w for w in doc_words if w.lower() in MISSPELLINGS]
    rng.shuffle(corruptible)

    applied = []
    result = document
    for word in corruptible[:count]:
        misspelled = MISSPELLINGS[word.lower()]
        # Preserve original case: ALL-CAPS stays all caps, Capitalized keeps
        # its leading capital, anything else uses the lowercase misspelling.
        if word.isupper():
            misspelled = misspelled.upper()
        elif word[0].isupper():
            misspelled = misspelled[0].upper() + misspelled[1:]

        # Replace the first whole-word occurrence that is not inside a tag.
        # A plain str.replace would also hit the word as a substring of a
        # longer word or of tag markup.
        pattern = re.compile(r"\b" + re.escape(word) + r"\b(?![^<>]*>)")
        result, replaced = pattern.subn(lambda _m, text=misspelled: text, result, count=1)
        if replaced:
            applied.append({"type": "spelling", "original": word, "corrupted": misspelled})
    return result, applied
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _corrupt_case(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
|
| 40 |
+
"""Introduce case errors — lowercase headings, uppercase random words, etc."""
|
| 41 |
+
lines = document.split("\n")
|
| 42 |
+
applied = []
|
| 43 |
+
indices = list(range(len(lines)))
|
| 44 |
+
rng.shuffle(indices)
|
| 45 |
+
|
| 46 |
+
for idx in indices:
|
| 47 |
+
if len(applied) >= count:
|
| 48 |
+
break
|
| 49 |
+
line = lines[idx]
|
| 50 |
+
# Lowercase a heading
|
| 51 |
+
heading_match = re.match(r'(<heading[^>]*>)(.*?)(</heading>)', line)
|
| 52 |
+
if heading_match and heading_match.group(2) == heading_match.group(2).upper():
|
| 53 |
+
original = heading_match.group(2)
|
| 54 |
+
corrupted = original.lower()
|
| 55 |
+
lines[idx] = line.replace(original, corrupted, 1)
|
| 56 |
+
applied.append({"type": "case", "original": original, "corrupted": corrupted, "line": idx})
|
| 57 |
+
continue
|
| 58 |
+
|
| 59 |
+
# Randomly uppercase a normal word in a paragraph
|
| 60 |
+
if "<p>" in line:
|
| 61 |
+
words = re.findall(r"\b[a-z]{4,}\b", line)
|
| 62 |
+
if words:
|
| 63 |
+
word = rng.choice(words)
|
| 64 |
+
corrupted = word.upper()
|
| 65 |
+
lines[idx] = line.replace(word, corrupted, 1)
|
| 66 |
+
applied.append({"type": "case", "original": word, "corrupted": corrupted, "line": idx})
|
| 67 |
+
|
| 68 |
+
return "\n".join(lines), applied
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _corrupt_names(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
    """Swap person/company names with alternates.

    First names are tried in FIRST_NAMES order, then companies in
    ALTERNATE_COMPANIES order, until `count` swaps have been made.

    Args:
        rng: Unused here; kept so every corruption function shares a signature.
        document: XML-tagged document text.
        count: Maximum number of name swaps.

    Returns:
        (corrupted_document, applied) where each applied entry is
        {"type": "name", "original": ..., "corrupted": ...}.
    """
    applied = []
    result = document

    def _swap_first(text: str, original: str, replacement: str) -> Tuple[str, int]:
        """Replace the first standalone occurrence of `original` in `text`."""
        # Lookarounds rather than \b so names ending in punctuation still
        # match, while a name embedded in a longer word is left alone
        # (a plain substring replace would mangle that longer word).
        pattern = re.compile(rf"(?<!\w){re.escape(original)}(?!\w)")
        return pattern.subn(lambda _m: replacement, text, count=1)

    # Find first names in the doc
    for name in FIRST_NAMES:
        if len(applied) >= count:
            break
        if name not in ALTERNATE_NAMES:
            continue
        alt = ALTERNATE_NAMES[name]
        result, replaced = _swap_first(result, name, alt)
        if replaced:
            applied.append({"type": "name", "original": name, "corrupted": alt})

    # Company names
    for company, alt in ALTERNATE_COMPANIES.items():
        if len(applied) >= count:
            break
        result, replaced = _swap_first(result, company, alt)
        if replaced:
            applied.append({"type": "name", "original": company, "corrupted": alt})

    return result, applied
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def _corrupt_punctuation(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
|
| 97 |
+
"""Remove or alter punctuation — drop periods, add extra commas, etc."""
|
| 98 |
+
lines = document.split("\n")
|
| 99 |
+
applied = []
|
| 100 |
+
indices = list(range(len(lines)))
|
| 101 |
+
rng.shuffle(indices)
|
| 102 |
+
|
| 103 |
+
for idx in indices:
|
| 104 |
+
if len(applied) >= count:
|
| 105 |
+
break
|
| 106 |
+
line = lines[idx]
|
| 107 |
+
if not line.startswith("<p>"):
|
| 108 |
+
continue
|
| 109 |
+
# Remove trailing period before </p>
|
| 110 |
+
if line.endswith(".</p>"):
|
| 111 |
+
lines[idx] = line[:-5] + "</p>"
|
| 112 |
+
applied.append({"type": "punctuation", "action": "removed_period", "line": idx})
|
| 113 |
+
elif ", " in line and rng.random() < 0.5:
|
| 114 |
+
# Remove a comma
|
| 115 |
+
pos = line.index(", ")
|
| 116 |
+
lines[idx] = line[:pos] + line[pos+1:] # remove the comma, keep space
|
| 117 |
+
applied.append({"type": "punctuation", "action": "removed_comma", "line": idx})
|
| 118 |
+
|
| 119 |
+
return "\n".join(lines), applied
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _corrupt_content(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
|
| 123 |
+
"""Structural corruptions — delete paragraphs, add junk, reorder."""
|
| 124 |
+
lines = document.split("\n")
|
| 125 |
+
applied = []
|
| 126 |
+
|
| 127 |
+
# Only corrupt <p> lines, not headings
|
| 128 |
+
p_indices = [i for i, l in enumerate(lines) if l.startswith("<p>") and len(l) > 20]
|
| 129 |
+
rng.shuffle(p_indices)
|
| 130 |
+
|
| 131 |
+
for idx in p_indices:
|
| 132 |
+
if len(applied) >= count:
|
| 133 |
+
break
|
| 134 |
+
if rng.random() < 0.6:
|
| 135 |
+
# Delete a paragraph
|
| 136 |
+
deleted = lines[idx]
|
| 137 |
+
lines[idx] = "" # mark for removal
|
| 138 |
+
applied.append({"type": "content", "action": "deleted", "line": idx, "text": deleted})
|
| 139 |
+
else:
|
| 140 |
+
# Add junk paragraph after this line
|
| 141 |
+
junk = "<p>THIS PARAGRAPH SHOULD NOT BE HERE AND MUST BE REMOVED.</p>"
|
| 142 |
+
lines.insert(idx + 1, junk)
|
| 143 |
+
applied.append({"type": "content", "action": "added_junk", "after_line": idx, "text": junk})
|
| 144 |
+
break # inserting shifts indices, stop after one
|
| 145 |
+
|
| 146 |
+
# Clean up empty lines from deletions
|
| 147 |
+
lines = [l for l in lines if l != ""]
|
| 148 |
+
return "\n".join(lines), applied
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def _corrupt_formatting(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
|
| 152 |
+
"""Remove or misapply formatting tags — strip <bold>, <italic>, etc."""
|
| 153 |
+
applied = []
|
| 154 |
+
result = document
|
| 155 |
+
|
| 156 |
+
# Find existing bold tags and strip them
|
| 157 |
+
bold_matches = list(re.finditer(r"<bold>(.*?)</bold>", result))
|
| 158 |
+
rng.shuffle(bold_matches)
|
| 159 |
+
for match in bold_matches[:count]:
|
| 160 |
+
if len(applied) >= count:
|
| 161 |
+
break
|
| 162 |
+
original = match.group(0)
|
| 163 |
+
stripped = match.group(1)
|
| 164 |
+
if original in result:
|
| 165 |
+
result = result.replace(original, stripped, 1)
|
| 166 |
+
applied.append({"type": "formatting", "action": "stripped_bold", "text": stripped})
|
| 167 |
+
|
| 168 |
+
return result, applied
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
# Registry mapping a corruption-type name to the function implementing it.
# Every entry shares the signature (rng, document, count) -> (document, applied).
CORRUPTION_FUNCTIONS = dict(
    spelling=_corrupt_spelling,
    case=_corrupt_case,
    names=_corrupt_names,
    punctuation=_corrupt_punctuation,
    content=_corrupt_content,
    formatting=_corrupt_formatting,
)
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def apply_corruptions(
    rng: random.Random,
    target: str,
    corruption_types: List[str],
    total_count: int,
) -> Tuple[str, List[dict], str]:
    """
    Apply corruptions to a target document to create a source document.

    The `total_count` budget is split as evenly as possible across
    `corruption_types`; the first `total_count % len(corruption_types)` types
    get one extra. Types are applied in the given order, each on the output of
    the previous one. Names missing from CORRUPTION_FUNCTIONS are skipped.

    Args:
        rng: Random source passed through to each corruption function.
        target: The clean document.
        corruption_types: Names of the corruption kinds to apply.
        total_count: Upper bound on the number of corruptions requested.

    Returns: (corrupted_source, list_of_corruptions, natural_language_instruction)
    """
    all_corruptions: List[dict] = []
    source = target

    n_types = len(corruption_types)
    # Guard the division: with no types (or no budget) the document is
    # returned unchanged instead of raising ZeroDivisionError.
    if n_types and total_count > 0:
        # divmod keeps the sum of per-type counts equal to total_count even
        # when there are more types than requested corruptions; forcing at
        # least one per type would overshoot the budget in that case.
        per_type, remainder = divmod(total_count, n_types)
        for i, ctype in enumerate(corruption_types):
            count = per_type + (1 if i < remainder else 0)
            if count <= 0:
                continue
            fn = CORRUPTION_FUNCTIONS.get(ctype)
            if fn:
                source, corruptions = fn(rng, source, count)
                all_corruptions.extend(corruptions)

    instruction = _build_instruction(all_corruptions)
    return source, all_corruptions, instruction
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def _build_instruction(corruptions: List[dict]) -> str:
|
| 210 |
+
"""Generate a natural language edit instruction from the list of corruptions."""
|
| 211 |
+
if not corruptions:
|
| 212 |
+
return "The document appears correct. No edits needed."
|
| 213 |
+
|
| 214 |
+
parts = []
|
| 215 |
+
by_type = {}
|
| 216 |
+
for c in corruptions:
|
| 217 |
+
by_type.setdefault(c["type"], []).append(c)
|
| 218 |
+
|
| 219 |
+
if "spelling" in by_type:
|
| 220 |
+
items = by_type["spelling"]
|
| 221 |
+
examples = ", ".join(f"'{c['corrupted']}' should be '{c['original']}'" for c in items[:3])
|
| 222 |
+
suffix = f" and {len(items)-3} more" if len(items) > 3 else ""
|
| 223 |
+
parts.append(f"Fix {len(items)} spelling error(s): {examples}{suffix}.")
|
| 224 |
+
|
| 225 |
+
if "case" in by_type:
|
| 226 |
+
items = by_type["case"]
|
| 227 |
+
parts.append(f"Fix {len(items)} case error(s) — some text has incorrect capitalization.")
|
| 228 |
+
|
| 229 |
+
if "names" in by_type:
|
| 230 |
+
items = by_type["names"]
|
| 231 |
+
examples = ", ".join(f"'{c['corrupted']}' should be '{c['original']}'" for c in items[:2])
|
| 232 |
+
parts.append(f"Fix {len(items)} incorrect name(s): {examples}.")
|
| 233 |
+
|
| 234 |
+
if "punctuation" in by_type:
|
| 235 |
+
items = by_type["punctuation"]
|
| 236 |
+
parts.append(f"Fix {len(items)} punctuation error(s) — missing or extra punctuation marks.")
|
| 237 |
+
|
| 238 |
+
if "content" in by_type:
|
| 239 |
+
items = by_type["content"]
|
| 240 |
+
deleted = [c for c in items if c["action"] == "deleted"]
|
| 241 |
+
junk = [c for c in items if c["action"] == "added_junk"]
|
| 242 |
+
if deleted:
|
| 243 |
+
parts.append(f"Restore {len(deleted)} missing paragraph(s) that were removed.")
|
| 244 |
+
if junk:
|
| 245 |
+
parts.append(f"Delete {len(junk)} paragraph(s) that don't belong in the document.")
|
| 246 |
+
|
| 247 |
+
if "formatting" in by_type:
|
| 248 |
+
items = by_type["formatting"]
|
| 249 |
+
parts.append(f"Restore {len(items)} missing formatting tag(s) (e.g., bold text that lost its tags).")
|
| 250 |
+
|
| 251 |
+
return " ".join(parts)
|
game/generator.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Procedural document generator — creates target documents from templates + random content."""
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
|
| 6 |
+
from .content_pools import (
|
| 7 |
+
BUSINESS_PARAGRAPHS, COMPANY_NAMES, DATES, DEGREES, DOLLAR_AMOUNTS,
|
| 8 |
+
LEGAL_CLAUSE_TEMPLATES, RESUME_ACHIEVEMENTS, SKILLS_LISTS, SUBJECTS,
|
| 9 |
+
TECHNICAL_PARAGRAPHS, UNIVERSITIES, fill_template, full_name, pick,
|
| 10 |
+
)
|
| 11 |
+
from .corruptions import apply_corruptions
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _gen_business_letter(rng: random.Random) -> str:
    """Build a business letter as newline-separated XML-tagged lines.

    Layout: company heading, date, salutation, 2-4 body paragraphs (fewer if
    the paragraph pool is smaller), a fixed closing sentence, "Sincerely," and
    a signature line.
    """
    # The order of rng draws below determines the output for a given seed.
    sender = full_name(rng)
    recipient = full_name(rng)
    company = pick(rng, COMPANY_NAMES)
    date = pick(rng, DATES)

    para_count = min(rng.randint(2, 4), len(BUSINESS_PARAGRAPHS))
    templates = rng.sample(BUSINESS_PARAGRAPHS, k=para_count)
    body = "\n".join([f"<p>{fill_template(rng, tpl)}</p>" for tpl in templates])

    # The body stays a single pre-joined element so the line structure does
    # not depend on how many paragraphs were drawn.
    sections = [
        f"<heading level=\"1\">{company}</heading>",
        f"<p>{date}</p>",
        f"<p>Dear {recipient},</p>",
        body,
        "<p>Please do not hesitate to contact us should you require further information.</p>",
        "<p>Sincerely,</p>",
        f"<p>{sender}, Senior Vice President</p>",
    ]
    return "\n".join(sections)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _gen_legal_contract(rng: random.Random) -> str:
    """Build a service agreement between two distinct companies.

    Layout: title heading, opening paragraph naming both parties and the date,
    RECITALS, 3-6 numbered clauses (fewer if the clause pool is smaller),
    COMPENSATION with a drawn fee, and an EXECUTION section with one signature
    line per party.
    """
    # The order of rng draws below determines the output for a given seed.
    party_a = pick(rng, COMPANY_NAMES)
    # Redraw until the second party differs from the first.
    while True:
        party_b = pick(rng, COMPANY_NAMES)
        if party_b != party_a:
            break
    date = pick(rng, DATES)
    amount = pick(rng, DOLLAR_AMOUNTS)

    clause_count = min(rng.randint(3, 6), len(LEGAL_CLAUSE_TEMPLATES))
    clauses = rng.sample(LEGAL_CLAUSE_TEMPLATES, k=clause_count)
    numbered = []
    for number, template in enumerate(clauses, start=1):
        numbered.append(f"<p>{number}. {fill_template(rng, template)}</p>")
    clause_lines = "\n".join(numbered)

    opening = (
        f"<p>This Service Agreement (the \"Agreement\") is entered into as of {date} between "
        f"{party_a} (\"Provider\") and {party_b} (\"Client\").</p>"
    )
    sections = [
        "<heading level=\"1\">SERVICE AGREEMENT</heading>",
        opening,
        "<heading level=\"2\">RECITALS</heading>",
        "<p>WHEREAS Provider possesses expertise in professional services and Client desires to engage Provider;</p>",
        "<p>WHEREAS the parties wish to establish the terms under which services will be rendered;</p>",
        "<heading level=\"2\">TERMS AND CONDITIONS</heading>",
        clause_lines,
        "<heading level=\"2\">COMPENSATION</heading>",
        f"<p>Client shall pay Provider a total fee of {amount} for all services rendered under this Agreement.</p>",
        "<heading level=\"2\">EXECUTION</heading>",
        "<p>IN WITNESS WHEREOF, the parties have executed this Agreement as of the date first written above.</p>",
        f"<p>{party_a}: _________________________ Date: _________</p>",
        f"<p>{party_b}: _________________________ Date: _________</p>",
    ]
    return "\n".join(sections)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _gen_memo(rng: random.Random) -> str:
    """Build an internal memorandum addressed to all staff.

    Layout: MEMORANDUM heading, To/From/Date/Subject lines, 2-4 body paragraphs
    (fewer if the paragraph pool is smaller), and a closing line naming the
    sender and a drawn department.
    """
    # The order of rng draws below determines the output for a given seed.
    sender = full_name(rng)
    subject = pick(rng, SUBJECTS)
    date = pick(rng, DATES)

    para_count = min(rng.randint(2, 4), len(BUSINESS_PARAGRAPHS))
    templates = rng.sample(BUSINESS_PARAGRAPHS, k=para_count)
    body = "\n".join([f"<p>{fill_template(rng, tpl)}</p>" for tpl in templates])

    # Drawn last, after the body has been filled in.
    department = pick(rng, ['HR', 'Operations', 'Legal', 'Finance'])

    sections = [
        "<heading level=\"1\">MEMORANDUM</heading>",
        "<p>To: All Staff</p>",
        f"<p>From: {sender}, Director of Operations</p>",
        f"<p>Date: {date}</p>",
        f"<p>Subject: {subject}</p>",
        body,
        f"<p>Please direct any questions to {sender} or the {department} department.</p>",
    ]
    return "\n".join(sections)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _gen_technical_report(rng: random.Random) -> str:
    """Build a technical report with summary, findings, recommendations and conclusion.

    Layout: title heading, an author/date/version line, Executive Summary
    (one paragraph), Findings (2-4 paragraphs), Recommendations (1-3 numbered
    paragraphs) and Conclusion (one paragraph). Counts are capped by the size
    of the pool they are drawn from.
    """
    # The order of rng draws below determines the output for a given seed.
    author = full_name(rng)
    date = pick(rng, DATES)
    title_options = [
        "Q3 Infrastructure Performance Report", "Annual Security Audit Summary",
        "Platform Migration Assessment", "System Reliability Engineering Review",
        "Data Pipeline Optimization Report", "Cloud Cost Analysis Report",
    ]
    title = pick(rng, title_options)

    finding_count = min(rng.randint(2, 4), len(TECHNICAL_PARAGRAPHS))
    findings = rng.sample(TECHNICAL_PARAGRAPHS, k=finding_count)
    finding_lines = "\n".join([f"<p>{fill_template(rng, item)}</p>" for item in findings])

    rec_count = min(rng.randint(1, 3), len(BUSINESS_PARAGRAPHS))
    recs = rng.sample(BUSINESS_PARAGRAPHS, k=rec_count)
    rec_lines = "\n".join(
        [f"<p>{number}. {fill_template(rng, item)}</p>" for number, item in enumerate(recs, start=1)]
    )

    # Version numbers, then the summary, then the conclusion.
    major = rng.randint(1, 5)
    minor = rng.randint(0, 9)
    summary = fill_template(rng, pick(rng, BUSINESS_PARAGRAPHS))
    conclusion = fill_template(rng, pick(rng, BUSINESS_PARAGRAPHS))

    sections = [
        f"<heading level=\"1\">{title}</heading>",
        f"<p>Author: {author} | Date: {date} | Version: {major}.{minor}</p>",
        "<heading level=\"2\">Executive Summary</heading>",
        f"<p>{summary}</p>",
        "<heading level=\"2\">Findings</heading>",
        finding_lines,
        "<heading level=\"2\">Recommendations</heading>",
        rec_lines,
        "<heading level=\"2\">Conclusion</heading>",
        f"<p>{conclusion}</p>",
    ]
    return "\n".join(sections)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _gen_resume(rng: random.Random) -> str:
    """Build a resume with two jobs, an education entry and a skills line.

    Layout: name heading, a contact line (email derived from the name, a 555
    phone number, a city), Professional Experience with two bold company
    entries and up to two bullet achievements each, Education, and Skills.
    """
    # The order of rng draws below determines the output for a given seed.
    name = full_name(rng)
    city = pick(rng, ["New York", "San Francisco", "Chicago", "Austin", "Seattle", "Boston"])
    company1 = pick(rng, COMPANY_NAMES)
    # Redraw until the second employer differs from the first.
    while True:
        company2 = pick(rng, COMPANY_NAMES)
        if company2 != company1:
            break

    per_job = min(2, len(RESUME_ACHIEVEMENTS))
    achievements1 = rng.sample(RESUME_ACHIEVEMENTS, k=per_job)
    achievements2 = rng.sample(RESUME_ACHIEVEMENTS, k=per_job)
    ach1 = "\n".join([f"<p>• {fill_template(rng, item)}</p>" for item in achievements1])
    ach2 = "\n".join([f"<p>• {fill_template(rng, item)}</p>" for item in achievements2])

    # Remaining draws, in the order they appear in the rendered document.
    email_local = name.lower().replace(' ', '.')
    phone_prefix = rng.randint(100, 999)
    phone_line = rng.randint(1000, 9999)
    job1_start = rng.randint(2020, 2024)
    job2_start = rng.randint(2016, 2020)
    job2_end = rng.randint(2020, 2023)
    university = pick(rng, UNIVERSITIES)
    degree = pick(rng, DEGREES)
    grad_year = rng.randint(2012, 2020)
    skills = pick(rng, SKILLS_LISTS)

    sections = [
        f"<heading level=\"1\">{name}</heading>",
        f"<p>{email_local}@email.com | (555) {phone_prefix}-{phone_line} | {city}</p>",
        "<heading level=\"2\">Professional Experience</heading>",
        f"<p><bold>{company1}</bold> — Senior Manager ({job1_start}-Present)</p>",
        ach1,
        f"<p><bold>{company2}</bold> — Associate ({job2_start}-{job2_end})</p>",
        ach2,
        "<heading level=\"2\">Education</heading>",
        f"<p><bold>{university}</bold> — {degree} ({grad_year})</p>",
        "<heading level=\"2\">Skills</heading>",
        f"<p>{skills}</p>",
    ]
    return "\n".join(sections)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# Registry mapping a document-type name to its generator. Each generator takes
# a random.Random and returns the target document as a string. generate_task
# picks from the keys in this insertion order.
DOC_GENERATORS = dict(
    business_letter=_gen_business_letter,
    legal_contract=_gen_legal_contract,
    memo=_gen_memo,
    technical_report=_gen_technical_report,
    resume=_gen_resume,
)
|
| 137 |
+
|
| 138 |
+
# Per-difficulty task settings:
#   corruption_count: inclusive (min, max) range for the corruption budget
#   corruption_types: corruption kinds that may be sampled at this level
#   max_steps:        step limit reported with the task
DIFFICULTY_CONFIG = {
    "easy": {
        "corruption_count": (2, 5),
        "corruption_types": ["spelling", "case"],
        "max_steps": 15,
    },
    "medium": {
        "corruption_count": (5, 12),
        "corruption_types": ["spelling", "case", "names", "punctuation"],
        "max_steps": 25,
    },
    "hard": {
        "corruption_count": (10, 20),
        "corruption_types": [
            "spelling", "case", "names", "content", "punctuation", "formatting",
        ],
        "max_steps": 40,
    },
}
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def generate_task(seed: int = 0, difficulty: str = "easy") -> dict:
    """
    Build one editing task from a seed: a clean target, its corrupted source and an instruction.

    A random.Random seeded with `seed` drives every choice: the document type,
    the document content, the corruption budget and which corruption kinds are
    applied. An unrecognised `difficulty` falls back to the "easy" settings,
    but the returned "difficulty" field echoes the argument as given.

    Returns dict with keys: source, target, instruction, doc_type, difficulty,
    corruption_types_used, corruption_count, corruptions, max_steps, seed.
    """
    rng = random.Random(seed)
    settings = DIFFICULTY_CONFIG.get(difficulty, DIFFICULTY_CONFIG["easy"])

    # 1) Choose a document kind and render the clean target.
    doc_type = rng.choice(list(DOC_GENERATORS))
    target = DOC_GENERATORS[doc_type](rng)

    # 2) Draw the corruption budget, then a random non-empty subset of kinds.
    low, high = settings["corruption_count"]
    count = rng.randint(low, high)
    available = settings["corruption_types"]
    n_types = min(len(available), rng.randint(1, len(available)))
    chosen_types = rng.sample(available, k=n_types)

    # 3) Corrupt the target to obtain the source the agent starts from.
    source, applied_corruptions, instruction = apply_corruptions(
        rng, target, chosen_types, count
    )

    task = {}
    task["source"] = source
    task["target"] = target
    task["instruction"] = instruction
    task["doc_type"] = doc_type
    task["difficulty"] = difficulty
    task["corruption_types_used"] = chosen_types
    # Corruptions actually applied, which may be fewer than the drawn budget.
    task["corruption_count"] = len(applied_corruptions)
    task["corruptions"] = applied_corruptions
    task["max_steps"] = settings["max_steps"]
    task["seed"] = seed
    return task
|
game/grader.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Grading / similarity computation for document editing tasks."""
|
| 2 |
+
|
| 3 |
+
from difflib import SequenceMatcher
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def compute_similarity(current: str, target: str) -> float:
    """Normalized SequenceMatcher ratio. Returns 0.0–1.0.

    Args:
        current: The document as it currently stands.
        target: The reference document.

    Returns:
        1.0 when both strings are empty, 0.0 when only `target` is empty,
        otherwise the character-level `SequenceMatcher.ratio()`.
    """
    if not target:
        return 1.0 if not current else 0.0
    # autojunk=False: the default heuristic treats any character that makes up
    # more than ~1% of a 200+ character string as "popular" and ignores it when
    # anchoring matches. For prose documents that is nearly every character,
    # which can drive the ratio of two almost identical texts towards 0.
    # Turning it off costs some speed but gives a faithful ratio.
    return SequenceMatcher(None, current, target, autojunk=False).ratio()
|
inference.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Baseline inference script for DocEdit Game environment.
|
| 3 |
+
Runs an OpenAI-compatible LLM agent against the 3 fixed evaluation tasks.
|
| 4 |
+
|
| 5 |
+
Required env vars:
|
| 6 |
+
API_BASE_URL — LLM API endpoint
|
| 7 |
+
MODEL_NAME — model identifier
|
| 8 |
+
HF_TOKEN — Hugging Face / API key
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import asyncio
|
| 12 |
+
import json
|
| 13 |
+
import os
|
| 14 |
+
from typing import List
|
| 15 |
+
|
| 16 |
+
from openai import OpenAI
|
| 17 |
+
|
| 18 |
+
# --- LLM endpoint configuration (environment variables with defaults) ---
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
# HF_TOKEN wins when set; otherwise OPENAI_API_KEY, otherwise an empty key.
API_KEY = os.getenv("HF_TOKEN", os.getenv("OPENAI_API_KEY", ""))

# --- Benchmark configuration ---
BENCHMARK = "doc_edit_game"
TASKS = ["easy", "medium", "hard"]
# A task passes when its final similarity is at least this value.
SUCCESS_THRESHOLD = 0.90
IMAGE_NAME = os.getenv("DOC_EDIT_GAME_IMAGE", "doc_edit_game-env:latest")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def log_start(task: str, env: str, model: str):
    """Print the [START] marker line for a task run, flushing immediately."""
    fields = f"task={task} env={env} model={model}"
    print("[START] " + fields, flush=True)
|
| 30 |
+
|
| 31 |
+
def log_step(step: int, action: dict, reward: float, done: bool, error=None):
    """Print one [STEP] line; the action is JSON-encoded.

    An " error=..." suffix is appended only when `error` is truthy.
    """
    pieces = [f"[STEP] step={step} action={json.dumps(action)} reward={reward} done={done}"]
    if error:
        pieces.append(f" error={error}")
    print("".join(pieces), flush=True)
|
| 34 |
+
|
| 35 |
+
def log_end(success: bool, steps: int, score: float, rewards: List[float]):
    """Print the [END] summary line for a task; rewards are JSON-encoded."""
    encoded_rewards = json.dumps(rewards)
    line = f"[END] success={success} steps={steps} score={score} rewards={encoded_rewards}"
    print(line, flush=True)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# System message sent with every model request. It fixes the JSON action
# schema the agent must answer with (one edit operation per response).
SYSTEM_PROMPT = """You are an expert document editor. You receive an XML-tagged document and an edit instruction describing what needs to be fixed.

You must respond with a JSON object (no markdown fences) representing ONE edit operation:
{
"operation": "replace" | "insert" | "delete" | "format" | "move",
"target": "exact text to find in the document",
"content": "replacement or new text",
"position": -1,
"format_type": "bold" | "italic" | "uppercase" | "lowercase" | "none"
}

Operations:
- "replace": find target, replace with content
- "insert": insert content as new paragraph at position (-1 = end)
- "delete": delete the line containing target
- "format": wrap target with format_type tags (bold/italic) or change case (uppercase/lowercase)
- "move": move paragraph containing target to position

Rules:
- ONE operation per response
- Use EXACT text from the document for the target field
- For replace: copy the exact corrupted text as target, put the corrected text as content
- For format: target is the text to format, format_type specifies how
- Think step by step about which corruption to fix next
"""
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_model_action(client: OpenAI, document: str, instruction: str, similarity: float, history: List[str]) -> dict:
    """Ask the model for the next edit operation and parse it into a dict.

    Args:
        client: OpenAI-compatible client used for the chat completion call.
        document: Current document text shown to the model.
        instruction: Natural-language description of the required edits.
        similarity: Current similarity score, included in the prompt.
        history: Past step summaries; only the last five are sent.

    Returns:
        The parsed JSON object. If the request fails, the reply is not valid
        JSON, or the JSON is not an object, an empty "replace" action is
        returned instead, so callers always receive a dict.
    """
    fallback = {"operation": "replace", "target": "", "content": ""}
    history_text = "\n".join(history[-5:]) if history else "No previous actions."
    user_msg = (
        f"Current document:\n{document}\n\n"
        f"Edit instruction: {instruction}\n\n"
        f"Current similarity to target: {similarity:.3f}\n\n"
        f"Recent actions:\n{history_text}\n\n"
        f"Respond with ONE JSON edit operation to improve the document."
    )
    try:
        resp = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.0,
            max_tokens=512,
        )
        # content may be None (e.g. an empty or refused completion).
        text = (resp.choices[0].message.content or "").strip()
        if text.startswith("```"):
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()
        # The model may wrap the object in prose or in a list; keep only the
        # span from the first "{" to the last "}".
        start, end = text.find("{"), text.rfind("}")
        if start != -1 and end > start:
            text = text[start:end + 1]
        action = json.loads(text)
    except Exception as exc:
        # Deliberate best-effort boundary: any request or parse failure turns
        # into a harmless action so the episode can continue.
        print(f"[DEBUG] Model request failed: {exc}", flush=True)
        return fallback

    if not isinstance(action, dict):
        # Valid JSON that is not an object would break callers that use .get().
        print(f"[DEBUG] Model returned non-object JSON: {type(action).__name__}", flush=True)
        return fallback
    return action
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
async def run_task(task_name: str) -> dict:
    """Run one evaluation task end to end against the Docker-hosted environment.

    Resets the environment for `task_name`, then repeatedly asks the model for
    an edit and applies it until the environment reports done or the step
    budget is used up. The environment is always closed and the [END] line is
    always logged, even when an error propagates.

    Args:
        task_name: Task identifier passed to `env.reset` (one of TASKS).

    Returns:
        {"task", "score", "success", "steps"}: score is the final similarity
        and success is whether it reached SUCCESS_THRESHOLD.
    """
    from doc_edit_game import DocEditGameAction, DocEditGameEnv

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = await DocEditGameEnv.from_docker_image(IMAGE_NAME)

    history: List[str] = []
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)

    try:
        result = await env.reset(task_name=task_name)
        obs = result.observation

        # Step budget as reported by the first observation.
        max_steps = obs.steps_remaining + obs.edits_made

        for step in range(1, max_steps + 1):
            if result.done:
                break

            action_dict = get_model_action(client, obs.document, obs.edit_instruction, obs.similarity, history)
            error = None
            if not isinstance(action_dict, dict):
                error = f"non-object action ({type(action_dict).__name__})"
                action_dict = {}

            try:
                action = DocEditGameAction(
                    operation=action_dict.get("operation", "replace"),
                    target=action_dict.get("target", ""),
                    content=action_dict.get("content", ""),
                    position=action_dict.get("position", -1),
                    format_type=action_dict.get("format_type", "none"),
                )
            except (TypeError, ValueError) as exc:
                # The model controls these field values (e.g. "position": null).
                # Presumably model validation failures surface as ValueError
                # subclasses (verify against the Action base class). Spend the
                # step on an empty replace instead of aborting the whole run.
                error = f"invalid action ({type(exc).__name__})"
                action = DocEditGameAction(operation="replace", target="", content="")

            result = await env.step(action)
            obs = result.observation
            reward = result.reward or 0.0

            rewards.append(reward)
            steps_taken = step

            log_step(step=step, action=action_dict, reward=reward, done=result.done, error=error)
            history.append(f"Step {step}: {action_dict.get('operation')} -> reward {reward:+.3f}, sim {obs.similarity:.3f}")

            if result.done:
                break

        score = obs.similarity
        success = score >= SUCCESS_THRESHOLD

    finally:
        try:
            await env.close()
        except Exception as e:
            print(f"[DEBUG] env.close() error: {e}", flush=True)
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)

    return {"task": task_name, "score": score, "success": success, "steps": steps_taken}
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
async def main():
    """Run every task in TASKS sequentially, then print a pass/fail summary."""
    divider = "=" * 60

    outcomes = []
    for task_name in TASKS:
        outcomes.append(await run_task(task_name))
        print(f"\n{divider}", flush=True)

    print(f"\n{divider}")
    print("SUMMARY")
    print(divider)
    for outcome in outcomes:
        if outcome["success"]:
            status = "PASS"
        else:
            status = "FAIL"
        print(f" [{status}] {outcome['task']}: score={outcome['score']:.3f} steps={outcome['steps']}")

    # Mean final similarity across tasks; 0 when nothing ran.
    if outcomes:
        avg = sum(outcome["score"] for outcome in outcomes) / len(outcomes)
    else:
        avg = 0
    print(f" Average score: {avg:.3f}")


if __name__ == "__main__":
    asyncio.run(main())
|
models.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic models for the DocEdit Game environment."""
|
| 2 |
+
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
from openenv.core.env_server.types import Action, Observation
|
| 5 |
+
from pydantic import Field
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class DocEditGameAction(Action):
    """A single edit operation submitted by the agent in one step.

    Only `operation` is required; every other field has a default.
    """

    # Which kind of edit to perform.
    operation: str = Field(..., description="Edit operation: 'replace', 'insert', 'delete', 'format', or 'move'")
    # Text used to locate the edit site.
    target: str = Field(description="Text to find in the document (for replace/delete/format/move)", default="")
    # Replacement text or new paragraph text.
    content: str = Field(description="New content (replacement for 'replace', new paragraph for 'insert')", default="")
    # Paragraph index for insert/move.
    position: int = Field(description="Paragraph index for 'insert'/'move' (-1 = append at end)", default=-1)
    # Formatting to apply for the 'format' operation.
    format_type: str = Field(
        description="Formatting type for 'format' operation: 'bold', 'italic', 'underline', 'uppercase', 'lowercase', 'none'",
        default="none",
    )
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class DocEditGameObservation(Observation):
    """State reported to the agent after each step; every field has a default."""

    # --- Document state ---
    document: str = Field(description="Current document content (XML-tagged)", default="")
    edit_instruction: str = Field(description="Natural language description of required edits", default="")
    similarity: float = Field(description="Similarity to target document (0.0–1.0)", default=0.0)

    # --- Task metadata ---
    task_id: str = Field(description="Unique task identifier (seed-based)", default="")
    task_difficulty: str = Field(description="Task difficulty: easy, medium, hard", default="easy")
    doc_type: str = Field(description="Document type (business_letter, legal_contract, etc.)", default="")
    corruption_types: List[str] = Field(description="Types of corruptions applied", default_factory=list)

    # --- Progress counters ---
    steps_remaining: int = Field(description="Steps left in this episode", default=0)
    edits_made: int = Field(description="Number of edit actions taken so far", default=0)
    edits_estimated: int = Field(description="Estimated edits needed", default=0)
|
openenv.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: doc_edit_game
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
openenv_doc_edit_game.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-doc_edit_game
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Doc Edit Game environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core[core]>=0.2.2
|
| 7 |
+
Provides-Extra: dev
|
| 8 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 9 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_doc_edit_game.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
./__init__.py
|
| 4 |
+
./client.py
|
| 5 |
+
./inference.py
|
| 6 |
+
./models.py
|
| 7 |
+
openenv_doc_edit_game.egg-info/PKG-INFO
|
| 8 |
+
openenv_doc_edit_game.egg-info/SOURCES.txt
|
| 9 |
+
openenv_doc_edit_game.egg-info/dependency_links.txt
|
| 10 |
+
openenv_doc_edit_game.egg-info/entry_points.txt
|
| 11 |
+
openenv_doc_edit_game.egg-info/requires.txt
|
| 12 |
+
openenv_doc_edit_game.egg-info/top_level.txt
|
| 13 |
+
server/__init__.py
|
| 14 |
+
server/app.py
|
| 15 |
+
server/doc_edit_game_environment.py
|
openenv_doc_edit_game.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_doc_edit_game.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = doc_edit_game.server.app:main
|
openenv_doc_edit_game.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
|
| 3 |
+
[dev]
|
| 4 |
+
pytest>=8.0.0
|
| 5 |
+
pytest-cov>=4.0.0
|
openenv_doc_edit_game.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
doc_edit_game
|
pyproject.toml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-doc_edit_game"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Doc Edit Game environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.2",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
# Add all dependencies needed for your environment here
|
| 23 |
+
# Examples:
|
| 24 |
+
# "numpy>=1.19.0",
|
| 25 |
+
# "torch>=2.0.0",
|
| 26 |
+
# "gymnasium>=0.29.0",
|
| 27 |
+
# "openspiel>=1.0.0",
|
| 28 |
+
# "smolagents>=1.22.0,<2",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
[project.optional-dependencies]
|
| 32 |
+
dev = [
|
| 33 |
+
"pytest>=8.0.0",
|
| 34 |
+
"pytest-cov>=4.0.0",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.scripts]
|
| 38 |
+
# Server entry point - enables running via: uv run --project . server
|
| 39 |
+
# or: python -m doc_edit_game.server.app
|
| 40 |
+
server = "doc_edit_game.server.app:main"
|
| 41 |
+
|
| 42 |
+
[tool.setuptools]
|
| 43 |
+
include-package-data = true
|
| 44 |
+
packages = ["doc_edit_game", "doc_edit_game.server"]
|
| 45 |
+
package-dir = { "doc_edit_game" = ".", "doc_edit_game.server" = "server" }
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Doc Edit Game environment server components."""
|
| 8 |
+
|
| 9 |
+
from .doc_edit_game_environment import DocEditGameEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["DocEditGameEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI application for the DocEdit Game Environment."""
|
| 2 |
+
|
| 3 |
+
try:
|
| 4 |
+
from openenv.core.env_server.http_server import create_app
|
| 5 |
+
except Exception as e:
|
| 6 |
+
raise ImportError("openenv is required. Install with: uv sync") from e
|
| 7 |
+
|
| 8 |
+
try:
|
| 9 |
+
from ..models import DocEditGameAction, DocEditGameObservation
|
| 10 |
+
from .doc_edit_game_environment import DocEditGameEnvironment
|
| 11 |
+
except (ImportError, ModuleNotFoundError):
|
| 12 |
+
from models import DocEditGameAction, DocEditGameObservation
|
| 13 |
+
from server.doc_edit_game_environment import DocEditGameEnvironment
|
| 14 |
+
|
| 15 |
+
# ASGI application object served by uvicorn (see `main` below). It is built
# from the environment class and its action/observation models.
# NOTE(review): `max_concurrent_envs=4` presumably caps simultaneous
# environment sessions — confirm against the openenv `create_app` docs.
app = create_app(
    DocEditGameEnvironment,
    DocEditGameAction,
    DocEditGameObservation,
    env_name="doc_edit_game",
    max_concurrent_envs=4,
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Serve the module-level ``app`` with uvicorn.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    # Imported lazily so that importing this module does not require uvicorn.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


if __name__ == "__main__":
    main()
|
server/doc_edit_game_environment.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
DocEdit Game Environment — procedurally generated document editing challenges.
|
| 3 |
+
|
| 4 |
+
Agents face randomized document editing tasks: fix spelling, case, names,
|
| 5 |
+
punctuation, formatting, and structural issues across diverse document types.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import re
|
| 9 |
+
from typing import Any, Optional
|
| 10 |
+
from uuid import uuid4
|
| 11 |
+
|
| 12 |
+
from openenv.core.env_server.interfaces import Environment
|
| 13 |
+
from openenv.core.env_server.types import State
|
| 14 |
+
|
| 15 |
+
try:
|
| 16 |
+
from ..models import DocEditGameAction, DocEditGameObservation
|
| 17 |
+
from ..game.generator import generate_task
|
| 18 |
+
from ..game.grader import compute_similarity
|
| 19 |
+
except ImportError:
|
| 20 |
+
from models import DocEditGameAction, DocEditGameObservation
|
| 21 |
+
from game.generator import generate_task
|
| 22 |
+
from game.grader import compute_similarity
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Three fixed tasks for competition evaluation (deterministic seeds).
# Keys are the `task_name` values accepted by `reset(task_name=...)`; each
# entry pins both the generator seed and the difficulty so the same name is
# always generated from the same seed and difficulty.
FIXED_TASKS = {
    "easy": {"seed": 1001, "difficulty": "easy"},
    "medium": {"seed": 2002, "difficulty": "medium"},
    "hard": {"seed": 3003, "difficulty": "hard"},
}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class DocEditGameEnvironment(Environment):
    """
    Procedurally generated document editing RL environment.

    On reset, generates a random document with intentional corruptions.
    Agent must reverse the corruptions to match the hidden target.
    Supports both fixed tasks (for evaluation) and procedural generation (for training).

    Reward for a step is the change in similarity to the target, minus a
    small penalty when the action had no effect, plus a bonus on the step
    that reaches the target.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Similarity at or above this value is treated as an exact match.
    _MATCH_THRESHOLD = 0.999
    # Subtracted from the reward when an action did nothing (wasted step).
    _NOOP_PENALTY = 0.01
    # Added to the reward on the step that reaches the target.
    _MATCH_BONUS = 0.5

    # Maps a normalized format type to the text transformation it applies.
    # Any other value (including "none") is treated as a no-op by `step`.
    _FORMATTERS = {
        "bold": lambda text: f"<bold>{text}</bold>",
        "italic": lambda text: f"<italic>{text}</italic>",
        "underline": lambda text: f"<underline>{text}</underline>",
        "uppercase": str.upper,
        "lowercase": str.lower,
    }

    def __init__(self):
        """Create an environment with empty episode state; call `reset` before `step`."""
        self._document = ""
        self._target = ""
        self._instruction = ""
        self._task_info: dict = {}
        self._task_id = ""
        self._difficulty = "easy"
        self._prev_similarity = 0.0
        self._max_steps = 15
        self._edits_made = 0
        self._state = State(episode_id=str(uuid4()), step_count=0)

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditGameObservation:
        """Start a new episode and return its initial observation.

        Args:
            seed: Generator seed. Ignored when ``task_name`` selects a fixed
                task; a random 32-bit seed is drawn when omitted.
            episode_id: Identifier for the new episode; a fresh UUID is used
                when omitted.
            **kwargs: ``task_name`` selects an entry of ``FIXED_TASKS``;
                ``difficulty`` (default ``"easy"``) is used otherwise.

        Returns:
            The observation for the freshly generated (corrupted) document.
        """
        # Support named task selection for evaluation
        task_name = kwargs.get("task_name", "")
        difficulty = kwargs.get("difficulty", "easy")

        if task_name in FIXED_TASKS:
            cfg = FIXED_TASKS[task_name]
            seed = cfg["seed"]
            difficulty = cfg["difficulty"]
        elif seed is None:
            seed = uuid4().int & 0xFFFFFFFF

        task = generate_task(seed=seed, difficulty=difficulty)
        self._document = task["source"]
        self._target = task["target"]
        self._instruction = task["instruction"]
        self._task_info = task
        self._max_steps = task["max_steps"]
        self._edits_made = 0
        # Remembered so `step` reports the same difficulty as `reset` even if
        # the generated task dict carries no "difficulty" entry.
        self._difficulty = difficulty
        self._task_id = f"seed_{seed}_{difficulty}"
        self._prev_similarity = compute_similarity(self._document, self._target)
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)

        return DocEditGameObservation(
            document=self._document,
            edit_instruction=self._instruction,
            similarity=self._prev_similarity,
            task_id=self._task_id,
            task_difficulty=difficulty,
            doc_type=task["doc_type"],
            corruption_types=task["corruption_types_used"],
            steps_remaining=self._max_steps,
            edits_made=0,
            edits_estimated=task["corruption_count"],
            done=False,
            reward=0.0,
        )

    def step(self, action: DocEditGameAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditGameObservation:
        """Apply one edit action and return the resulting observation.

        Supported operations are ``replace``, ``insert``, ``delete``,
        ``format`` and ``move``. Every operation except ``insert`` requires a
        non-empty ``action.target``. Unknown operations, missing targets and
        targets not found in the document count as no-ops.

        Args:
            action: The edit to apply.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The observation after the edit, with ``reward`` rounded to four
            decimals and ``done`` set when the target is matched or the step
            budget is exhausted.
        """
        self._state.step_count += 1
        self._edits_made += 1
        op = action.operation.lower().strip()

        handlers = {
            "replace": self._apply_replace,
            "insert": self._apply_insert,
            "delete": self._apply_delete,
            "format": self._apply_format,
            "move": self._apply_move,
        }
        handler = handlers.get(op)
        needs_target = op != "insert"
        if handler is None or (needs_target and not action.target):
            noop = True
        else:
            noop = not handler(action)

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity

        if noop:
            reward -= self._NOOP_PENALTY  # penalty for wasted step

        self._prev_similarity = new_sim
        steps_left = self._max_steps - self._state.step_count
        matched = new_sim >= self._MATCH_THRESHOLD
        done = matched or (steps_left <= 0)

        if matched:
            reward += self._MATCH_BONUS

        return DocEditGameObservation(
            document=self._document,
            edit_instruction=self._instruction,
            similarity=new_sim,
            task_id=self._task_id,
            task_difficulty=self._task_info.get("difficulty", self._difficulty),
            doc_type=self._task_info.get("doc_type", ""),
            corruption_types=self._task_info.get("corruption_types_used", []),
            steps_remaining=max(steps_left, 0),
            edits_made=self._edits_made,
            edits_estimated=self._task_info.get("corruption_count", 0),
            done=done,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                "noop": noop,
                "exact_match": matched,
            },
        )

    @staticmethod
    def _place_line(lines: list, position: Optional[int], line: str) -> None:
        """Insert ``line`` at ``position``, appending when it is missing or out of range."""
        if position is None or position < 0 or position >= len(lines):
            lines.append(line)
        else:
            lines.insert(position, line)

    def _apply_replace(self, action: DocEditGameAction) -> bool:
        """Replace the first occurrence of the target; return False if it is absent."""
        if action.target not in self._document:
            return False
        self._document = self._document.replace(action.target, action.content, 1)
        return True

    def _apply_insert(self, action: DocEditGameAction) -> bool:
        """Insert the content as a new line, wrapping bare text in ``<p>`` tags."""
        lines = self._document.split("\n")
        new_line = action.content
        if not new_line.startswith(("<p>", "<heading")):
            new_line = f"<p>{new_line}</p>"
        self._place_line(lines, action.position, new_line)
        self._document = "\n".join(lines)
        return True

    def _apply_delete(self, action: DocEditGameAction) -> bool:
        """Drop every line containing the target; return False if none matched."""
        lines = self._document.split("\n")
        kept = [line for line in lines if action.target not in line]
        self._document = "\n".join(kept)
        return len(kept) != len(lines)

    def _apply_format(self, action: DocEditGameAction) -> bool:
        """Format the first occurrence of the target; return False if nothing applied."""
        if action.target not in self._document:
            return False
        # Normalized the same way as `operation` so stray whitespace or case
        # in the format type does not turn a valid request into a no-op.
        fmt = (action.format_type or "").lower().strip()
        formatter = self._FORMATTERS.get(fmt)
        if formatter is None:
            return False
        self._document = self._document.replace(action.target, formatter(action.target), 1)
        return True

    def _apply_move(self, action: DocEditGameAction) -> bool:
        """Move the first line containing the target to ``action.position``."""
        lines = self._document.split("\n")
        source_idx = next(
            (idx for idx, line in enumerate(lines) if action.target in line),
            None,
        )
        if source_idx is None:
            return False
        moved_line = lines.pop(source_idx)
        # Position is interpreted against the list after the line was removed.
        self._place_line(lines, action.position, moved_line)
        self._document = "\n".join(lines)
        return True

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|