Spaces:

sanjuhs
/

doc-edit-game

Sleeping

App Files Files Community

sanjuhs committed on Apr 7

Commit

df3ca83

verified ·

1 Parent(s): 8b597a0

Upload folder using huggingface_hub

Browse files

Files changed (24) hide show

Dockerfile +81 -0
README.md +126 -5
__init__.py +10 -0
client.py +51 -0
game/__init__.py +3 -0
game/content_pools.py +190 -0
game/corruptions.py +251 -0
game/generator.py +176 -0
game/grader.py +10 -0
inference.py +174 -0
models.py +45 -0
openenv.yaml +7 -0
openenv_doc_edit_game.egg-info/PKG-INFO +9 -0
openenv_doc_edit_game.egg-info/SOURCES.txt +15 -0
openenv_doc_edit_game.egg-info/dependency_links.txt +1 -0
openenv_doc_edit_game.egg-info/entry_points.txt +2 -0
openenv_doc_edit_game.egg-info/requires.txt +5 -0
openenv_doc_edit_game.egg-info/top_level.txt +1 -0
pyproject.toml +45 -0
server/__init__.py +11 -0
server/app.py +30 -0
server/doc_edit_game_environment.py +198 -0
server/requirements.txt +6 -0
uv.lock +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,81 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local OpenEnv sources)
+# - Standalone environments (with openenv from PyPI/Git)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Ensure git is available (required for installing dependencies from VCS)
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+# Build argument to control whether we're building standalone or in-repo
+ARG BUILD_MODE=in-repo
+ARG ENV_NAME=doc_edit_game
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv is already vendored in the build context
+# For standalone builds, openenv will be installed via pyproject.toml
+WORKDIR /app/env
+# Ensure uv is available (for local builds where base image lacks it)
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync
+# If uv.lock exists, use it; otherwise resolve on the fly
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# Final runtime stage
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code
+COPY --from=builder /app/env /app/env
+# Set PATH to use the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+# The module path is constructed to work with the /app/env structure
+ENV ENABLE_WEB_INTERFACE=true
+CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]

README.md CHANGED Viewed

@@ -1,10 +1,131 @@
 ---
-title: Doc Edit Game
-emoji: 🌖
-colorFrom: gray
-colorTo: pink
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: DocEdit Game Environment
+emoji: 📝
+colorFrom: blue
+colorTo: purple
 sdk: docker
 pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
 ---
+# DocEdit Game — Procedural Document Editing RL Environment
+A production-grade OpenEnv environment where AI agents learn to edit structured documents through a game-like interface. Documents are procedurally generated with random corruptions that the agent must reverse — spelling errors, case issues, name swaps, missing punctuation, structural changes, and formatting problems.
+## Why This Matters
+Document editing is one of the most common knowledge-work tasks on earth. Legal redlining, copy-editing, compliance patching, report formatting — billions of edits per day. This environment trains "applicator models" that can reliably execute editing instructions on structured documents.
+## Game Mechanics
+1. **Reset**: Environment generates a random document (business letter, legal contract, memo, technical report, or resume) and applies random corruptions
+2. **Observe**: Agent receives the corrupted document + natural language edit instruction describing what needs fixing
+3. **Act**: Agent submits one edit operation per step (replace, insert, delete, format, move)
+4. **Reward**: Incremental similarity improvement to the hidden target document
+5. **Win**: Achieve similarity ≥ 0.999 to complete the task
+### Procedural Generation
+Every task is unique — documents, corruptions, and instructions are generated from seeds. `reset(seed=42)` always produces the same task for reproducibility.
+### 5 Document Types
+- **Business letters** — formal correspondence with sender/recipient/body
+- **Legal contracts** — service agreements with recitals, clauses, signatures
+- **Office memos** — internal communications with subject/body/action items
+- **Technical reports** — engineering reports with findings/recommendations
+- **Resumes** — professional CVs with experience/education/skills
+### 6 Corruption Types
+| Type | What It Does | Example |
+|------|-------------|---------|
+| Spelling | Swaps words with common misspellings | "receive" → "recieve" |
+| Case | Wrong capitalization | "MEMORANDUM" → "memorandum" |
+| Names | Swaps person/company names | "James" → "Robert" |
+| Punctuation | Removes/adds punctuation | Missing period at end of sentence |
+| Content | Deletes paragraphs or adds junk | Missing clause in contract |
+| Formatting | Strips bold/italic tags | `<bold>Important</bold>` → `Important` |
+## Action Space
+| Field | Type | Description |
+|-------|------|-------------|
+| `operation` | str | `"replace"`, `"insert"`, `"delete"`, `"format"`, `"move"` |
+| `target` | str | Text to find in document |
+| `content` | str | Replacement/new text |
+| `position` | int | Paragraph index for insert/move (-1 = end) |
+| `format_type` | str | `"bold"`, `"italic"`, `"uppercase"`, `"lowercase"`, `"none"` |
+## Observation Space
+| Field | Type | Description |
+|-------|------|-------------|
+| `document` | str | Current document (XML-tagged paragraphs) |
+| `edit_instruction` | str | Natural language description of edits needed |
+| `similarity` | float | Similarity to target (0.0–1.0) |
+| `task_difficulty` | str | easy / medium / hard |
+| `doc_type` | str | Document template type |
+| `corruption_types` | list | Which corruption types were applied |
+| `steps_remaining` | int | Steps left |
+| `edits_estimated` | int | Estimated edits needed |
+## 3 Fixed Tasks (for evaluation)
+| Task | Difficulty | Max Steps | Corruption Types |
+|------|-----------|-----------|-----------------|
+| `easy` | easy | 15 | spelling, case |
+| `medium` | medium | 25 | spelling, case, names, punctuation |
+| `hard` | hard | 40 | all types |
+## Reward Design
+```
+reward = similarity_after - similarity_before    # incremental
+if exact_match: reward += 0.5                    # completion bonus
+if noop: reward -= 0.01                          # wasted step penalty
+```
+## Quick Start
+```bash
+uv sync
+uvicorn server.app:app --reload --host 0.0.0.0 --port 8001
+# Or Docker
+docker build -t doc_edit_game-env:latest .
+docker run -p 8000:8000 doc_edit_game-env:latest
+```
+## Run Inference
+```bash
+export API_BASE_URL="https://api.openai.com/v1"
+export MODEL_NAME="gpt-4o-mini"
+export HF_TOKEN="your-key"
+python inference.py
+```
+## Project Structure
+```
+doc_edit_game/
+├── openenv.yaml
+├── pyproject.toml
+├── README.md
+├── inference.py
+├── models.py
+├── client.py
+├── __init__.py
+├── game/
+│   ├── __init__.py
+│   ├── generator.py        # Procedural document + task generation
+│   ├── corruptions.py      # 6 corruption types
+│   ├── grader.py            # Similarity scoring
+│   └── content_pools.py    # Names, phrases, misspellings, templates
+└── server/
+    ├── doc_edit_game_environment.py
+    ├── app.py
+    └── Dockerfile
+```

__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+"""DocEdit Game Environment."""
+from .client import DocEditGameEnv
+from .models import DocEditGameAction, DocEditGameObservation
+__all__ = [
+    "DocEditGameAction",
+    "DocEditGameObservation",
+    "DocEditGameEnv",
+]

client.py ADDED Viewed

	@@ -0,0 +1,51 @@

+"""DocEdit Game Environment Client."""
+from typing import Dict
+from openenv.core import EnvClient
+from openenv.core.client_types import StepResult
+from openenv.core.env_server.types import State
+from .models import DocEditGameAction, DocEditGameObservation
+class DocEditGameEnv(EnvClient[DocEditGameAction, DocEditGameObservation, State]):
+    """WebSocket client for the DocEdit Game environment."""
+    def _step_payload(self, action: DocEditGameAction) -> Dict:
+        """Serialize an action into the flat dict sent with a step request."""
+        field_names = ("operation", "target", "content", "position", "format_type")
+        return {name: getattr(action, name) for name in field_names}
+    def _parse_result(self, payload: Dict) -> StepResult[DocEditGameObservation]:
+        """Build a StepResult from a response payload, defaulting any missing field."""
+        obs = payload.get("observation", {})
+        # reward/done live at the top level of the payload, not inside "observation"
+        reward = payload.get("reward")
+        done = payload.get("done", False)
+        observation = DocEditGameObservation(
+            document=obs.get("document", ""),
+            edit_instruction=obs.get("edit_instruction", ""),
+            similarity=obs.get("similarity", 0.0),
+            task_id=obs.get("task_id", ""),
+            task_difficulty=obs.get("task_difficulty", "easy"),
+            doc_type=obs.get("doc_type", ""),
+            corruption_types=obs.get("corruption_types", []),
+            steps_remaining=obs.get("steps_remaining", 0),
+            edits_made=obs.get("edits_made", 0),
+            edits_estimated=obs.get("edits_estimated", 0),
+            done=done,
+            reward=reward,
+            metadata=obs.get("metadata", {}),
+        )
+        return StepResult(observation=observation, reward=reward, done=done)
+    def _parse_state(self, payload: Dict) -> State:
+        """Build a State from a state payload; step_count defaults to 0."""
+        episode_id = payload.get("episode_id")
+        step_count = payload.get("step_count", 0)
+        return State(episode_id=episode_id, step_count=step_count)

game/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+from .generator import generate_task
+from .corruptions import apply_corruptions
+from .grader import compute_similarity

game/content_pools.py ADDED Viewed

	@@ -0,0 +1,190 @@

+"""Content pools for procedural document generation."""
+import random as _random
+FIRST_NAMES = [
+    "James", "Sarah", "Michael", "Emily", "David", "Jennifer", "Robert", "Maria",
+    "William", "Elizabeth", "Richard", "Patricia", "Thomas", "Linda", "Charles",
+    "Barbara", "Daniel", "Susan", "Matthew", "Jessica", "Anthony", "Karen",
+    "Andrew", "Nancy", "Christopher", "Lisa", "Joseph", "Margaret", "Steven", "Dorothy",
+]
+LAST_NAMES = [
+    "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
+    "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson",
+    "Thomas", "Taylor", "Moore", "Jackson", "Martin", "Lee", "Perez", "Thompson",
+    "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
+]
+COMPANY_NAMES = [
+    "Acme Corporation", "GlobalTech Solutions", "Summit Industries", "Vertex Partners",
+    "Pinnacle Holdings", "Atlas Dynamics", "Meridian Group", "Cascade Systems",
+    "Horizon Enterprises", "Sterling Consulting", "Nexus Financial", "Vanguard Legal",
+    "Pacific Ventures", "Continental Services", "Apex Analytics",
+]
+CITIES = [
+    "New York", "San Francisco", "Chicago", "Los Angeles", "Seattle", "Boston",
+    "Austin", "Denver", "Miami", "Washington DC", "Portland", "Atlanta",
+    "Dallas", "Philadelphia", "Minneapolis",
+]
+DATES = [
+    "January 2026", "February 2026", "March 2026", "April 2026", "May 2026",
+    "June 2025", "July 2025", "August 2025", "September 2025", "October 2025",
+    "November 2025", "December 2025", "January 2025", "March 2025", "June 2024",
+]
+DOLLAR_AMOUNTS = [
+    "$50,000", "$100,000", "$250,000", "$500,000", "$750,000",
+    "$1,000,000", "$1,500,000", "$2,000,000", "$5,000,000", "$10,000,000",
+]
+SUBJECTS = [
+    "Updated Remote Work Policy", "Q3 Performance Review", "Annual Budget Proposal",
+    "Client Engagement Strategy", "Office Relocation Plan", "Software Upgrade Timeline",
+    "Employee Training Program", "Marketing Campaign Launch", "Vendor Contract Renewal",
+    "Data Security Protocol Update", "Team Restructuring Announcement",
+    "Holiday Schedule Notification", "New Hire Onboarding Process",
+]
+LEGAL_CLAUSE_TEMPLATES = [
+    "The {party} shall deliver all materials within {days} business days of execution.",
+    "Payment of {amount} shall be made in {installments} equal installments.",
+    "Either party may terminate this agreement with {days} days written notice.",
+    "All intellectual property created during the term shall remain the property of {party}.",
+    "The {party} agrees to maintain confidentiality of all proprietary information.",
+    "This agreement shall be governed by the laws of the State of {state}.",
+    "Any disputes arising under this agreement shall be resolved through binding arbitration.",
+    "The {party} shall indemnify and hold harmless the other party against all claims.",
+    "Force majeure events shall excuse performance for the duration of the event.",
+    "Neither party may assign this agreement without prior written consent.",
+]
+BUSINESS_PARAGRAPHS = [
+    "We are pleased to inform you that the quarterly targets have been exceeded by {percent}%. The team's dedication to excellence has been instrumental in achieving these results.",
+    "Following our recent review, we recommend implementing the proposed changes effective {date}. This will ensure alignment with our strategic objectives for the fiscal year.",
+    "The analysis of current market conditions indicates a strong opportunity for expansion into the {region} market. Our competitive positioning remains favorable.",
+    "Please ensure all relevant documentation is submitted to the {department} department by {date}. Late submissions may result in processing delays.",
+    "The board of directors has approved the allocation of {amount} for the proposed initiative. Implementation is expected to begin in {date}.",
+    "Customer satisfaction metrics have shown a {percent}% improvement over the previous quarter. This trend is attributed to our enhanced service delivery framework.",
+    "The proposed timeline for project completion is {days} days from the date of approval. Key milestones will be tracked through our project management system.",
+    "We have identified several areas for operational improvement including process automation, resource optimization, and enhanced quality controls.",
+    "The annual compliance review has been completed with no significant findings. All regulatory requirements have been met as of the reporting date.",
+    "Stakeholder feedback has been overwhelmingly positive regarding the new initiative. We will continue to refine our approach based on ongoing input.",
+]
+TECHNICAL_PARAGRAPHS = [
+    "The system architecture employs a microservices pattern with {count} independent services communicating via REST APIs and message queues.",
+    "Performance benchmarks indicate average response times of {ms}ms under standard load conditions. Stress testing showed graceful degradation at {percent}% capacity.",
+    "Database migration from the legacy system was completed with zero data loss. The new schema supports {count}x faster query performance.",
+    "Security audit findings have been addressed in this release. All critical vulnerabilities have been patched and verified through penetration testing.",
+    "The deployment pipeline now supports automated rollbacks within {minutes} minutes of failure detection. Monitoring coverage has been extended to all production endpoints.",
+    "Code review analysis shows a {percent}% reduction in defect density compared to the previous release cycle. Test coverage stands at {coverage}%.",
+]
+RESUME_ACHIEVEMENTS = [
+    "Led cross-functional team of {count} to deliver {project} {percent}% under budget",
+    "Increased revenue by {amount} through strategic partnership development",
+    "Reduced operational costs by {percent}% through process automation initiatives",
+    "Managed portfolio of {count} client accounts totaling {amount} in annual revenue",
+    "Implemented new {system} system resulting in {percent}% efficiency improvement",
+    "Spearheaded company-wide digital transformation initiative across {count} departments",
+    "Negotiated contracts worth {amount} with key enterprise clients",
+    "Developed and launched {count} products generating {amount} in first-year revenue",
+]
+SKILLS_LISTS = [
+    "Python, JavaScript, SQL, Docker, Kubernetes, AWS, GCP",
+    "Project Management, Agile, Scrum, JIRA, Confluence, Stakeholder Management",
+    "Financial Analysis, Excel Modeling, Bloomberg Terminal, Risk Assessment",
+    "Contract Negotiation, Regulatory Compliance, Due Diligence, M&A Advisory",
+    "Machine Learning, TensorFlow, PyTorch, NLP, Computer Vision, MLOps",
+    "Marketing Strategy, SEO, Content Marketing, Google Analytics, HubSpot",
+]
+UNIVERSITIES = [
+    "Stanford University", "MIT", "Harvard University", "UC Berkeley",
+    "Columbia University", "University of Chicago", "Yale University",
+    "Princeton University", "University of Michigan", "Georgia Tech",
+    "Carnegie Mellon University", "University of Texas at Austin",
+]
+DEGREES = [
+    "Bachelor of Science in Computer Science",
+    "Master of Business Administration",
+    "Juris Doctor",
+    "Bachelor of Arts in Economics",
+    "Master of Science in Data Science",
+    "Bachelor of Engineering in Electrical Engineering",
+    "Master of Arts in Communication",
+    "Doctor of Philosophy in Physics",
+]
+# Common misspellings: correct → misspelled
+MISSPELLINGS = {
+    "receive": "recieve", "management": "managment", "definitely": "definately",
+    "separate": "seperate", "occurrence": "occurence", "accommodate": "accomodate",
+    "necessary": "neccessary", "environment": "enviroment", "government": "goverment",
+    "professional": "proffesional", "recommend": "reccomend", "maintenance": "maintainance",
+    "independent": "independant", "committee": "commitee", "assessment": "assesment",
+    "achievement": "achievment", "development": "developement", "immediately": "immediatly",
+    "experience": "experiance", "performance": "preformance", "agreement": "agremeent",
+    "department": "departmnet", "implementation": "implemenation", "comprehensive": "comperhensive",
+    "communication": "comunication", "approximately": "approximatly", "significant": "signifcant",
+    "responsibility": "responsibilty", "opportunity": "oppertunity", "requirements": "requirments",
+    "acquisition": "aquisition", "beneficial": "benefical", "competitive": "competative",
+    "consistency": "consistancy", "corporation": "corparation", "efficiency": "effeciency",
+    "guarantee": "gaurantee", "infrastructure": "infastructure", "preliminary": "prelimanary",
+    "recognition": "reconition", "regulatory": "regulatary", "specifically": "specificaly",
+    "sufficient": "sufficent", "technical": "techincal", "transformation": "tranformation",
+    "compliance": "complience", "quarterly": "quartely", "delivery": "delivrey",
+    "schedule": "scedule", "revenue": "revnue", "analysis": "anaylsis",
+}
+# Alternate names for entity swap corruptions: original first name → substitute.
+# NOTE: some entries are mutual inverses (James↔Robert, Sarah↔Jennifer), so
+# applying two of these swaps one after another to the same text can undo the first.
+ALTERNATE_NAMES = {
+    "James": "Robert", "Sarah": "Jennifer", "Michael": "William", "Emily": "Patricia",
+    "David": "Thomas", "Jennifer": "Sarah", "Robert": "James", "Maria": "Linda",
+}
+# Company counterpart of ALTERNATE_NAMES: original company name → substitute.
+ALTERNATE_COMPANIES = {
+    "Acme Corporation": "Beta Industries", "GlobalTech Solutions": "LocalTech Services",
+    "Summit Industries": "Valley Enterprises", "Vertex Partners": "Edge Associates",
+}
+STATES = [
+    "California", "New York", "Texas", "Delaware", "Massachusetts",
+    "Illinois", "Florida", "Washington", "Colorado", "Georgia",
+]
+def pick(rng: _random.Random, pool: list) -> str:
+    """Return one element of ``pool`` selected with ``rng.choice``."""
+    chosen = rng.choice(pool)
+    return chosen
+def full_name(rng: _random.Random) -> str:
+    """Return "<first> <last>", drawing the first name and then the last name from ``rng``."""
+    first = pick(rng, FIRST_NAMES)
+    last = pick(rng, LAST_NAMES)
+    return f"{first} {last}"
+def fill_template(rng: _random.Random, template: str) -> str:
+    """Substitute every known ``{placeholder}`` in ``template`` with random content.
+    A value is drawn for every placeholder, in the order listed below, whether or
+    not the template uses it, so the number of RNG draws does not depend on the template.
+    """
+    values = {
+        "party": f"the {pick(rng, ['Vendor', 'Client', 'Contractor', 'Licensee'])}",
+        "days": str(rng.choice([5, 10, 15, 20, 30, 45, 60, 90])),
+        "amount": pick(rng, DOLLAR_AMOUNTS),
+        "installments": str(rng.choice([2, 3, 4, 6, 12])),
+        "state": pick(rng, STATES),
+        "percent": str(rng.randint(5, 45)),
+        "date": pick(rng, DATES),
+        "region": pick(rng, ["Northeast", "Pacific Northwest", "Southeast", "Midwest", "Southwest"]),
+        "department": pick(rng, ["Finance", "Operations", "Legal", "Human Resources", "Engineering"]),
+        "count": str(rng.randint(3, 25)),
+        "ms": str(rng.randint(15, 350)),
+        "minutes": str(rng.randint(2, 15)),
+        "coverage": str(rng.randint(75, 98)),
+        "project": pick(rng, ["CRM migration", "API platform", "data pipeline", "mobile app"]),
+        "system": pick(rng, ["ERP", "CRM", "HRIS", "BI", "inventory management"]),
+    }
+    filled = template
+    for name in values:
+        filled = filled.replace("{%s}" % name, values[name])
+    return filled

game/corruptions.py ADDED Viewed

	@@ -0,0 +1,251 @@

+"""Corruption engine — applies reversible corruptions to target documents to create source documents."""
+import random
+import re
+from typing import List, Tuple
+from .content_pools import (
+    ALTERNATE_COMPANIES, ALTERNATE_NAMES, FIRST_NAMES, LAST_NAMES,
+    MISSPELLINGS, full_name, pick,
+)
+def _find_words_in_doc(document: str) -> List[str]:
+    """Extract unique words (4+ chars, alphabetic) from the document text (ignoring tags).
+    Words are returned in order of first appearance. A plain ``set`` is avoided on
+    purpose: its iteration order for strings varies between interpreter runs (hash
+    randomization), which would make a seeded shuffle of the result non-reproducible.
+    """
+    text_only = re.sub(r"<[^>]+>", " ", document)
+    words = re.findall(r"\b[a-zA-Z]{4,}\b", text_only)
+    # dict.fromkeys de-duplicates while keeping insertion order
+    return list(dict.fromkeys(words))
+def _corrupt_spelling(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
+    """Introduce spelling errors by swapping known words with misspelled versions.
+    Up to ``count`` distinct words that have an entry in MISSPELLINGS are chosen
+    (shuffled with ``rng``); for each one, its first whole-word occurrence is replaced.
+    Returns ``(corrupted_document, applied)`` where each applied entry is
+    ``{"type": "spelling", "original": ..., "corrupted": ...}``.
+    """
+    doc_words = _find_words_in_doc(document)
+    corruptible = [w for w in doc_words if w.lower() in MISSPELLINGS]
+    rng.shuffle(corruptible)
+    applied = []
+    result = document
+    for word in corruptible[:count]:
+        misspelled = MISSPELLINGS[word.lower()]
+        # Preserve original case: all-caps stays all-caps, otherwise keep a leading capital
+        if word.isupper():
+            misspelled = misspelled.upper()
+        elif word[0].isupper():
+            misspelled = misspelled[0].upper() + misspelled[1:]
+        # Whole-word match so the word is never rewritten inside a longer word
+        pattern = r"\b" + re.escape(word) + r"\b"
+        result, hits = re.subn(pattern, lambda _m, rep=misspelled: rep, result, count=1)
+        if hits:
+            applied.append({"type": "spelling", "original": word, "corrupted": misspelled})
+    return result, applied
+def _corrupt_case(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
+    """Introduce case errors — lowercase all-caps headings, uppercase a word in a paragraph.
+    Lines are visited in an ``rng``-shuffled order until ``count`` corruptions are made.
+    Returns ``(corrupted_document, applied)`` where each applied entry is
+    ``{"type": "case", "original": ..., "corrupted": ..., "line": index}``.
+    Only text outside tags is changed, and a corruption is recorded only when the
+    text actually changes.
+    """
+    lines = document.split("\n")
+    applied = []
+    indices = list(range(len(lines)))
+    rng.shuffle(indices)
+    for idx in indices:
+        if len(applied) >= count:
+            break
+        line = lines[idx]
+        # Lowercase a heading
+        heading_match = re.match(r'(<heading[^>]*>)(.*?)(</heading>)', line)
+        if heading_match:
+            original = heading_match.group(2)
+            corrupted = original.lower()
+            # Require an all-caps heading that really changes (skips digit-only/empty text)
+            if original == original.upper() and corrupted != original:
+                start, end = heading_match.span(2)
+                # Splice by position so the opening tag can never be touched
+                lines[idx] = line[:start] + corrupted + line[end:]
+                applied.append({"type": "case", "original": original, "corrupted": corrupted, "line": idx})
+                continue
+        # Randomly uppercase a normal word in a paragraph
+        if "<p>" in line:
+            # The tag alternative consumes whole tags, so tag names are never candidates
+            candidates = [
+                m for m in re.finditer(r"<[^>]+>|\b[a-z]{4,}\b", line)
+                if not m.group(0).startswith("<")
+            ]
+            if candidates:
+                chosen = rng.choice(candidates)
+                word = chosen.group(0)
+                corrupted = word.upper()
+                # Replace exactly the chosen occurrence, not a substring of another word
+                lines[idx] = line[:chosen.start()] + corrupted + line[chosen.end():]
+                applied.append({"type": "case", "original": word, "corrupted": corrupted, "line": idx})
+    return "\n".join(lines), applied
+def _corrupt_names(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
+    """Swap person/company names with alternates.
+    First names are tried in FIRST_NAMES order, then companies in ALTERNATE_COMPANIES
+    order, until ``count`` swaps are recorded. ``rng`` is unused and accepted only so
+    the signature matches the other corruption functions.
+    Every swap targets the first occurrence of the name in the *original* document and
+    all swaps are applied by position afterwards. This matters because ALTERNATE_NAMES
+    contains inverse pairs (James→Robert and Robert→James): replacing sequentially on
+    the evolving text could turn a freshly inserted "Robert" back into "James" while
+    still recording two corruptions.
+    Returns ``(corrupted_document, applied)`` where each applied entry is
+    ``{"type": "name", "original": ..., "corrupted": ...}``.
+    """
+    applied = []
+    # (start, end, replacement) spans measured against the original document
+    spans = []
+    def _claim(original: str, replacement: str) -> None:
+        """Reserve the first occurrence of ``original`` unless it overlaps an earlier swap."""
+        start = document.find(original)
+        if start < 0:
+            return
+        end = start + len(original)
+        if any(start < taken_end and taken_start < end for taken_start, taken_end, _ in spans):
+            return
+        spans.append((start, end, replacement))
+        applied.append({"type": "name", "original": original, "corrupted": replacement})
+    # Find first names in the doc
+    for name in FIRST_NAMES:
+        if len(applied) >= count:
+            break
+        if name in ALTERNATE_NAMES:
+            _claim(name, ALTERNATE_NAMES[name])
+    # Company names
+    for company, alt in ALTERNATE_COMPANIES.items():
+        if len(applied) >= count:
+            break
+        _claim(company, alt)
+    result = document
+    # Apply right-to-left so earlier offsets stay valid
+    for start, end, replacement in sorted(spans, reverse=True):
+        result = result[:start] + replacement + result[end:]
+    return result, applied
+def _corrupt_punctuation(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
+    """Damage punctuation in ``<p>`` lines: drop a trailing period or the first comma.
+    Lines are visited in an ``rng``-shuffled order until ``count`` corruptions are made.
+    A line ending in ``.</p>`` always loses its period; any other line containing
+    ``", "`` loses its first comma with probability 0.5.
+    """
+    rows = document.split("\n")
+    log = []
+    visit_order = list(range(len(rows)))
+    rng.shuffle(visit_order)
+    period_close = ".</p>"
+    for row_no in visit_order:
+        if len(log) >= count:
+            break
+        text = rows[row_no]
+        if not text.startswith("<p>"):
+            continue
+        if text.endswith(period_close):
+            # Drop the period but keep the closing tag
+            rows[row_no] = text[:-len(period_close)] + "</p>"
+            log.append({"type": "punctuation", "action": "removed_period", "line": row_no})
+            continue
+        if ", " in text and rng.random() < 0.5:
+            comma_at = text.find(", ")
+            # Cut only the comma character; the following space stays
+            rows[row_no] = text[:comma_at] + text[comma_at + 1:]
+            log.append({"type": "punctuation", "action": "removed_comma", "line": row_no})
+    return "\n".join(rows), log
+def _corrupt_content(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
+    """Structural corruptions — delete paragraphs or add one junk paragraph.
+    Candidate lines are ``<p>`` lines longer than 20 characters, visited in an
+    ``rng``-shuffled order. Each candidate is deleted with probability 0.6; otherwise a
+    junk paragraph is inserted after it and processing stops.
+    Returns ``(corrupted_document, applied)``. Deleted entries carry ``line`` and
+    ``text``; the junk entry carries ``after_line`` and ``text``. Indices refer to the
+    document as it was before deleted lines were dropped.
+    """
+    lines = document.split("\n")
+    applied = []
+    # A unique marker, so blank lines already in the document are not mistaken for deletions
+    removed = object()
+    # Only corrupt <p> lines, not headings
+    p_indices = [i for i, l in enumerate(lines) if l.startswith("<p>") and len(l) > 20]
+    rng.shuffle(p_indices)
+    for idx in p_indices:
+        if len(applied) >= count:
+            break
+        if rng.random() < 0.6:
+            # Delete a paragraph
+            deleted = lines[idx]
+            lines[idx] = removed  # mark for removal
+            applied.append({"type": "content", "action": "deleted", "line": idx, "text": deleted})
+        else:
+            # Add junk paragraph after this line
+            junk = "<p>THIS PARAGRAPH SHOULD NOT BE HERE AND MUST BE REMOVED.</p>"
+            lines.insert(idx + 1, junk)
+            applied.append({"type": "content", "action": "added_junk", "after_line": idx, "text": junk})
+            break  # inserting shifts indices, stop after one
+    # Drop only the lines marked as deleted
+    lines = [l for l in lines if l is not removed]
+    return "\n".join(lines), applied
+def _corrupt_formatting(rng: random.Random, document: str, count: int) -> Tuple[str, List[dict]]:
+    """Strip ``<bold>`` tags from up to ``count`` bold spans chosen with ``rng``.
+    Returns ``(corrupted_document, applied)`` where each applied entry is
+    ``{"type": "formatting", "action": "stripped_bold", "text": inner_text}``.
+    """
+    log = []
+    text = document
+    # Collect every bold span up front, then pick a random subset
+    bold_spans = list(re.finditer(r"<bold>(.*?)</bold>", text))
+    rng.shuffle(bold_spans)
+    for span in bold_spans[:count]:
+        if len(log) >= count:
+            break
+        tagged, inner = span.group(0), span.group(1)
+        if tagged not in text:
+            continue
+        text = text.replace(tagged, inner, 1)
+        log.append({"type": "formatting", "action": "stripped_bold", "text": inner})
+    return text, log
+CORRUPTION_FUNCTIONS = {
+    "spelling": _corrupt_spelling,
+    "case": _corrupt_case,
+    "names": _corrupt_names,
+    "punctuation": _corrupt_punctuation,
+    "content": _corrupt_content,
+    "formatting": _corrupt_formatting,
+}
+def apply_corruptions(
+    rng: random.Random,
+    target: str,
+    corruption_types: List[str],
+    total_count: int,
+) -> Tuple[str, List[dict], str]:
+    """
+    Apply corruptions to a target document to create a source document.
+    ``total_count`` is split evenly across ``corruption_types``, with earlier types
+    receiving any remainder. Every listed type is asked for at least one corruption,
+    so the requested total can exceed ``total_count`` when it is smaller than the
+    number of types. Type names missing from CORRUPTION_FUNCTIONS are skipped.
+    An empty ``corruption_types`` returns the target unchanged.
+    Returns: (corrupted_source, list_of_corruptions, natural_language_instruction)
+    """
+    if not corruption_types:
+        # Nothing to apply; also avoids dividing by zero below
+        return target, [], _build_instruction([])
+    type_count = len(corruption_types)
+    per_type = max(1, total_count // type_count)
+    # Negative when total_count < type_count, in which case no type gets an extra one
+    remainder = total_count - per_type * type_count
+    all_corruptions = []
+    source = target
+    for i, ctype in enumerate(corruption_types):
+        count = per_type + (1 if i < remainder else 0)
+        fn = CORRUPTION_FUNCTIONS.get(ctype)
+        if fn:
+            # Each function works on the output of the previous one
+            source, corruptions = fn(rng, source, count)
+            all_corruptions.extend(corruptions)
+    instruction = _build_instruction(all_corruptions)
+    return source, all_corruptions, instruction
+def _build_instruction(corruptions: List[dict]) -> str:
+    """Generate a natural language edit instruction from the list of corruptions.
+    Corruptions are grouped by their ``"type"`` and one sentence is emitted per group,
+    in the order spelling, case, names, punctuation, content, formatting. Returns a
+    fixed "no edits needed" sentence for an empty list.
+    """
+    if not corruptions:
+        return "The document appears correct. No edits needed."
+    parts = []
+    by_type = {}
+    for c in corruptions:
+        by_type.setdefault(c["type"], []).append(c)
+    if "spelling" in by_type:
+        items = by_type["spelling"]
+        examples = ", ".join(f"'{c['corrupted']}' should be '{c['original']}'" for c in items[:3])
+        suffix = f" and {len(items)-3} more" if len(items) > 3 else ""
+        parts.append(f"Fix {len(items)} spelling error(s): {examples}{suffix}.")
+    if "case" in by_type:
+        items = by_type["case"]
+        parts.append(f"Fix {len(items)} case error(s) — some text has incorrect capitalization.")
+    # Name swaps are recorded with type "name"; "names" is accepted too because that is
+    # the key this function originally looked for.
+    name_items = by_type.get("name", []) + by_type.get("names", [])
+    if name_items:
+        examples = ", ".join(f"'{c['corrupted']}' should be '{c['original']}'" for c in name_items[:2])
+        parts.append(f"Fix {len(name_items)} incorrect name(s): {examples}.")
+    if "punctuation" in by_type:
+        items = by_type["punctuation"]
+        parts.append(f"Fix {len(items)} punctuation error(s) — missing or extra punctuation marks.")
+    if "content" in by_type:
+        items = by_type["content"]
+        deleted = [c for c in items if c["action"] == "deleted"]
+        junk = [c for c in items if c["action"] == "added_junk"]
+        if deleted:
+            parts.append(f"Restore {len(deleted)} missing paragraph(s) that were removed.")
+        if junk:
+            parts.append(f"Delete {len(junk)} paragraph(s) that don't belong in the document.")
+    if "formatting" in by_type:
+        items = by_type["formatting"]
+        parts.append(f"Restore {len(items)} missing formatting tag(s) (e.g., bold text that lost its tags).")
+    return " ".join(parts)

game/generator.py ADDED Viewed

	@@ -0,0 +1,176 @@

+"""Procedural document generator — creates target documents from templates + random content."""
+import random
+from typing import Tuple
+from .content_pools import (
+    BUSINESS_PARAGRAPHS, COMPANY_NAMES, DATES, DEGREES, DOLLAR_AMOUNTS,
+    LEGAL_CLAUSE_TEMPLATES, RESUME_ACHIEVEMENTS, SKILLS_LISTS, SUBJECTS,
+    TECHNICAL_PARAGRAPHS, UNIVERSITIES, fill_template, full_name, pick,
+)
+from .corruptions import apply_corruptions
def _gen_business_letter(rng: random.Random) -> str:
    """Build an XML-tagged business letter from randomly drawn content.

    The random draws happen in a fixed order (sender, recipient, company,
    date, paragraph count, paragraph selection, template filling), so the
    same ``rng`` state always yields the same letter.
    """
    sender = full_name(rng)
    recipient = full_name(rng)
    company = pick(rng, COMPANY_NAMES)
    date = pick(rng, DATES)

    # Between two and four body paragraphs, capped by the size of the pool.
    wanted = rng.randint(2, 4)
    chosen = rng.sample(BUSINESS_PARAGRAPHS, k=min(wanted, len(BUSINESS_PARAGRAPHS)))
    body = "\n".join(f"<p>{fill_template(rng, para)}</p>" for para in chosen)

    lines = [
        f'<heading level="1">{company}</heading>',
        f"<p>{date}</p>",
        f"<p>Dear {recipient},</p>",
        body,
        "<p>Please do not hesitate to contact us should you require further information.</p>",
        "<p>Sincerely,</p>",
        f"<p>{sender}, Senior Vice President</p>",
    ]
    return "\n".join(lines)
def _gen_legal_contract(rng: random.Random) -> str:
    """Build an XML-tagged service agreement between two distinct companies.

    The second party is re-drawn until it differs from the first. Three to
    six clauses (capped by the pool size) are sampled and numbered from 1.
    """
    party_a = pick(rng, COMPANY_NAMES)
    # Keep drawing until the client differs from the provider.
    while True:
        party_b = pick(rng, COMPANY_NAMES)
        if party_b != party_a:
            break
    date = pick(rng, DATES)
    amount = pick(rng, DOLLAR_AMOUNTS)

    wanted = rng.randint(3, 6)
    clauses = rng.sample(LEGAL_CLAUSE_TEMPLATES, k=min(wanted, len(LEGAL_CLAUSE_TEMPLATES)))
    numbered = [
        f"<p>{number}. {fill_template(rng, clause)}</p>"
        for number, clause in enumerate(clauses, start=1)
    ]
    clause_lines = "\n".join(numbered)

    lines = [
        '<heading level="1">SERVICE AGREEMENT</heading>',
        f'<p>This Service Agreement (the "Agreement") is entered into as of {date} between '
        f'{party_a} ("Provider") and {party_b} ("Client").</p>',
        '<heading level="2">RECITALS</heading>',
        "<p>WHEREAS Provider possesses expertise in professional services and Client desires to engage Provider;</p>",
        "<p>WHEREAS the parties wish to establish the terms under which services will be rendered;</p>",
        '<heading level="2">TERMS AND CONDITIONS</heading>',
        clause_lines,
        '<heading level="2">COMPENSATION</heading>',
        f"<p>Client shall pay Provider a total fee of {amount} for all services rendered under this Agreement.</p>",
        '<heading level="2">EXECUTION</heading>',
        "<p>IN WITNESS WHEREOF, the parties have executed this Agreement as of the date first written above.</p>",
        f"<p>{party_a}: _________________________ Date: _________</p>",
        f"<p>{party_b}: _________________________ Date: _________</p>",
    ]
    return "\n".join(lines)
def _gen_memo(rng: random.Random) -> str:
    """Build an XML-tagged internal memorandum addressed to all staff.

    The contact department in the closing line is drawn last, after the body
    paragraphs have been filled, which keeps the random sequence stable.
    """
    sender = full_name(rng)
    subject = pick(rng, SUBJECTS)
    date = pick(rng, DATES)

    wanted = rng.randint(2, 4)
    chosen = rng.sample(BUSINESS_PARAGRAPHS, k=min(wanted, len(BUSINESS_PARAGRAPHS)))
    body = "\n".join(f"<p>{fill_template(rng, para)}</p>" for para in chosen)

    department = pick(rng, ['HR', 'Operations', 'Legal', 'Finance'])

    lines = [
        '<heading level="1">MEMORANDUM</heading>',
        "<p>To: All Staff</p>",
        f"<p>From: {sender}, Director of Operations</p>",
        f"<p>Date: {date}</p>",
        f"<p>Subject: {subject}</p>",
        body,
        f"<p>Please direct any questions to {sender} or the {department} department.</p>",
    ]
    return "\n".join(lines)
def _gen_technical_report(rng: random.Random) -> str:
    """Build an XML-tagged technical report with findings and recommendations.

    Sections: title, byline with a random version number, executive summary,
    two to four findings, one to three numbered recommendations, conclusion.
    Every random value is drawn in the order in which it appears in the
    rendered document header and sections.
    """
    author = full_name(rng)
    date = pick(rng, DATES)
    title = pick(rng, [
        "Q3 Infrastructure Performance Report", "Annual Security Audit Summary",
        "Platform Migration Assessment", "System Reliability Engineering Review",
        "Data Pipeline Optimization Report", "Cloud Cost Analysis Report",
    ])

    n_findings = rng.randint(2, 4)
    findings = rng.sample(TECHNICAL_PARAGRAPHS, k=min(n_findings, len(TECHNICAL_PARAGRAPHS)))
    finding_lines = "\n".join(f"<p>{fill_template(rng, finding)}</p>" for finding in findings)

    n_recs = rng.randint(1, 3)
    recs = rng.sample(BUSINESS_PARAGRAPHS, k=min(n_recs, len(BUSINESS_PARAGRAPHS)))
    rec_lines = "\n".join(
        f"<p>{number}. {fill_template(rng, rec)}</p>"
        for number, rec in enumerate(recs, start=1)
    )

    # Drawn after the sections above: version numbers first, then the summary
    # and conclusion paragraphs (template picked before it is filled).
    version_major = rng.randint(1, 5)
    version_minor = rng.randint(0, 9)
    summary = fill_template(rng, pick(rng, BUSINESS_PARAGRAPHS))
    conclusion = fill_template(rng, pick(rng, BUSINESS_PARAGRAPHS))

    lines = [
        f'<heading level="1">{title}</heading>',
        f"<p>Author: {author} | Date: {date} | Version: {version_major}.{version_minor}</p>",
        '<heading level="2">Executive Summary</heading>',
        f"<p>{summary}</p>",
        '<heading level="2">Findings</heading>',
        finding_lines,
        '<heading level="2">Recommendations</heading>',
        rec_lines,
        '<heading level="2">Conclusion</heading>',
        f"<p>{conclusion}</p>",
    ]
    return "\n".join(lines)
def _gen_resume(rng: random.Random) -> str:
    """Build an XML-tagged resume with two jobs, education and skills.

    The two employers are guaranteed to differ. The two achievement lists are
    sampled independently from the same pool. Contact details, year ranges,
    school, degree and skills are drawn after the achievements, in document
    order.
    """
    name = full_name(rng)
    city = pick(rng, ["New York", "San Francisco", "Chicago", "Austin", "Seattle", "Boston"])
    company1 = pick(rng, COMPANY_NAMES)
    # Re-draw the previous employer until it differs from the current one.
    while True:
        company2 = pick(rng, COMPANY_NAMES)
        if company2 != company1:
            break

    per_job = min(2, len(RESUME_ACHIEVEMENTS))
    achievements1 = rng.sample(RESUME_ACHIEVEMENTS, k=per_job)
    achievements2 = rng.sample(RESUME_ACHIEVEMENTS, k=per_job)
    ach1 = "\n".join(f"<p>• {fill_template(rng, item)}</p>" for item in achievements1)
    ach2 = "\n".join(f"<p>• {fill_template(rng, item)}</p>" for item in achievements2)

    email = name.lower().replace(' ', '.')
    phone_prefix = rng.randint(100, 999)
    phone_line = rng.randint(1000, 9999)
    current_start = rng.randint(2020, 2024)
    previous_start = rng.randint(2016, 2020)
    previous_end = rng.randint(2020, 2023)
    university = pick(rng, UNIVERSITIES)
    degree = pick(rng, DEGREES)
    graduation_year = rng.randint(2012, 2020)
    skills = pick(rng, SKILLS_LISTS)

    lines = [
        f'<heading level="1">{name}</heading>',
        f"<p>{email}@email.com | (555) {phone_prefix}-{phone_line} | {city}</p>",
        '<heading level="2">Professional Experience</heading>',
        f"<p><bold>{company1}</bold> — Senior Manager ({current_start}-Present)</p>",
        ach1,
        f"<p><bold>{company2}</bold> — Associate ({previous_start}-{previous_end})</p>",
        ach2,
        '<heading level="2">Education</heading>',
        f"<p><bold>{university}</bold> — {degree} ({graduation_year})</p>",
        '<heading level="2">Skills</heading>',
        f"<p>{skills}</p>",
    ]
    return "\n".join(lines)
# Registry mapping a document-type name to the function that renders it.
# Insertion order matters: generate_task picks a key from this mapping with
# the seeded RNG, so reordering entries changes which document a seed yields.
DOC_GENERATORS = dict(
    business_letter=_gen_business_letter,
    legal_contract=_gen_legal_contract,
    memo=_gen_memo,
    technical_report=_gen_technical_report,
    resume=_gen_resume,
)
# Per-difficulty settings:
#   corruption_count  - inclusive (min, max) range for the requested number of corruptions
#   corruption_types  - pool of corruption kinds a task may draw from
#   max_steps         - step budget reported with the task
DIFFICULTY_CONFIG = {
    "easy": {
        "corruption_count": (2, 5),
        "corruption_types": ["spelling", "case"],
        "max_steps": 15,
    },
    "medium": {
        "corruption_count": (5, 12),
        "corruption_types": ["spelling", "case", "names", "punctuation"],
        "max_steps": 25,
    },
    "hard": {
        "corruption_count": (10, 20),
        "corruption_types": ["spelling", "case", "names", "content", "punctuation", "formatting"],
        "max_steps": 40,
    },
}
def generate_task(seed: int = 0, difficulty: str = "easy") -> dict:
    """
    Generate a complete task: target document, corrupted source, and edit instruction.

    All randomness comes from ``random.Random(seed)``. An unrecognized
    ``difficulty`` uses the "easy" settings, while the returned ``difficulty``
    value is the string that was passed in.

    Returns dict with keys: source, target, instruction, doc_type, difficulty,
    corruption_types_used, corruption_count, corruptions, max_steps, seed.
    """
    rng = random.Random(seed)
    settings = DIFFICULTY_CONFIG.get(difficulty, DIFFICULTY_CONFIG["easy"])
    type_pool = settings["corruption_types"]

    # 1) Choose a document type and render the clean target.
    doc_type = rng.choice(list(DOC_GENERATORS))
    target = DOC_GENERATORS[doc_type](rng)

    # 2) Decide how many corruptions to request and which kinds to use.
    low, high = settings["corruption_count"]
    count = rng.randint(low, high)
    n_types = min(len(type_pool), rng.randint(1, len(type_pool)))
    chosen_types = rng.sample(type_pool, k=n_types)

    # 3) Corrupt the target to obtain the source the agent starts from.
    source, applied_corruptions, instruction = apply_corruptions(
        rng, target, chosen_types, count
    )

    task = {}
    task["source"] = source
    task["target"] = target
    task["instruction"] = instruction
    task["doc_type"] = doc_type
    task["difficulty"] = difficulty
    task["corruption_types_used"] = chosen_types
    # The reported count is what was actually applied, not what was requested.
    task["corruption_count"] = len(applied_corruptions)
    task["corruptions"] = applied_corruptions
    task["max_steps"] = settings["max_steps"]
    task["seed"] = seed
    return task

game/grader.py ADDED Viewed

	@@ -0,0 +1,10 @@

+"""Grading / similarity computation for document editing tasks."""
+from difflib import SequenceMatcher
def compute_similarity(current: str, target: str, autojunk: bool = True) -> float:
    """Normalized SequenceMatcher ratio. Returns 0.0–1.0.

    Args:
        current: The document being graded.
        target: The reference document.
        autojunk: Forwarded to ``difflib.SequenceMatcher``. When enabled (the
            default, matching previous behavior) difflib treats very frequent
            characters of a target of 200+ characters as "popular" and does
            not start matches on them, which can understate similarity.
            Pass ``False`` for an exact, slower comparison.

    Returns:
        1.0 for identical inputs (including two empty strings), 0.0 when the
        target is empty but ``current`` is not, otherwise the matcher ratio.
    """
    if not target:
        return 1.0 if not current else 0.0
    if current == target:
        # Identical strings always score 1.0; skip the matching work.
        return 1.0
    return SequenceMatcher(None, current, target, autojunk=autojunk).ratio()

inference.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""
+Baseline inference script for DocEdit Game environment.
+Runs an OpenAI-compatible LLM agent against the 3 fixed evaluation tasks.
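+Environment variables (each has a default in the code below):
+    API_BASE_URL         — LLM API endpoint (default: https://api.openai.com/v1)
+    MODEL_NAME           — model identifier (default: gpt-4o-mini)
+    HF_TOKEN             — Hugging Face / API key (falls back to OPENAI_API_KEY, then to an empty string)
+    DOC_EDIT_GAME_IMAGE  — Docker image of the environment (default: doc_edit_game-env:latest)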
+"""
+import asyncio
+import json
+import os
+from typing import List
+from openai import OpenAI
# --- LLM endpoint settings (overridable through the environment) ---
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
# HF_TOKEN takes precedence; OPENAI_API_KEY is the fallback, then an empty key.
API_KEY = os.getenv("HF_TOKEN", os.getenv("OPENAI_API_KEY", ""))

# --- Benchmark settings ---
BENCHMARK = "doc_edit_game"
TASKS = ["easy", "medium", "hard"]
# A task counts as solved when the final similarity reaches this value.
SUCCESS_THRESHOLD = 0.90
IMAGE_NAME = os.getenv("DOC_EDIT_GAME_IMAGE", "doc_edit_game-env:latest")
def log_start(task: str, env: str, model: str):
    """Print the ``[START]`` marker line for a task and flush stdout."""
    line = f"[START] task={task} env={env} model={model}"
    print(line, flush=True)
def log_step(step: int, action: dict, reward: float, done: bool, error=None):
    """Print one ``[STEP]`` line; the action is rendered as JSON.

    An ``error=...`` suffix is appended only when ``error`` is truthy.
    """
    if error:
        err_str = f" error={error}"
    else:
        err_str = ""
    action_json = json.dumps(action)
    line = f"[STEP] step={step} action={action_json} reward={reward} done={done}{err_str}"
    print(line, flush=True)
def log_end(success: bool, steps: int, score: float, rewards: List[float]):
    """Print the ``[END]`` summary line; the reward list is rendered as JSON."""
    rewards_json = json.dumps(rewards)
    line = f"[END] success={success} steps={steps} score={score} rewards={rewards_json}"
    print(line, flush=True)
# System prompt for the baseline agent. It must stay in sync with the action
# schema the environment accepts (operations and format types), and it must
# ask for JSON only, because the reply is parsed with json.loads.
SYSTEM_PROMPT = """You are an expert document editor. You receive an XML-tagged document and an edit instruction describing what needs to be fixed.
You must respond with a JSON object (no markdown fences) representing ONE edit operation:
{
  "operation": "replace" | "insert" | "delete" | "format" | "move",
  "target": "exact text to find in the document",
  "content": "replacement or new text",
  "position": -1,
  "format_type": "bold" | "italic" | "underline" | "uppercase" | "lowercase" | "none"
}
Operations:
- "replace": find target, replace with content
- "insert": insert content as new paragraph at position (-1 = end)
- "delete": delete the line containing target
- "format": wrap target with format_type tags (bold/italic/underline) or change case (uppercase/lowercase)
- "move": move paragraph containing target to position
Rules:
- ONE operation per response
- Use EXACT text from the document for the target field
- For replace: copy the exact corrupted text as target, put the corrected text as content
- For format: target is the text to format, format_type specifies how
- Decide which single corruption to fix next, then output ONLY the JSON object (no explanation)
"""
def get_model_action(client: OpenAI, document: str, instruction: str, similarity: float, history: List[str]) -> dict:
    """Ask the LLM for the next edit and return it as a dict.

    Args:
        client: OpenAI-compatible client; only ``chat.completions.create`` is used.
        document: Current document text shown to the model.
        instruction: Natural-language edit instruction for the task.
        similarity: Current similarity to the target, shown with 3 decimals.
        history: Earlier step summaries; only the last five are sent.

    Returns:
        The parsed JSON object from the model reply. On any failure (request
        error, empty reply, unparsable reply, or a reply that is valid JSON
        but not an object) a harmless fallback action with an empty target is
        returned and a ``[DEBUG]`` line is printed.
    """
    history_text = "\n".join(history[-5:]) if history else "No previous actions."
    user_msg = (
        f"Current document:\n{document}\n\n"
        f"Edit instruction: {instruction}\n\n"
        f"Current similarity to target: {similarity:.3f}\n\n"
        f"Recent actions:\n{history_text}\n\n"
        f"Respond with ONE JSON edit operation to improve the document."
    )
    try:
        resp = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.0,
            max_tokens=512,
        )
        # The API may return a null content field; treat it as an empty reply.
        text = (resp.choices[0].message.content or "").strip()
        if text.startswith("```"):
            # Drop the opening fence line (e.g. ```json) and the closing fence.
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
            if text.endswith("```"):
                text = text[:-3]
            text = text.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # The model wrapped the JSON in prose: salvage the outermost
            # {...} span before giving up.
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start:end + 1])
        if not isinstance(parsed, dict):
            # Callers use dict methods on the result; a list or scalar is unusable.
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
        return parsed
    except Exception as exc:
        # Deliberate best-effort boundary: one bad reply must not end the episode.
        print(f"[DEBUG] Model request failed: {exc}", flush=True)
        return {"operation": "replace", "target": "", "content": ""}
def _coerce_action_fields(raw) -> dict:
    """Turn a model reply into well-typed keyword arguments for the action model.

    Missing or null fields get the same defaults the original call used; a
    non-dict reply is treated as empty; ``position`` that cannot be read as an
    integer becomes -1. This keeps one malformed reply from raising during
    action construction and aborting the whole episode.
    """
    data = raw if isinstance(raw, dict) else {}

    def _as_text(key: str, default: str) -> str:
        value = data.get(key)
        return default if value is None else str(value)

    try:
        position = int(data.get("position", -1))
    except (TypeError, ValueError, OverflowError):
        position = -1
    return {
        "operation": _as_text("operation", "replace"),
        "target": _as_text("target", ""),
        "content": _as_text("content", ""),
        "position": position,
        "format_type": _as_text("format_type", "none"),
    }


async def run_task(task_name: str) -> dict:
    """Run one named task end to end against a Docker-hosted environment.

    Starts the environment from ``IMAGE_NAME``, resets it with ``task_name``,
    then repeatedly asks the model for an edit and applies it until the
    environment reports ``done`` or the step budget is used up. The
    environment is always closed and an ``[END]`` line is always logged.

    Returns:
        Dict with keys ``task``, ``score`` (final similarity), ``success``
        (score >= ``SUCCESS_THRESHOLD``) and ``steps``.
    """
    from doc_edit_game import DocEditGameAction, DocEditGameEnv
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = await DocEditGameEnv.from_docker_image(IMAGE_NAME)
    history: List[str] = []
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False
    log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
    try:
        result = await env.reset(task_name=task_name)
        obs = result.observation
        # Total budget = steps still available + steps already taken.
        max_steps = obs.steps_remaining + obs.edits_made
        for step in range(1, max_steps + 1):
            if result.done:
                break
            action_dict = get_model_action(client, obs.document, obs.edit_instruction, obs.similarity, history)
            fields = _coerce_action_fields(action_dict)
            action = DocEditGameAction(**fields)
            result = await env.step(action)
            obs = result.observation
            reward = result.reward or 0.0
            rewards.append(reward)
            steps_taken = step
            # Log the model's own payload when it was an object, so well-formed
            # replies are logged exactly as before.
            logged = action_dict if isinstance(action_dict, dict) else fields
            log_step(step=step, action=logged, reward=reward, done=result.done)
            history.append(f"Step {step}: {fields['operation']} -> reward {reward:+.3f}, sim {obs.similarity:.3f}")
            if result.done:
                break
        score = obs.similarity
        success = score >= SUCCESS_THRESHOLD
    finally:
        try:
            await env.close()
        except Exception as e:
            print(f"[DEBUG] env.close() error: {e}", flush=True)
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
    return {"task": task_name, "score": score, "success": success, "steps": steps_taken}
async def main():
    """Run every task in ``TASKS`` sequentially and print a PASS/FAIL summary."""
    separator = "=" * 60
    results = []
    for task in TASKS:
        outcome = await run_task(task)
        results.append(outcome)
        print(f"\n{separator}", flush=True)

    print(f"\n{separator}")
    print("SUMMARY")
    print(f"{separator}")
    for outcome in results:
        if outcome["success"]:
            status = "PASS"
        else:
            status = "FAIL"
        print(f"  [{status}] {outcome['task']}: score={outcome['score']:.3f} steps={outcome['steps']}")

    # Guard against an empty task list when averaging.
    if results:
        avg = sum(outcome["score"] for outcome in results) / len(results)
    else:
        avg = 0
    print(f"  Average score: {avg:.3f}")


if __name__ == "__main__":
    asyncio.run(main())

models.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""Pydantic models for the DocEdit Game environment."""
+from typing import List, Optional
+from openenv.core.env_server.types import Action, Observation
+from pydantic import Field
class DocEditGameAction(Action):
    """One edit operation; the agent submits exactly one per step."""

    # Required: the kind of edit to perform.
    operation: str = Field(..., description="Edit operation: 'replace', 'insert', 'delete', 'format', or 'move'")

    # Text to locate in the current document.
    target: str = Field(default="", description="Text to find in the document (for replace/delete/format/move)")

    # Replacement text or the new paragraph.
    content: str = Field(default="", description="New content (replacement for 'replace', new paragraph for 'insert')")

    # Destination line index; -1 means "append at the end".
    position: int = Field(default=-1, description="Paragraph index for 'insert'/'move' (-1 = append at end)")

    # How to format the target; only meaningful for the 'format' operation.
    format_type: str = Field(
        default="none",
        description="Formatting type for 'format' operation: 'bold', 'italic', 'underline', 'uppercase', 'lowercase', 'none'",
    )
class DocEditGameObservation(Observation):
    """What the agent sees after a reset or a step."""

    # --- Document state ---
    document: str = Field(
        default="",
        description="Current document content (XML-tagged)",
    )
    edit_instruction: str = Field(
        default="",
        description="Natural language description of required edits",
    )
    similarity: float = Field(
        default=0.0,
        description="Similarity to target document (0.0–1.0)",
    )

    # --- Task identity ---
    task_id: str = Field(
        default="",
        description="Unique task identifier (seed-based)",
    )
    task_difficulty: str = Field(
        default="easy",
        description="Task difficulty: easy, medium, hard",
    )
    doc_type: str = Field(
        default="",
        description="Document type (business_letter, legal_contract, etc.)",
    )
    corruption_types: List[str] = Field(
        default_factory=list,
        description="Types of corruptions applied",
    )

    # --- Progress counters ---
    steps_remaining: int = Field(
        default=0,
        description="Steps left in this episode",
    )
    edits_made: int = Field(
        default=0,
        description="Number of edit actions taken so far",
    )
    edits_estimated: int = Field(
        default=0,
        description="Estimated edits needed",
    )

openenv.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+spec_version: 1
+name: doc_edit_game
+type: space
+runtime: fastapi
+app: server.app:app
+port: 8000

openenv_doc_edit_game.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,9 @@

+Metadata-Version: 2.4
+Name: openenv-doc_edit_game
+Version: 0.1.0
+Summary: Doc Edit Game environment for OpenEnv
+Requires-Python: >=3.10
+Requires-Dist: openenv-core[core]>=0.2.2
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"

openenv_doc_edit_game.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+README.md
+pyproject.toml
+./__init__.py
+./client.py
+./inference.py
+./models.py
+openenv_doc_edit_game.egg-info/PKG-INFO
+openenv_doc_edit_game.egg-info/SOURCES.txt
+openenv_doc_edit_game.egg-info/dependency_links.txt
+openenv_doc_edit_game.egg-info/entry_points.txt
+openenv_doc_edit_game.egg-info/requires.txt
+openenv_doc_edit_game.egg-info/top_level.txt
+server/__init__.py
+server/app.py
+server/doc_edit_game_environment.py

openenv_doc_edit_game.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

openenv_doc_edit_game.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [console_scripts]
2	+ server = doc_edit_game.server.app:main

openenv_doc_edit_game.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openenv-core[core]>=0.2.2
+[dev]
+pytest>=8.0.0
+pytest-cov>=4.0.0

openenv_doc_edit_game.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ doc_edit_game

pyproject.toml ADDED Viewed

	@@ -0,0 +1,45 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-doc_edit_game"
+version = "0.1.0"
+description = "Doc Edit Game environment for OpenEnv"
+requires-python = ">=3.10"
+dependencies = [
+    # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
+    # install from github
+    # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
+    "openenv-core[core]>=0.2.2",
+    # Environment-specific dependencies
+    # Add all dependencies needed for your environment here
+    # Examples:
+    # "numpy>=1.19.0",
+    # "torch>=2.0.0",
+    # "gymnasium>=0.29.0",
+    # "openspiel>=1.0.0",
+    # "smolagents>=1.22.0,<2",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+]
+[project.scripts]
+# Server entry point - enables running via: uv run --project . server
+# or: python -m doc_edit_game.server.app
+server = "doc_edit_game.server.app:main"
+[tool.setuptools]
+include-package-data = true
+packages = ["doc_edit_game", "doc_edit_game.server"]
+package-dir = { "doc_edit_game" = ".", "doc_edit_game.server" = "server" }

server/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Doc Edit Game environment server components."""
+from .doc_edit_game_environment import DocEditGameEnvironment
+__all__ = ["DocEditGameEnvironment"]

server/app.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""FastAPI application for the DocEdit Game Environment."""
+try:
+    from openenv.core.env_server.http_server import create_app
+except Exception as e:
+    raise ImportError("openenv is required. Install with: uv sync") from e
+try:
+    from ..models import DocEditGameAction, DocEditGameObservation
+    from .doc_edit_game_environment import DocEditGameEnvironment
+except (ImportError, ModuleNotFoundError):
+    from models import DocEditGameAction, DocEditGameObservation
+    from server.doc_edit_game_environment import DocEditGameEnvironment
# Application object built by OpenEnv's create_app factory from the
# environment class and its action/observation models. main() below serves it.
app = create_app(
    DocEditGameEnvironment,
    DocEditGameAction,
    DocEditGameObservation,
    max_concurrent_envs=4,
    env_name="doc_edit_game",
)
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve ``app`` with uvicorn on the given host and port (blocking call)."""
    # Imported lazily so that importing this module does not require uvicorn.
    import uvicorn

    uvicorn.run(app, port=port, host=host)


if __name__ == "__main__":
    main()

server/doc_edit_game_environment.py ADDED Viewed

	@@ -0,0 +1,198 @@

+"""
+DocEdit Game Environment — procedurally generated document editing challenges.
+Agents face randomized document editing tasks: fix spelling, case, names,
+punctuation, formatting, and structural issues across diverse document types.
+"""
+import re
+from typing import Any, Optional
+from uuid import uuid4
+from openenv.core.env_server.interfaces import Environment
+from openenv.core.env_server.types import State
+try:
+    from ..models import DocEditGameAction, DocEditGameObservation
+    from ..game.generator import generate_task
+    from ..game.grader import compute_similarity
+except ImportError:
+    from models import DocEditGameAction, DocEditGameObservation
+    from game.generator import generate_task
+    from game.grader import compute_similarity
# Three fixed tasks for competition evaluation (deterministic seeds).
# Each task is named after its difficulty level.
FIXED_TASKS = {
    level: {"seed": fixed_seed, "difficulty": level}
    for level, fixed_seed in (("easy", 1001), ("medium", 2002), ("hard", 3003))
}
class DocEditGameEnvironment(Environment):
    """
    Document editing RL environment backed by procedurally generated tasks.

    ``reset`` generates a clean target document plus a corrupted copy; the
    agent edits the copy one operation per ``step`` and is rewarded by the
    change in similarity to the (hidden) target. Named fixed tasks give
    deterministic evaluation; otherwise a seed selects the task.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        # Episode bookkeeping.
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._task_id = ""
        self._task_info: dict = {}
        self._max_steps = 15
        self._edits_made = 0
        # Document being edited, its reference, and the instruction text.
        self._document = ""
        self._target = ""
        self._instruction = ""
        # Similarity after the previous step; the reward is the delta to it.
        self._prev_similarity = 0.0

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditGameObservation:
        """Start a new episode and return the initial observation.

        Keyword options: ``task_name`` (a key of ``FIXED_TASKS`` overrides
        both seed and difficulty) and ``difficulty`` (default "easy"). When
        no fixed task and no seed are given, a random 32-bit seed is used.
        """
        task_name = kwargs.get("task_name", "")
        difficulty = kwargs.get("difficulty", "easy")

        fixed = FIXED_TASKS.get(task_name)
        if fixed is not None:
            # Named evaluation task: both seed and difficulty are pinned.
            seed = fixed["seed"]
            difficulty = fixed["difficulty"]
        elif seed is None:
            seed = hash(uuid4()) & 0xFFFFFFFF

        task = generate_task(seed=seed, difficulty=difficulty)
        self._task_info = task
        self._document = task["source"]
        self._target = task["target"]
        self._instruction = task["instruction"]
        self._max_steps = task["max_steps"]
        self._edits_made = 0
        self._task_id = f"seed_{seed}_{difficulty}"
        self._prev_similarity = compute_similarity(self._document, self._target)
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)

        return DocEditGameObservation(
            document=self._document,
            edit_instruction=self._instruction,
            similarity=self._prev_similarity,
            task_id=self._task_id,
            task_difficulty=difficulty,
            doc_type=task["doc_type"],
            corruption_types=task["corruption_types_used"],
            steps_remaining=self._max_steps,
            edits_made=0,
            edits_estimated=task["corruption_count"],
            done=False,
            reward=0.0,
        )

    def _place_line(self, lines: list, line: str, pos: int) -> None:
        """Insert ``line`` at ``pos``; out-of-range positions append instead."""
        if 0 <= pos < len(lines):
            lines.insert(pos, line)
        else:
            lines.append(line)

    def _apply_replace(self, action: DocEditGameAction) -> bool:
        """Replace the first occurrence of the target. Returns True on no-op."""
        if action.target not in self._document:
            return True
        self._document = self._document.replace(action.target, action.content, 1)
        return False

    def _apply_insert(self, action: DocEditGameAction) -> bool:
        """Insert content as a new line, wrapping bare text in ``<p>`` tags."""
        lines = self._document.split("\n")
        new_para = action.content
        if not new_para.startswith(("<p>", "<heading")):
            new_para = f"<p>{new_para}</p>"
        self._place_line(lines, new_para, action.position)
        self._document = "\n".join(lines)
        return False

    def _apply_delete(self, action: DocEditGameAction) -> bool:
        """Drop every line containing the target. Returns True if none matched."""
        lines = self._document.split("\n")
        kept = [line for line in lines if action.target not in line]
        self._document = "\n".join(kept)
        return len(kept) == len(lines)

    def _apply_format(self, action: DocEditGameAction) -> bool:
        """Tag or re-case the first occurrence of the target. True on no-op."""
        fmt = action.format_type.lower()
        target = action.target
        if target not in self._document:
            return True
        if fmt in ("bold", "italic", "underline"):
            replacement = f"<{fmt}>{target}</{fmt}>"
        elif fmt == "uppercase":
            replacement = target.upper()
        elif fmt == "lowercase":
            replacement = target.lower()
        else:
            # Unknown format type (including "none"): nothing to do.
            return True
        self._document = self._document.replace(target, replacement, 1)
        return False

    def _apply_move(self, action: DocEditGameAction) -> bool:
        """Move the first line containing the target. True if none matched."""
        lines = self._document.split("\n")
        source_idx = next(
            (idx for idx, line in enumerate(lines) if action.target in line),
            None,
        )
        if source_idx is None:
            return True
        moved_line = lines.pop(source_idx)
        # The destination index is interpreted after the line was removed.
        self._place_line(lines, moved_line, action.position)
        self._document = "\n".join(lines)
        return False

    def step(self, action: DocEditGameAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditGameObservation:
        """Apply one edit and return the resulting observation.

        Reward is the similarity change, minus 0.01 for a no-op, plus 0.5
        when similarity reaches 0.999. The episode is done at that similarity
        or when the step budget is exhausted.
        """
        self._state.step_count += 1
        self._edits_made += 1

        op = action.operation.lower().strip()
        if op == "insert":
            # Insert is the only operation that does not need a target.
            noop = self._apply_insert(action)
        elif action.target and op == "replace":
            noop = self._apply_replace(action)
        elif action.target and op == "delete":
            noop = self._apply_delete(action)
        elif action.target and op == "format":
            noop = self._apply_format(action)
        elif action.target and op == "move":
            noop = self._apply_move(action)
        else:
            # Unknown operation, or a target-based operation without a target.
            noop = True

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity
        if noop:
            reward -= 0.01  # penalty for wasted step
        self._prev_similarity = new_sim

        steps_left = self._max_steps - self._state.step_count
        solved = new_sim >= 0.999
        done = solved or (steps_left <= 0)
        if solved:
            reward += 0.5

        return DocEditGameObservation(
            document=self._document,
            edit_instruction=self._instruction,
            similarity=new_sim,
            task_id=self._task_id,
            task_difficulty=self._task_info.get("difficulty", "easy"),
            doc_type=self._task_info.get("doc_type", ""),
            corruption_types=self._task_info.get("corruption_types_used", []),
            steps_remaining=max(steps_left, 0),
            edits_made=self._edits_made,
            edits_estimated=self._task_info.get("corruption_count", 0),
            done=done,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                "noop": noop,
                "exact_match": solved,
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step counter)."""
        return self._state

server/requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+openenv[core]>=0.2.0
+fastapi>=0.115.0
+uvicorn>=0.24.0

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff