# doc-edit-env/server/doc_edit_environment.py
# Uploaded by sanjuhs using huggingface_hub (commit 06165d0, verified).
"""
DocEdit Environment — an RL environment for structured document editing.
Agents learn to transform source documents into target documents through
replace, insert, and delete operations on XML-tagged paragraph content.
Three tasks with increasing difficulty (easy → medium → hard).
"""
from difflib import SequenceMatcher
from typing import Any, Optional
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
try:
from ..models import DocEditAction, DocEditObservation
except ImportError:
from models import DocEditAction, DocEditObservation
# ---------------------------------------------------------------------------
# Task definitions: source doc, target doc, description, max steps
# ---------------------------------------------------------------------------
# Registry of editing tasks, keyed by task name. Every entry carries:
#   "source"      - the starting document, one <p>...</p> paragraph per line
#   "target"      - the document the agent is expected to produce
#   "description" - natural-language instructions describing the required edits
#   "max_steps"   - step budget for one episode of the task
TASKS = {
    # Easy: five misspelled words to correct; paragraph structure is unchanged.
    "easy_word_replace": {
        "source": (
            "<p>The company's annual revnue exceeded expectations this quarter.</p>\n"
            "<p>Our clints have expressed strong satisfcation with the new product line.</p>\n"
            "<p>The managment team will present the quartely results on Friday.</p>"
        ),
        "target": (
            "<p>The company's annual revenue exceeded expectations this quarter.</p>\n"
            "<p>Our clients have expressed strong satisfaction with the new product line.</p>\n"
            "<p>The management team will present the quarterly results on Friday.</p>"
        ),
        "description": (
            "Fix spelling errors in this business report. The document contains typos: "
            "'revnue' should be 'revenue', 'clints' should be 'clients', "
            "'satisfcation' should be 'satisfaction', 'managment' should be 'management', "
            "'quartely' should be 'quarterly'."
        ),
        "max_steps": 10,
    },
    # Medium: two new paragraphs are inserted and one sentence is appended to
    # an existing paragraph.
    "medium_paragraph_edit": {
        "source": (
            "<p>MEMORANDUM</p>\n"
            "<p>To: All Staff</p>\n"
            "<p>From: Human Resources</p>\n"
            "<p>Subject: Updated Remote Work Policy</p>\n"
            "<p>Effective immediately, all employees may work remotely up to three days per week.</p>\n"
            "<p>Please submit your preferred schedule to your direct manager by end of month.</p>"
        ),
        "target": (
            "<p>MEMORANDUM</p>\n"
            "<p>To: All Staff</p>\n"
            "<p>From: Human Resources</p>\n"
            "<p>Date: April 2026</p>\n"
            "<p>Subject: Updated Remote Work Policy</p>\n"
            "<p>Effective immediately, all employees may work remotely up to three days per week. Employees must ensure reliable internet connectivity and a dedicated workspace.</p>\n"
            "<p>Remote work days must not fall on team meeting days (Tuesday and Thursday).</p>\n"
            "<p>Please submit your preferred schedule to your direct manager by end of month.</p>"
        ),
        "description": (
            "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', "
            "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', "
            "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'"
        ),
        "max_steps": 15,
    },
    # Hard: three typo fixes, one rewritten clause, one new clause (with the
    # following clause renumbered) and one paragraph removed.
    "hard_multi_edit": {
        "source": (
            "<p>CONTRACT AMENDMENT NO. 3</p>\n"
            "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n"
            "<p>WHEREAS the original agremeent dated January 2024 established terms for software development services;</p>\n"
            "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n"
            "<p>NOW THEREFORE the parties agree as follows:</p>\n"
            "<p>1. The delivrey schedule in Section 4.2 is extended by 90 days.</p>\n"
            "<p>2. The total contract value remains unchanged at $500,000.</p>\n"
            "<p>3. All other terms and conditions of the original agreeement remain in full force.</p>\n"
            "<p>This amendment shall be effective upon execution by both parties.</p>\n"
            "<p>OBSOLETE CLAUSE: This section is no longer applicable and should be removed.</p>"
        ),
        "target": (
            "<p>CONTRACT AMENDMENT NO. 3</p>\n"
            "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n"
            "<p>WHEREAS the original agreement dated January 2024 established terms for software development services;</p>\n"
            "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n"
            "<p>NOW THEREFORE the parties agree as follows:</p>\n"
            "<p>1. The delivery schedule in Section 4.2 is extended by 90 days.</p>\n"
            "<p>2. The total contract value is hereby increased to $750,000 to reflect additional scope.</p>\n"
            "<p>3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'</p>\n"
            "<p>4. All other terms and conditions of the original agreement remain in full force.</p>\n"
            "<p>This amendment shall be effective upon execution by both parties.</p>"
        ),
        "description": (
            "Edit this legal contract amendment: "
            "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, "
            "(2) Fix 'delivrey' to 'delivery' in clause 1, "
            "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', "
            # NOTE(review): the doubled backslashes below put a literal "\" in
            # front of each inner quote of the description, while the target
            # text has plain quotes - confirm that this is intended.
            "(4) Replace clause 3 text with: 'A new Section 5.1 is added: \\'Vendor shall provide monthly progress reports to Client.\\'', "
            "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', "
            "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely."
        ),
        "max_steps": 20,
    },
}

# Task names listed from easiest to hardest.
TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"]
def compute_similarity(a: str, b: str) -> float:
    """Return the character-level similarity of two strings.

    The value is ``difflib.SequenceMatcher(...).ratio()``: ``2 * M / T`` where
    ``M`` is the number of matched characters and ``T`` the combined length of
    both strings. It lies in ``[0.0, 1.0]`` and is ``1.0`` for identical
    strings (including two empty strings).

    Args:
        a: First string (the current document).
        b: Second string (the target document).

    Returns:
        Similarity ratio between ``a`` and ``b``.
    """
    # autojunk must be off: with the default heuristic, any character making
    # up more than 1% of a 200+ character second sequence is dropped as a
    # match anchor. For prose that is nearly every letter, so the ratio of
    # two similar documents can be badly underestimated.
    return SequenceMatcher(None, a, b, autojunk=False).ratio()
class DocEditEnvironment(Environment):
    """
    Document editing RL environment.

    The agent receives a source document with XML paragraph tags and must
    transform it to match a target document through edit operations
    ("replace", "insert", "delete").

    Reward for a step is the change in similarity to the target caused by
    that step, plus a fixed bonus on a step whose similarity reaches the
    completion threshold. An episode ends when that threshold is reached or
    the task's step budget is used up.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Similarity at or above which the episode ends and the bonus is paid.
    _COMPLETION_THRESHOLD: float = 0.999
    # Extra reward added on a step that reaches the completion threshold.
    _COMPLETION_BONUS: float = 0.5

    def __init__(self, task_name: str = "easy_word_replace"):
        """Create an environment bound to ``task_name``.

        Args:
            task_name: Key into ``TASKS``. Unknown names fall back to
                ``"easy_word_replace"``.
        """
        self._task_name = task_name if task_name in TASKS else "easy_word_replace"
        self._load_task()
        self._state = State(episode_id=str(uuid4()), step_count=0)

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation:
        """Start a new episode and return the initial observation.

        Args:
            seed: Accepted for interface compatibility; not used.
            episode_id: Identifier for the new episode. A random UUID is
                generated when omitted or empty.
            **kwargs: May contain ``task_name`` to switch tasks. A name that
                is not in ``TASKS`` is ignored and the current task is kept.

        Returns:
            Observation of the untouched source document with ``reward=0.0``
            and ``done=False``.
        """
        task_name = kwargs.get("task_name", self._task_name)
        if task_name in TASKS:
            self._task_name = task_name
        self._load_task()
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=self._prev_similarity,
            task_name=self._task_name,
            steps_remaining=self._max_steps,
            done=False,
            reward=0.0,
        )

    def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation:
        """Apply one edit action and score the resulting document.

        Args:
            action: The edit to perform. Its ``operation`` is matched
                case-insensitively against "replace", "insert" and "delete";
                anything else leaves the document unchanged but still
                consumes a step.
            timeout_s: Accepted for interface compatibility; not used.
            **kwargs: Ignored.

        Returns:
            Observation with the edited document, its similarity to the
            target, the step reward (rounded to 4 decimals), the remaining
            step budget and the ``done`` flag. ``metadata["exact_match"]`` is
            true only when the document equals the target exactly.
        """
        self._state.step_count += 1
        op = action.operation.lower().strip()
        self._apply_action(op, action)

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity  # positive if improving
        self._prev_similarity = new_sim

        steps_left = self._max_steps - self._state.step_count
        reached_target = new_sim >= self._COMPLETION_THRESHOLD
        if reached_target:
            # Bonus for completing the task.
            reward += self._COMPLETION_BONUS

        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=new_sim,
            task_name=self._task_name,
            steps_remaining=max(steps_left, 0),
            done=reached_target or steps_left <= 0,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                # Real equality, not the similarity threshold: on a long
                # document a one-character difference still scores >= 0.999.
                "exact_match": self._document == self._target,
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state

    def _load_task(self) -> None:
        """Point the episode fields at the start of the task ``self._task_name``."""
        # Also called from __init__ so that a step() issued before reset()
        # works on the real source document. Starting from two empty strings
        # would score similarity 1.0 and immediately pay the completion bonus.
        self._task = TASKS[self._task_name]
        self._max_steps = self._task["max_steps"]
        self._document = self._task["source"]
        self._target = self._task["target"]
        self._prev_similarity = compute_similarity(self._document, self._target)

    def _apply_action(self, op: str, action: DocEditAction) -> None:
        """Mutate ``self._document`` according to the normalized operation ``op``."""
        # Treat missing content as empty text instead of raising TypeError.
        content = action.content or ""

        if op == "replace" and action.target:
            # Only the first occurrence of the target text is replaced.
            self._document = self._document.replace(action.target, content, 1)
        elif op == "insert":
            # An emptied document has no paragraphs; "".split("\n") would
            # yield [""] and leave a blank first line after the insert.
            paragraphs = self._document.split("\n") if self._document else []
            new_para = content if content.startswith("<p>") else f"<p>{content}</p>"
            pos = action.position
            # Missing or out-of-range positions append at the end.
            if pos is None or pos < 0 or pos >= len(paragraphs):
                paragraphs.append(new_para)
            else:
                paragraphs.insert(pos, new_para)
            self._document = "\n".join(paragraphs)
        elif op == "delete" and action.target:
            # Every line containing the target text is removed.
            self._document = "\n".join(
                line for line in self._document.split("\n") if action.target not in line
            )