""" DocEdit Environment — an RL environment for structured document editing. Agents learn to transform source documents into target documents through replace, insert, and delete operations on XML-tagged paragraph content. Three tasks with increasing difficulty (easy → medium → hard). """ from difflib import SequenceMatcher from typing import Any, Optional from uuid import uuid4 from openenv.core.env_server.interfaces import Environment from openenv.core.env_server.types import State try: from ..models import DocEditAction, DocEditObservation except ImportError: from models import DocEditAction, DocEditObservation # --------------------------------------------------------------------------- # Task definitions: source doc, target doc, description, max steps # --------------------------------------------------------------------------- TASKS = { "easy_word_replace": { "source": ( "
The company's annual revnue exceeded expectations this quarter.
\n" "Our clints have expressed strong satisfcation with the new product line.
\n" "The managment team will present the quartely results on Friday.
" ), "target": ( "The company's annual revenue exceeded expectations this quarter.
\n" "Our clients have expressed strong satisfaction with the new product line.
\n" "The management team will present the quarterly results on Friday.
" ), "description": ( "Fix spelling errors in this business report. The document contains typos: " "'revnue' should be 'revenue', 'clints' should be 'clients', " "'satisfcation' should be 'satisfaction', 'managment' should be 'management', " "'quartely' should be 'quarterly'." ), "max_steps": 10, }, "medium_paragraph_edit": { "source": ( "MEMORANDUM
\n" "To: All Staff
\n" "From: Human Resources
\n" "Subject: Updated Remote Work Policy
\n" "Effective immediately, all employees may work remotely up to three days per week.
\n" "Please submit your preferred schedule to your direct manager by end of month.
" ), "target": ( "MEMORANDUM
\n" "To: All Staff
\n" "From: Human Resources
\n" "Date: April 2026
\n" "Subject: Updated Remote Work Policy
\n" "Effective immediately, all employees may work remotely up to three days per week. Employees must ensure reliable internet connectivity and a dedicated workspace.
\n" "Remote work days must not fall on team meeting days (Tuesday and Thursday).
\n" "Please submit your preferred schedule to your direct manager by end of month.
" ), "description": ( "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', " "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', " "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'" ), "max_steps": 15, }, "hard_multi_edit": { "source": ( "CONTRACT AMENDMENT NO. 3
\n" "This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').
\n" "WHEREAS the original agremeent dated January 2024 established terms for software development services;
\n" "WHEREAS both parties wish to modify certain terms of the agreement;
\n" "NOW THEREFORE the parties agree as follows:
\n" "1. The delivrey schedule in Section 4.2 is extended by 90 days.
\n" "2. The total contract value remains unchanged at $500,000.
\n" "3. All other terms and conditions of the original agreeement remain in full force.
\n" "This amendment shall be effective upon execution by both parties.
\n" "OBSOLETE CLAUSE: This section is no longer applicable and should be removed.
" ), "target": ( "CONTRACT AMENDMENT NO. 3
\n" "This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').
\n" "WHEREAS the original agreement dated January 2024 established terms for software development services;
\n" "WHEREAS both parties wish to modify certain terms of the agreement;
\n" "NOW THEREFORE the parties agree as follows:
\n" "1. The delivery schedule in Section 4.2 is extended by 90 days.
\n" "2. The total contract value is hereby increased to $750,000 to reflect additional scope.
\n" "3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'
\n" "4. All other terms and conditions of the original agreement remain in full force.
\n" "This amendment shall be effective upon execution by both parties.
" ), "description": ( "Edit this legal contract amendment: " "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, " "(2) Fix 'delivrey' to 'delivery' in clause 1, " "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', " "(4) Replace clause 3 text with: 'A new Section 5.1 is added: \\'Vendor shall provide monthly progress reports to Client.\\'', " "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', " "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely." ), "max_steps": 20, }, } TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"] def compute_similarity(a: str, b: str) -> float: """Normalized SequenceMatcher ratio between two strings.""" return SequenceMatcher(None, a, b).ratio() class DocEditEnvironment(Environment): """ Document editing RL environment. The agent receives a source document with XML paragraph tags and must transform it to match a target document through edit operations. Reward is the incremental improvement in similarity to the target. """ SUPPORTS_CONCURRENT_SESSIONS: bool = True def __init__(self, task_name: str = "easy_word_replace"): self._task_name = task_name if task_name in TASKS else "easy_word_replace" self._task = TASKS[self._task_name] self._document = "" self._target = "" self._prev_similarity = 0.0 self._max_steps = self._task["max_steps"] self._state = State(episode_id=str(uuid4()), step_count=0) def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation: task_name = kwargs.get("task_name", self._task_name) if task_name in TASKS: self._task_name = task_name self._task = TASKS[self._task_name] self._max_steps = self._task["max_steps"] self._document = self._task["source"] self._target = self._task["target"] self._prev_similarity = compute_similarity(self._document, self._target) self._state = State(episode_id=episode_id or str(uuid4()), step_count=0) return DocEditObservation( document=self._document, target_description=self._task["description"], similarity=self._prev_similarity, task_name=self._task_name, steps_remaining=self._max_steps, done=False, reward=0.0, ) def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation: self._state.step_count += 1 op = action.operation.lower().strip() if op == "replace" and action.target: self._document = self._document.replace(action.target, action.content, 1) elif op == "insert": paragraphs = self._document.split("\n") new_para = action.content if action.content.startswith("") else f"
{action.content}
" pos = action.position if pos < 0 or pos >= len(paragraphs): paragraphs.append(new_para) else: paragraphs.insert(pos, new_para) self._document = "\n".join(paragraphs) elif op == "delete" and action.target: # Delete the line containing the target text lines = self._document.split("\n") lines = [l for l in lines if action.target not in l] self._document = "\n".join(lines) new_sim = compute_similarity(self._document, self._target) reward = new_sim - self._prev_similarity # positive if improving self._prev_similarity = new_sim steps_left = self._max_steps - self._state.step_count done = (new_sim >= 0.999) or (steps_left <= 0) # Bonus for completing the task if new_sim >= 0.999: reward += 0.5 return DocEditObservation( document=self._document, target_description=self._task["description"], similarity=new_sim, task_name=self._task_name, steps_remaining=max(steps_left, 0), done=done, reward=round(reward, 4), metadata={ "step": self._state.step_count, "operation": op, "exact_match": new_sim >= 0.999, }, ) @property def state(self) -> State: return self._state