Spaces:
Sleeping
Sleeping
| """ | |
| DocEdit Environment — an RL environment for structured document editing. | |
| Agents learn to transform source documents into target documents through | |
| replace, insert, and delete operations on XML-tagged paragraph content. | |
| Three tasks with increasing difficulty (easy → medium → hard). | |
| """ | |
| from difflib import SequenceMatcher | |
| from typing import Any, Optional | |
| from uuid import uuid4 | |
| from openenv.core.env_server.interfaces import Environment | |
| from openenv.core.env_server.types import State | |
| try: | |
| from ..models import DocEditAction, DocEditObservation | |
| except ImportError: | |
| from models import DocEditAction, DocEditObservation | |
| # --------------------------------------------------------------------------- | |
| # Task definitions: source doc, target doc, description, max steps | |
| # --------------------------------------------------------------------------- | |
| TASKS = { | |
| "easy_word_replace": { | |
| "source": ( | |
| "<p>The company's annual revnue exceeded expectations this quarter.</p>\n" | |
| "<p>Our clints have expressed strong satisfcation with the new product line.</p>\n" | |
| "<p>The managment team will present the quartely results on Friday.</p>" | |
| ), | |
| "target": ( | |
| "<p>The company's annual revenue exceeded expectations this quarter.</p>\n" | |
| "<p>Our clients have expressed strong satisfaction with the new product line.</p>\n" | |
| "<p>The management team will present the quarterly results on Friday.</p>" | |
| ), | |
| "description": ( | |
| "Fix spelling errors in this business report. The document contains typos: " | |
| "'revnue' should be 'revenue', 'clints' should be 'clients', " | |
| "'satisfcation' should be 'satisfaction', 'managment' should be 'management', " | |
| "'quartely' should be 'quarterly'." | |
| ), | |
| "max_steps": 10, | |
| }, | |
| "medium_paragraph_edit": { | |
| "source": ( | |
| "<p>MEMORANDUM</p>\n" | |
| "<p>To: All Staff</p>\n" | |
| "<p>From: Human Resources</p>\n" | |
| "<p>Subject: Updated Remote Work Policy</p>\n" | |
| "<p>Effective immediately, all employees may work remotely up to three days per week.</p>\n" | |
| "<p>Please submit your preferred schedule to your direct manager by end of month.</p>" | |
| ), | |
| "target": ( | |
| "<p>MEMORANDUM</p>\n" | |
| "<p>To: All Staff</p>\n" | |
| "<p>From: Human Resources</p>\n" | |
| "<p>Date: April 2026</p>\n" | |
| "<p>Subject: Updated Remote Work Policy</p>\n" | |
| "<p>Effective immediately, all employees may work remotely up to three days per week. Employees must ensure reliable internet connectivity and a dedicated workspace.</p>\n" | |
| "<p>Remote work days must not fall on team meeting days (Tuesday and Thursday).</p>\n" | |
| "<p>Please submit your preferred schedule to your direct manager by end of month.</p>" | |
| ), | |
| "description": ( | |
| "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', " | |
| "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', " | |
| "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'" | |
| ), | |
| "max_steps": 15, | |
| }, | |
| "hard_multi_edit": { | |
| "source": ( | |
| "<p>CONTRACT AMENDMENT NO. 3</p>\n" | |
| "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n" | |
| "<p>WHEREAS the original agremeent dated January 2024 established terms for software development services;</p>\n" | |
| "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n" | |
| "<p>NOW THEREFORE the parties agree as follows:</p>\n" | |
| "<p>1. The delivrey schedule in Section 4.2 is extended by 90 days.</p>\n" | |
| "<p>2. The total contract value remains unchanged at $500,000.</p>\n" | |
| "<p>3. All other terms and conditions of the original agreeement remain in full force.</p>\n" | |
| "<p>This amendment shall be effective upon execution by both parties.</p>\n" | |
| "<p>OBSOLETE CLAUSE: This section is no longer applicable and should be removed.</p>" | |
| ), | |
| "target": ( | |
| "<p>CONTRACT AMENDMENT NO. 3</p>\n" | |
| "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n" | |
| "<p>WHEREAS the original agreement dated January 2024 established terms for software development services;</p>\n" | |
| "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n" | |
| "<p>NOW THEREFORE the parties agree as follows:</p>\n" | |
| "<p>1. The delivery schedule in Section 4.2 is extended by 90 days.</p>\n" | |
| "<p>2. The total contract value is hereby increased to $750,000 to reflect additional scope.</p>\n" | |
| "<p>3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'</p>\n" | |
| "<p>4. All other terms and conditions of the original agreement remain in full force.</p>\n" | |
| "<p>This amendment shall be effective upon execution by both parties.</p>" | |
| ), | |
| "description": ( | |
| "Edit this legal contract amendment: " | |
| "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, " | |
| "(2) Fix 'delivrey' to 'delivery' in clause 1, " | |
| "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', " | |
| "(4) Replace clause 3 text with: 'A new Section 5.1 is added: \\'Vendor shall provide monthly progress reports to Client.\\'', " | |
| "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', " | |
| "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely." | |
| ), | |
| "max_steps": 20, | |
| }, | |
| } | |
| TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"] | |
def compute_similarity(a: str, b: str) -> float:
    """Return the character-level similarity of two strings in ``[0.0, 1.0]``.

    The value is ``difflib.SequenceMatcher(...).ratio()``: ``1.0`` for
    identical strings (including two empty strings) and ``0.0`` when no
    characters can be matched.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The SequenceMatcher ratio of ``a`` and ``b``.
    """
    # autojunk must be off: with the default heuristic, any character that
    # makes up more than ~1% of a 200+ character string is dropped as a match
    # anchor, which can badly understate similarity for ordinary text.
    return SequenceMatcher(None, a, b, autojunk=False).ratio()
class DocEditEnvironment(Environment):
    """
    Document editing RL environment.

    The agent receives a source document made of ``<p>...</p>`` paragraphs
    (one per line) and must transform it to match a target document through
    ``replace``, ``insert`` and ``delete`` operations.

    The reward of a step is the change in similarity to the target caused by
    that step, plus a 0.5 bonus on a step whose similarity reaches the
    completion threshold.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Similarity at or above which the episode is considered complete.
    _SOLVED_THRESHOLD: float = 0.999

    def __init__(self, task_name: str = "easy_word_replace"):
        """Create an environment bound to ``task_name``.

        Unknown task names fall back to ``"easy_word_replace"``.

        The task's source document is loaded immediately, so a ``step()``
        issued before any ``reset()`` edits the real document instead of
        comparing an empty document with an empty target (which would count
        as an instant completion).
        """
        self._task_name = task_name if task_name in TASKS else "easy_word_replace"
        self._load_task()
        self._state = State(episode_id=str(uuid4()), step_count=0)

    def _load_task(self) -> None:
        """Restore the pristine episode data for ``self._task_name``."""
        self._task = TASKS[self._task_name]
        self._max_steps = self._task["max_steps"]
        self._document = self._task["source"]
        self._target = self._task["target"]
        self._prev_similarity = compute_similarity(self._document, self._target)

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation:
        """Start a new episode and return the initial observation.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Id for the new episode; a random UUID when omitted.
            **kwargs: ``task_name`` may select another task. A name that is
                not in ``TASKS`` is ignored and the current task is kept.

        Returns:
            Observation holding the source document, the task description,
            the starting similarity, the full step budget, ``done=False``
            and ``reward=0.0``.
        """
        task_name = kwargs.get("task_name", self._task_name)
        if task_name in TASKS:
            self._task_name = task_name
        self._load_task()
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=self._prev_similarity,
            task_name=self._task_name,
            steps_remaining=self._max_steps,
            done=False,
            reward=0.0,
        )

    def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation:
        """Apply one edit operation and return the resulting observation.

        Supported operations (matched case-insensitively, whitespace stripped):

        * ``replace``: replace the first occurrence of ``action.target``
          with ``action.content``.
        * ``insert``: insert ``action.content`` as a paragraph at line index
          ``action.position``; a missing, negative or out-of-range position
          appends. Content not starting with ``<p>`` is wrapped in
          ``<p>...</p>``.
        * ``delete``: drop every line that contains ``action.target``.

        Anything else (including ``replace``/``delete`` with an empty target)
        leaves the document unchanged but still consumes a step.

        Args:
            action: The edit to apply.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: Ignored.

        Returns:
            Observation with the edited document, its similarity to the
            target, the remaining step budget, the ``done`` flag and the
            reward rounded to four decimals.
        """
        self._state.step_count += 1
        op = action.operation.lower().strip()

        if op == "replace" and action.target:
            # A missing replacement text is treated as "replace with nothing".
            self._document = self._document.replace(action.target, action.content or "", 1)
        elif op == "insert":
            content = action.content or ""
            paragraphs = self._document.split("\n")
            new_para = content if content.startswith("<p>") else f"<p>{content}</p>"
            pos = action.position
            if pos is None or pos < 0 or pos >= len(paragraphs):
                paragraphs.append(new_para)
            else:
                paragraphs.insert(pos, new_para)
            self._document = "\n".join(paragraphs)
        elif op == "delete" and action.target:
            # Removes all lines containing the target text, not only the first.
            self._document = "\n".join(
                line for line in self._document.split("\n") if action.target not in line
            )

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity  # positive if improving
        self._prev_similarity = new_sim

        steps_left = self._max_steps - self._state.step_count
        solved = new_sim >= self._SOLVED_THRESHOLD
        done = solved or steps_left <= 0

        # Bonus for completing the task
        if solved:
            reward += 0.5

        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=new_sim,
            task_name=self._task_name,
            steps_remaining=max(steps_left, 0),
            done=done,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                # True equality: a similarity above the threshold does not
                # guarantee the document is identical to the target.
                "exact_match": self._document == self._target,
            },
        )

    # NOTE(review): kept as a plain method to preserve the existing interface;
    # confirm whether the Environment base class expects ``state`` to be a
    # property instead.
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state