"""
DocEdit Environment — an RL environment for structured document editing.

Agents learn to transform source documents into target documents through
replace, insert, and delete operations on XML-tagged paragraph content.
Three tasks with increasing difficulty (easy → medium → hard).
"""

from difflib import SequenceMatcher
from typing import Any, Optional
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import DocEditAction, DocEditObservation
except ImportError:
    from models import DocEditAction, DocEditObservation

# NOTE(review): the paragraph tag was stripped from this file by a text
# extraction step; "<p>"/"</p>" is reconstructed from the blank-line residue
# it left behind. Confirm against the upstream copy — if the real tag differs,
# these two constants are the only place that needs to change.
_P_OPEN = "<p>"
_P_CLOSE = "</p>"


def _doc(*paragraphs: str) -> str:
    """Wrap each paragraph text in paragraph tags and join them with newlines.

    The result has one tagged paragraph per line and no trailing newline.
    """
    return "\n".join(f"{_P_OPEN}{text}{_P_CLOSE}" for text in paragraphs)


# ---------------------------------------------------------------------------
# Task definitions: source doc, target doc, description, max steps
# ---------------------------------------------------------------------------
TASKS = {
    "easy_word_replace": {
        "source": _doc(
            "The company's annual revnue exceeded expectations this quarter.",
            "Our clints have expressed strong satisfcation with the new product line.",
            "The managment team will present the quartely results on Friday.",
        ),
        "target": _doc(
            "The company's annual revenue exceeded expectations this quarter.",
            "Our clients have expressed strong satisfaction with the new product line.",
            "The management team will present the quarterly results on Friday.",
        ),
        "description": (
            "Fix spelling errors in this business report. The document contains typos: "
            "'revnue' should be 'revenue', 'clints' should be 'clients', "
            "'satisfcation' should be 'satisfaction', 'managment' should be 'management', "
            "'quartely' should be 'quarterly'."
        ),
        "max_steps": 10,
    },
    "medium_paragraph_edit": {
        "source": _doc(
            "MEMORANDUM",
            "To: All Staff",
            "From: Human Resources",
            "Subject: Updated Remote Work Policy",
            "Effective immediately, all employees may work remotely up to three days per week.",
            "Please submit your preferred schedule to your direct manager by end of month.",
        ),
        "target": _doc(
            "MEMORANDUM",
            "To: All Staff",
            "From: Human Resources",
            "Date: April 2026",
            "Subject: Updated Remote Work Policy",
            "Effective immediately, all employees may work remotely up to three days per week. Employees must ensure reliable internet connectivity and a dedicated workspace.",
            "Remote work days must not fall on team meeting days (Tuesday and Thursday).",
            "Please submit your preferred schedule to your direct manager by end of month.",
        ),
        "description": (
            "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', "
            "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', "
            "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'"
        ),
        "max_steps": 15,
    },
    "hard_multi_edit": {
        "source": _doc(
            "CONTRACT AMENDMENT NO. 3",
            "This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').",
            "WHEREAS the original agremeent dated January 2024 established terms for software development services;",
            "WHEREAS both parties wish to modify certain terms of the agreement;",
            "NOW THEREFORE the parties agree as follows:",
            "1. The delivrey schedule in Section 4.2 is extended by 90 days.",
            "2. The total contract value remains unchanged at $500,000.",
            "3. All other terms and conditions of the original agreeement remain in full force.",
            "This amendment shall be effective upon execution by both parties.",
            "OBSOLETE CLAUSE: This section is no longer applicable and should be removed.",
        ),
        "target": _doc(
            "CONTRACT AMENDMENT NO. 3",
            "This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').",
            "WHEREAS the original agreement dated January 2024 established terms for software development services;",
            "WHEREAS both parties wish to modify certain terms of the agreement;",
            "NOW THEREFORE the parties agree as follows:",
            "1. The delivery schedule in Section 4.2 is extended by 90 days.",
            "2. The total contract value is hereby increased to $750,000 to reflect additional scope.",
            "3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'",
            "4. All other terms and conditions of the original agreement remain in full force.",
            "This amendment shall be effective upon execution by both parties.",
        ),
        "description": (
            "Edit this legal contract amendment: "
            "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, "
            "(2) Fix 'delivrey' to 'delivery' in clause 1, "
            "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', "
            "(4) Replace clause 3 text with: 'A new Section 5.1 is added: \\'Vendor shall provide monthly progress reports to Client.\\'', "
            "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', "
            "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely."
        ),
        "max_steps": 20,
    },
}

TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"]


def compute_similarity(a: str, b: str) -> float:
    """Normalized SequenceMatcher ratio between two strings."""
    return SequenceMatcher(None, a, b).ratio()


class DocEditEnvironment(Environment):
    """
    Document editing RL environment.

    The agent receives a source document with XML paragraph tags and must
    transform it to match a target document through edit operations.
    Reward is the incremental improvement in similarity to the target,
    plus a 0.5 bonus on the step that reaches similarity >= 0.999.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, task_name: str = "easy_word_replace"):
        """Create an environment bound to ``task_name``.

        An unknown ``task_name`` silently falls back to "easy_word_replace".
        """
        self._task_name = task_name if task_name in TASKS else "easy_word_replace"
        # Load the task's documents right away. Leaving document and target
        # empty until reset() made a premature step() compare "" with "",
        # which scores similarity 1.0 and ends the episode with the bonus.
        self._load_task()
        self._state = State(episode_id=str(uuid4()), step_count=0)

    def _load_task(self) -> None:
        """Point the working document, target and step budget at the current task."""
        self._task = TASKS[self._task_name]
        self._max_steps = self._task["max_steps"]
        self._document = self._task["source"]
        self._target = self._task["target"]
        self._prev_similarity = compute_similarity(self._document, self._target)

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation:
        """Start a new episode and return the initial observation.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Identifier for the new episode; a fresh UUID4 string
                is generated when omitted or empty.
            **kwargs: ``task_name`` may select a different task. A name that
                is not a key of ``TASKS`` is ignored and the current task is
                kept.
        """
        requested = kwargs.get("task_name", self._task_name)
        if requested in TASKS:
            self._task_name = requested
        self._load_task()
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=self._prev_similarity,
            task_name=self._task_name,
            steps_remaining=self._max_steps,
            done=False,
            reward=0.0,
        )

    def _apply_edit(self, op: str, action: DocEditAction) -> str:
        """Return the document that results from applying ``action``.

        ``op`` is the already-normalized operation name. An unrecognized
        operation, or a replace/delete with an empty target, returns the
        document unchanged.
        """
        # Treat missing content as empty text instead of raising TypeError.
        content = action.content or ""

        if op == "replace" and action.target:
            # Only the first occurrence is replaced.
            return self._document.replace(action.target, content, 1)

        if op == "insert":
            lines = self._document.split("\n")
            if content.startswith(_P_OPEN):
                new_para = content
            else:
                new_para = f"{_P_OPEN}{content}{_P_CLOSE}"
            pos = action.position
            # A missing or out-of-range position appends at the end.
            if pos is None or pos < 0 or pos >= len(lines):
                lines.append(new_para)
            else:
                lines.insert(pos, new_para)
            return "\n".join(lines)

        if op == "delete" and action.target:
            # Drops EVERY line that contains the target text, not just the first.
            return "\n".join(
                line for line in self._document.split("\n") if action.target not in line
            )

        return self._document

    def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation:
        """Apply one edit action and return the resulting observation.

        Every call consumes a step, including actions that leave the document
        unchanged. The episode is done once similarity reaches 0.999 or the
        step budget is exhausted.

        Args:
            action: The edit to apply; ``operation`` is matched
                case-insensitively after stripping whitespace.
            timeout_s: Accepted for interface compatibility; not used here.
        """
        self._state.step_count += 1
        op = action.operation.lower().strip()
        self._document = self._apply_edit(op, action)

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity  # positive if improving
        self._prev_similarity = new_sim

        steps_left = self._max_steps - self._state.step_count
        solved = new_sim >= 0.999
        done = solved or (steps_left <= 0)

        # Bonus for completing the task
        if solved:
            reward += 0.5

        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=new_sim,
            task_name=self._task_name,
            steps_remaining=max(steps_left, 0),
            done=done,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                # NOTE(review): this reports the 0.999 similarity threshold,
                # not string equality with the target — confirm that is intended.
                "exact_match": solved,
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state