"""
DocEdit Environment — an RL environment for structured document editing.
Agents learn to transform source documents into target documents through
replace, insert, and delete operations on XML-tagged paragraph content.
Three tasks with increasing difficulty (easy → medium → hard).
"""
from difflib import SequenceMatcher
from typing import Any, Optional
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
try:
from ..models import DocEditAction, DocEditObservation
except ImportError:
from models import DocEditAction, DocEditObservation
# ---------------------------------------------------------------------------
# Task definitions: source doc, target doc, description, max steps
# ---------------------------------------------------------------------------
# Registry of editing tasks, keyed by task name. Every entry has the same
# four keys:
#   "source"      - the starting document: one <p>...</p> paragraph per line,
#                   lines joined with "\n".
#   "target"      - the document the agent is expected to produce.
#   "description" - natural-language instructions returned to the agent in
#                   each observation (as `target_description`).
#   "max_steps"   - step budget for one episode of this task.
TASKS = {
    # Five single-word typo fixes; source and target differ only in those words.
    "easy_word_replace": {
        "source": (
            "<p>The company's annual revnue exceeded expectations this quarter.</p>\n"
            "<p>Our clints have expressed strong satisfcation with the new product line.</p>\n"
            "<p>The managment team will present the quartely results on Friday.</p>"
        ),
        "target": (
            "<p>The company's annual revenue exceeded expectations this quarter.</p>\n"
            "<p>Our clients have expressed strong satisfaction with the new product line.</p>\n"
            "<p>The management team will present the quarterly results on Friday.</p>"
        ),
        "description": (
            "Fix spelling errors in this business report. The document contains typos: "
            "'revnue' should be 'revenue', 'clints' should be 'clients', "
            "'satisfcation' should be 'satisfaction', 'managment' should be 'management', "
            "'quartely' should be 'quarterly'."
        ),
        "max_steps": 10,
    },
    # Two paragraph insertions plus one sentence appended to an existing
    # paragraph; the target has eight paragraphs versus six in the source.
    "medium_paragraph_edit": {
        "source": (
            "<p>MEMORANDUM</p>\n"
            "<p>To: All Staff</p>\n"
            "<p>From: Human Resources</p>\n"
            "<p>Subject: Updated Remote Work Policy</p>\n"
            "<p>Effective immediately, all employees may work remotely up to three days per week.</p>\n"
            "<p>Please submit your preferred schedule to your direct manager by end of month.</p>"
        ),
        "target": (
            "<p>MEMORANDUM</p>\n"
            "<p>To: All Staff</p>\n"
            "<p>From: Human Resources</p>\n"
            "<p>Date: April 2026</p>\n"
            "<p>Subject: Updated Remote Work Policy</p>\n"
            "<p>Effective immediately, all employees may work remotely up to three days per week. Employees must ensure reliable internet connectivity and a dedicated workspace.</p>\n"
            "<p>Remote work days must not fall on team meeting days (Tuesday and Thursday).</p>\n"
            "<p>Please submit your preferred schedule to your direct manager by end of month.</p>"
        ),
        "description": (
            "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', "
            "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', "
            "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'"
        ),
        "max_steps": 15,
    },
    # Mixed edits: typo fixes, a rewritten clause 2, a new clause 3 (the old
    # clause 3 becomes clause 4), and removal of the trailing OBSOLETE CLAUSE
    # paragraph.
    "hard_multi_edit": {
        "source": (
            "<p>CONTRACT AMENDMENT NO. 3</p>\n"
            "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n"
            "<p>WHEREAS the original agremeent dated January 2024 established terms for software development services;</p>\n"
            "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n"
            "<p>NOW THEREFORE the parties agree as follows:</p>\n"
            "<p>1. The delivrey schedule in Section 4.2 is extended by 90 days.</p>\n"
            "<p>2. The total contract value remains unchanged at $500,000.</p>\n"
            "<p>3. All other terms and conditions of the original agreeement remain in full force.</p>\n"
            "<p>This amendment shall be effective upon execution by both parties.</p>\n"
            "<p>OBSOLETE CLAUSE: This section is no longer applicable and should be removed.</p>"
        ),
        "target": (
            "<p>CONTRACT AMENDMENT NO. 3</p>\n"
            "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n"
            "<p>WHEREAS the original agreement dated January 2024 established terms for software development services;</p>\n"
            "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n"
            "<p>NOW THEREFORE the parties agree as follows:</p>\n"
            "<p>1. The delivery schedule in Section 4.2 is extended by 90 days.</p>\n"
            "<p>2. The total contract value is hereby increased to $750,000 to reflect additional scope.</p>\n"
            "<p>3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'</p>\n"
            "<p>4. All other terms and conditions of the original agreement remain in full force.</p>\n"
            "<p>This amendment shall be effective upon execution by both parties.</p>"
        ),
        # NOTE(review): the `\\'` sequences in item (4) are escaped
        # backslashes, so the description text contains literal backslash
        # characters before the inner apostrophes, whereas the matching
        # target paragraph uses plain apostrophes. Confirm this is intended.
        "description": (
            "Edit this legal contract amendment: "
            "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, "
            "(2) Fix 'delivrey' to 'delivery' in clause 1, "
            "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', "
            "(4) Replace clause 3 text with: 'A new Section 5.1 is added: \\'Vendor shall provide monthly progress reports to Client.\\'', "
            "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', "
            "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely."
        ),
        "max_steps": 20,
    },
}
# Task names in the same easy -> medium -> hard order as the TASKS literal.
# Not referenced elsewhere in the visible part of this file; presumably
# consumed by callers that iterate over the tasks - verify before removing.
TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"]
def compute_similarity(a: str, b: str) -> float:
    """Return the difflib similarity ratio between two strings.

    The result is ``2 * M / T`` where ``M`` is the number of matched
    characters and ``T`` is the combined length of both strings, so it lies
    in ``[0.0, 1.0]``; two empty strings score ``1.0``.

    Args:
        a: First string (the current document).
        b: Second string (the target document).

    Returns:
        Similarity in ``[0.0, 1.0]``; ``1.0`` means the strings are identical.
    """
    # autojunk must be off: with the default heuristic, any character that
    # makes up more than ~1% of a 200+ character `b` is dropped from the
    # match index. For prose that is nearly every character, so long,
    # almost-identical documents can score far too low (even 0.0).
    return SequenceMatcher(None, a, b, autojunk=False).ratio()
class DocEditEnvironment(Environment):
    """
    Document editing RL environment.

    The agent receives a source document made of ``<p>...</p>`` paragraphs
    (one per line) and must transform it to match a target document through
    ``replace``, ``insert`` and ``delete`` operations.

    The per-step reward is the change in similarity to the target (positive
    when the edit moves the document closer), plus a fixed bonus on the step
    that reaches the success threshold.

    The task's source document is loaded at construction time, so calling
    ``step()`` before ``reset()`` edits the real source instead of comparing
    two empty strings (which would score as a perfect match).
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Similarity at or above this value ends the episode as a success.
    _SUCCESS_THRESHOLD = 0.999
    # Extra reward granted on the step that reaches the success threshold.
    _COMPLETION_BONUS = 0.5

    def __init__(self, task_name: str = "easy_word_replace"):
        """Create an environment bound to ``task_name``.

        Args:
            task_name: Key into ``TASKS``. Unknown names fall back to
                ``"easy_word_replace"``.
        """
        self._task_name = task_name if task_name in TASKS else "easy_word_replace"
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._load_task()

    def _load_task(self) -> None:
        """Load the current task's documents and reset the similarity baseline."""
        self._task = TASKS[self._task_name]
        self._max_steps = self._task["max_steps"]
        self._document = self._task["source"]
        self._target = self._task["target"]
        self._prev_similarity = compute_similarity(self._document, self._target)

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation:
        """Start a new episode and return the initial observation.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Identifier for the new episode; a fresh UUID is
                generated when omitted or empty.
            **kwargs: May contain ``task_name`` to switch tasks. Names not
                present in ``TASKS`` are ignored and the current task is kept.

        Returns:
            Observation holding the source document, the task description,
            the initial similarity, and the full step budget.
        """
        requested_task = kwargs.get("task_name", self._task_name)
        if requested_task in TASKS:
            self._task_name = requested_task
        self._load_task()
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=self._prev_similarity,
            task_name=self._task_name,
            steps_remaining=self._max_steps,
            done=False,
            reward=0.0,
        )

    def _apply_edit(self, op: str, action: DocEditAction) -> str:
        """Return the document that results from applying ``action``.

        Unknown operations, and ``replace``/``delete`` with an empty target,
        leave the document unchanged.
        """
        document = self._document
        if op == "replace" and action.target:
            # Only the first occurrence is replaced.
            return document.replace(action.target, action.content, 1)
        if op == "insert":
            # An empty document has no paragraphs; splitting "" would yield
            # [""] and leave a blank first line after the insert.
            paragraphs = document.split("\n") if document else []
            content = action.content
            new_paragraph = content if content.startswith("<p>") else f"<p>{content}</p>"
            position = action.position
            if position < 0 or position >= len(paragraphs):
                # Out-of-range positions append at the end of the document.
                paragraphs.append(new_paragraph)
            else:
                paragraphs.insert(position, new_paragraph)
            return "\n".join(paragraphs)
        if op == "delete" and action.target:
            # Removes every line that contains the target text, not just the first.
            kept = [line for line in document.split("\n") if action.target not in line]
            return "\n".join(kept)
        return document

    def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation:
        """Apply one edit and score the resulting document.

        Args:
            action: Edit to apply. Reads ``operation`` (case-insensitive,
                surrounding whitespace ignored), ``target``, ``content`` and
                ``position``.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: Ignored.

        Returns:
            Observation with the edited document, its similarity to the
            target, the remaining step budget, the ``done`` flag, and the
            reward rounded to four decimals.
        """
        # Every call consumes a step, including no-op or unrecognized actions.
        self._state.step_count += 1
        op = action.operation.lower().strip()
        self._document = self._apply_edit(op, action)

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity  # positive if improving
        self._prev_similarity = new_sim

        steps_left = self._max_steps - self._state.step_count
        solved = new_sim >= self._SUCCESS_THRESHOLD
        if solved:
            # Bonus for completing the task.
            reward += self._COMPLETION_BONUS

        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=new_sim,
            task_name=self._task_name,
            steps_remaining=max(steps_left, 0),
            done=solved or steps_left <= 0,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                # Threshold-based flag, not a strict string equality check.
                "exact_match": solved,
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state
|