File size: 10,001 Bytes
06165d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""
DocEdit Environment — an RL environment for structured document editing.

Agents learn to transform source documents into target documents through
replace, insert, and delete operations on XML-tagged paragraph content.
Three tasks with increasing difficulty (easy → medium → hard).
"""

from difflib import SequenceMatcher
from typing import Any, Optional
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import DocEditAction, DocEditObservation
except ImportError:
    from models import DocEditAction, DocEditObservation


# ---------------------------------------------------------------------------
# Task definitions: source doc, target doc, description, max steps
# ---------------------------------------------------------------------------

# Each task maps a task name to:
#   "source"      - the starting document, one <p>...</p> paragraph per line
#   "target"      - the document the agent must produce
#   "description" - natural-language editing instructions shown to the agent
#   "max_steps"   - step budget for one episode
TASKS = {
    "easy_word_replace": {
        "source": (
            "<p>The company's annual revnue exceeded expectations this quarter.</p>\n"
            "<p>Our clints have expressed strong satisfcation with the new product line.</p>\n"
            "<p>The managment team will present the quartely results on Friday.</p>"
        ),
        "target": (
            "<p>The company's annual revenue exceeded expectations this quarter.</p>\n"
            "<p>Our clients have expressed strong satisfaction with the new product line.</p>\n"
            "<p>The management team will present the quarterly results on Friday.</p>"
        ),
        "description": (
            "Fix spelling errors in this business report. The document contains typos: "
            "'revnue' should be 'revenue', 'clints' should be 'clients', "
            "'satisfcation' should be 'satisfaction', 'managment' should be 'management', "
            "'quartely' should be 'quarterly'."
        ),
        "max_steps": 10,
    },
    "medium_paragraph_edit": {
        "source": (
            "<p>MEMORANDUM</p>\n"
            "<p>To: All Staff</p>\n"
            "<p>From: Human Resources</p>\n"
            "<p>Subject: Updated Remote Work Policy</p>\n"
            "<p>Effective immediately, all employees may work remotely up to three days per week.</p>\n"
            "<p>Please submit your preferred schedule to your direct manager by end of month.</p>"
        ),
        "target": (
            "<p>MEMORANDUM</p>\n"
            "<p>To: All Staff</p>\n"
            "<p>From: Human Resources</p>\n"
            "<p>Date: April 2026</p>\n"
            "<p>Subject: Updated Remote Work Policy</p>\n"
            "<p>Effective immediately, all employees may work remotely up to three days per week. Employees must ensure reliable internet connectivity and a dedicated workspace.</p>\n"
            "<p>Remote work days must not fall on team meeting days (Tuesday and Thursday).</p>\n"
            "<p>Please submit your preferred schedule to your direct manager by end of month.</p>"
        ),
        "description": (
            "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', "
            "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', "
            "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'"
        ),
        "max_steps": 15,
    },
    "hard_multi_edit": {
        "source": (
            "<p>CONTRACT AMENDMENT NO. 3</p>\n"
            "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n"
            "<p>WHEREAS the original agremeent dated January 2024 established terms for software development services;</p>\n"
            "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n"
            "<p>NOW THEREFORE the parties agree as follows:</p>\n"
            "<p>1. The delivrey schedule in Section 4.2 is extended by 90 days.</p>\n"
            "<p>2. The total contract value remains unchanged at $500,000.</p>\n"
            "<p>3. All other terms and conditions of the original agreeement remain in full force.</p>\n"
            "<p>This amendment shall be effective upon execution by both parties.</p>\n"
            "<p>OBSOLETE CLAUSE: This section is no longer applicable and should be removed.</p>"
        ),
        "target": (
            "<p>CONTRACT AMENDMENT NO. 3</p>\n"
            "<p>This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').</p>\n"
            "<p>WHEREAS the original agreement dated January 2024 established terms for software development services;</p>\n"
            "<p>WHEREAS both parties wish to modify certain terms of the agreement;</p>\n"
            "<p>NOW THEREFORE the parties agree as follows:</p>\n"
            "<p>1. The delivery schedule in Section 4.2 is extended by 90 days.</p>\n"
            "<p>2. The total contract value is hereby increased to $750,000 to reflect additional scope.</p>\n"
            "<p>3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'</p>\n"
            "<p>4. All other terms and conditions of the original agreement remain in full force.</p>\n"
            "<p>This amendment shall be effective upon execution by both parties.</p>"
        ),
        "description": (
            "Edit this legal contract amendment: "
            "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, "
            "(2) Fix 'delivrey' to 'delivery' in clause 1, "
            "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', "
            # The replacement text itself contains single quotes, so it is
            # wrapped in double quotes here. Escaping the inner quotes with
            # backslashes would put literal backslashes into the instructions,
            # and the target document contains none.
            "(4) Replace clause 3 text with: \"A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'\", "
            "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', "
            "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely."
        ),
        "max_steps": 20,
    },
}

# Task names listed from easiest to hardest.
TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"]


def compute_similarity(a: str, b: str) -> float:
    """Return the normalized SequenceMatcher ratio between two strings.

    The result lies in [0.0, 1.0]; 1.0 means the strings are identical.

    ``autojunk`` is disabled on purpose. With the default heuristic, any
    character making up more than about 1% of a sequence of 200+ items is
    treated as junk. For character-level comparison of whole documents that
    covers most letters and spaces, which distorts the ratio and can even
    score two identical strings as 0.0.
    """
    # Identical strings need no matching pass.
    if a == b:
        return 1.0
    return SequenceMatcher(None, a, b, autojunk=False).ratio()


class DocEditEnvironment(Environment):
    """
    Document editing RL environment.

    The agent receives a source document with XML paragraph tags and must
    transform it to match a target document through edit operations.
    Reward is the incremental improvement in similarity to the target,
    plus a flat bonus on the step that reaches the target.

    Supported operations (``action.operation``, case-insensitive):
      - ``replace``: replace the first occurrence of ``action.target``
        with ``action.content``.
      - ``insert``: insert ``action.content`` as a new line at index
        ``action.position``; appended when the index is out of range.
      - ``delete``: remove every line that contains ``action.target``.
    Any other operation, or replace/delete with an empty target, leaves
    the document unchanged but still consumes a step.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Similarity at or above this value is treated as reaching the target.
    _SUCCESS_THRESHOLD: float = 0.999
    # Flat bonus added to the reward on the step that reaches the target.
    _COMPLETION_BONUS: float = 0.5

    def __init__(self, task_name: str = "easy_word_replace"):
        """Create an environment bound to ``task_name``.

        An unknown task name falls back to ``"easy_word_replace"``.
        """
        self._task_name = task_name if task_name in TASKS else "easy_word_replace"
        self._task = TASKS[self._task_name]
        self._max_steps = self._task["max_steps"]
        # Load the task's documents right away. Starting with an empty
        # document and an empty target would make a step() issued before
        # reset() compare "" with "" and collect the completion bonus.
        self._document = self._task["source"]
        self._target = self._task["target"]
        self._prev_similarity = compute_similarity(self._document, self._target)
        self._state = State(episode_id=str(uuid4()), step_count=0)

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation:
        """Start a new episode and return the initial observation.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Identifier for the new episode. A fresh UUID is
                generated when it is missing or empty.
            **kwargs: ``task_name`` may select another entry of ``TASKS``.
                A name that is not in ``TASKS`` is ignored and the current
                task is kept.

        Returns:
            Observation of the untouched source document with reward 0.0.
        """
        task_name = kwargs.get("task_name", self._task_name)
        if task_name in TASKS:
            self._task_name = task_name
            self._task = TASKS[self._task_name]
            self._max_steps = self._task["max_steps"]

        self._document = self._task["source"]
        self._target = self._task["target"]
        self._prev_similarity = compute_similarity(self._document, self._target)
        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)

        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=self._prev_similarity,
            task_name=self._task_name,
            steps_remaining=self._max_steps,
            done=False,
            reward=0.0,
        )

    def _insert_paragraph(self, content: str, position: int) -> str:
        """Return the document with ``content`` inserted as a line at ``position``."""
        paragraph = content if content.startswith("<p>") else f"<p>{content}</p>"
        # "".split("\n") yields [""], which would leave a stray blank line
        # next to the new paragraph; treat an empty document as zero lines.
        lines = self._document.split("\n") if self._document else []
        if position < 0 or position >= len(lines):
            lines.append(paragraph)
        else:
            lines.insert(position, paragraph)
        return "\n".join(lines)

    def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation:
        """Apply one edit operation and return the resulting observation.

        Args:
            action: The edit to apply; ``operation``, ``target``, ``content``
                and ``position`` are read depending on the operation.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: Ignored.

        Returns:
            Observation carrying the edited document, its similarity to the
            target, the reward (similarity gain, plus the completion bonus
            when the target is reached, rounded to 4 decimals) and ``done``,
            which is set when the target is reached or the step budget is
            exhausted.
        """
        self._state.step_count += 1
        op = action.operation.lower().strip()

        if op == "replace" and action.target:
            self._document = self._document.replace(action.target, action.content, 1)
        elif op == "insert":
            self._document = self._insert_paragraph(action.content, action.position)
        elif op == "delete" and action.target:
            # Drop every line that contains the target text.
            kept = [line for line in self._document.split("\n") if action.target not in line]
            self._document = "\n".join(kept)

        new_sim = compute_similarity(self._document, self._target)
        reward = new_sim - self._prev_similarity  # positive if improving
        self._prev_similarity = new_sim

        steps_left = self._max_steps - self._state.step_count
        reached_target = new_sim >= self._SUCCESS_THRESHOLD
        if reached_target:
            reward += self._COMPLETION_BONUS
        done = reached_target or steps_left <= 0

        return DocEditObservation(
            document=self._document,
            target_description=self._task["description"],
            similarity=new_sim,
            task_name=self._task_name,
            steps_remaining=max(steps_left, 0),
            done=done,
            reward=round(reward, 4),
            metadata={
                "step": self._state.step_count,
                "operation": op,
                "exact_match": reached_target,
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state