Spaces:

sanjuhs
/

doc-edit-env

Sleeping

App Files Files Community

doc-edit-env / server /doc_edit_environment.py

sanjuhs

Upload folder using huggingface_hub

06165d0 verified about 2 months ago

raw

history blame contribute delete

10 kB

	"""
	DocEdit Environment — an RL environment for structured document editing.

	Agents learn to transform source documents into target documents through
	replace, insert, and delete operations on XML-tagged paragraph content.
	Three tasks with increasing difficulty (easy → medium → hard).
	"""

	from difflib import SequenceMatcher
	from typing import Any, Optional
	from uuid import uuid4

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	try:
	from ..models import DocEditAction, DocEditObservation
	except ImportError:
	from models import DocEditAction, DocEditObservation


	# ---------------------------------------------------------------------------
	# Task definitions: source doc, target doc, description, max steps
	# ---------------------------------------------------------------------------

	def _as_document(*paragraphs: str) -> str:
	    """Wrap each text in ``<p>...</p>`` and join the paragraphs with newlines."""
	    return "\n".join(f"<p>{text}</p>" for text in paragraphs)


	# Each task maps to its starting document ("source"), the document the agent
	# must reach ("target"), the natural-language instructions ("description"),
	# and the step budget for one episode ("max_steps").
	TASKS = {
	    # Easy: five single-word spelling fixes spread over three paragraphs.
	    "easy_word_replace": {
	        "source": _as_document(
	            "The company's annual revnue exceeded expectations this quarter.",
	            "Our clints have expressed strong satisfcation with the new product line.",
	            "The managment team will present the quartely results on Friday.",
	        ),
	        "target": _as_document(
	            "The company's annual revenue exceeded expectations this quarter.",
	            "Our clients have expressed strong satisfaction with the new product line.",
	            "The management team will present the quarterly results on Friday.",
	        ),
	        "description": (
	            "Fix spelling errors in this business report. The document contains typos: "
	            "'revnue' should be 'revenue', 'clints' should be 'clients', "
	            "'satisfcation' should be 'satisfaction', 'managment' should be 'management', "
	            "'quartely' should be 'quarterly'."
	        ),
	        "max_steps": 10,
	    },
	    # Medium: two paragraph insertions plus one sentence appended in place.
	    "medium_paragraph_edit": {
	        "source": _as_document(
	            "MEMORANDUM",
	            "To: All Staff",
	            "From: Human Resources",
	            "Subject: Updated Remote Work Policy",
	            "Effective immediately, all employees may work remotely up to three days per week.",
	            "Please submit your preferred schedule to your direct manager by end of month.",
	        ),
	        "target": _as_document(
	            "MEMORANDUM",
	            "To: All Staff",
	            "From: Human Resources",
	            "Date: April 2026",
	            "Subject: Updated Remote Work Policy",
	            "Effective immediately, all employees may work remotely up to three days per week."
	            " Employees must ensure reliable internet connectivity and a dedicated workspace.",
	            "Remote work days must not fall on team meeting days (Tuesday and Thursday).",
	            "Please submit your preferred schedule to your direct manager by end of month.",
	        ),
	        "description": (
	            "Edit this office memorandum: (1) Insert a 'Date: April 2026' paragraph after 'From: Human Resources', "
	            "(2) Append to the remote work paragraph: ' Employees must ensure reliable internet connectivity and a dedicated workspace.', "
	            "(3) Insert a new paragraph before the last paragraph: 'Remote work days must not fall on team meeting days (Tuesday and Thursday).'"
	        ),
	        "max_steps": 15,
	    },
	    # Hard: typo fixes, clause rewrites, renumbering and a paragraph deletion.
	    "hard_multi_edit": {
	        "source": _as_document(
	            "CONTRACT AMENDMENT NO. 3",
	            "This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').",
	            "WHEREAS the original agremeent dated January 2024 established terms for software development services;",
	            "WHEREAS both parties wish to modify certain terms of the agreement;",
	            "NOW THEREFORE the parties agree as follows:",
	            "1. The delivrey schedule in Section 4.2 is extended by 90 days.",
	            "2. The total contract value remains unchanged at $500,000.",
	            "3. All other terms and conditions of the original agreeement remain in full force.",
	            "This amendment shall be effective upon execution by both parties.",
	            "OBSOLETE CLAUSE: This section is no longer applicable and should be removed.",
	        ),
	        "target": _as_document(
	            "CONTRACT AMENDMENT NO. 3",
	            "This amendment is entered into between Party A (hereinafter 'the Vendor') and Party B (hereinafter 'the Client').",
	            "WHEREAS the original agreement dated January 2024 established terms for software development services;",
	            "WHEREAS both parties wish to modify certain terms of the agreement;",
	            "NOW THEREFORE the parties agree as follows:",
	            "1. The delivery schedule in Section 4.2 is extended by 90 days.",
	            "2. The total contract value is hereby increased to $750,000 to reflect additional scope.",
	            "3. A new Section 5.1 is added: 'Vendor shall provide monthly progress reports to Client.'",
	            "4. All other terms and conditions of the original agreement remain in full force.",
	            "This amendment shall be effective upon execution by both parties.",
	        ),
	        "description": (
	            "Edit this legal contract amendment: "
	            "(1) Fix 'agremeent' to 'agreement' in the WHEREAS clause, "
	            "(2) Fix 'delivrey' to 'delivery' in clause 1, "
	            "(3) Replace clause 2 text with: 'The total contract value is hereby increased to $750,000 to reflect additional scope.', "
	            "(4) Replace clause 3 text with: 'A new Section 5.1 is added: \\'Vendor shall provide monthly progress reports to Client.\\'', "
	            "(5) Renumber old clause 3 as clause 4 and fix 'agreeement' to 'agreement', "
	            "(6) Delete the 'OBSOLETE CLAUSE' paragraph entirely."
	        ),
	        "max_steps": 20,
	    },
	}

	# Tasks listed from easiest to hardest.
	TASK_ORDER = ["easy_word_replace", "medium_paragraph_edit", "hard_multi_edit"]


	def compute_similarity(a: str, b: str) -> float:
	    """Return the ``SequenceMatcher`` similarity ratio of two strings.

	    The result is ``2 * matches / (len(a) + len(b))``, a float in
	    ``[0.0, 1.0]``; two empty strings compare as ``1.0``.

	    ``autojunk`` is disabled on purpose. With the default heuristic, once
	    ``b`` is 200 characters or longer every character that occurs in more
	    than roughly 1% of its positions is dropped from the match index, so
	    long, mostly-identical documents can receive a misleadingly low score.
	    """
	    return SequenceMatcher(None, a, b, autojunk=False).ratio()


	class DocEditEnvironment(Environment):
	    """
	    Document editing RL environment.

	    The agent receives a source document made of newline-separated
	    ``<p>...</p>`` paragraphs and must transform it into the task's target
	    document through ``replace``, ``insert`` and ``delete`` operations.

	    The per-step reward is the change in similarity to the target (see
	    ``compute_similarity``), plus a 0.5 bonus on any step that leaves the
	    document identical to the target. An episode ends when the document
	    equals the target or the task's step budget is used up.
	    """

	    SUPPORTS_CONCURRENT_SESSIONS: bool = True

	    def __init__(self, task_name: str = "easy_word_replace"):
	        """Create an environment bound to ``task_name``.

	        Unknown task names fall back to ``"easy_word_replace"``.
	        """
	        self._task_name = task_name if task_name in TASKS else "easy_word_replace"
	        self._task = TASKS[self._task_name]
	        self._max_steps = self._task["max_steps"]
	        # Load the task's documents immediately. Leaving both as "" would make
	        # a step() issued before reset() compare two empty strings, which are
	        # trivially identical and would be rewarded as a solved episode.
	        self._document = self._task["source"]
	        self._target = self._task["target"]
	        self._prev_similarity = compute_similarity(self._document, self._target)
	        self._state = State(episode_id=str(uuid4()), step_count=0)

	    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> DocEditObservation:
	        """Start a new episode and return the initial observation.

	        Args:
	            seed: Accepted for interface compatibility; not used here.
	            episode_id: Identifier for the new episode; a fresh UUID is
	                generated when omitted or empty.
	            **kwargs: May carry ``task_name`` to switch tasks. A name that is
	                not a key of ``TASKS`` is ignored and the current task is kept.

	        Returns:
	            Observation holding the task's source document, its description,
	            the starting similarity, the full step budget, ``done=False`` and
	            ``reward=0.0``.
	        """
	        requested = kwargs.get("task_name", self._task_name)
	        if requested in TASKS:
	            self._task_name = requested
	            self._task = TASKS[requested]
	            self._max_steps = self._task["max_steps"]

	        self._document = self._task["source"]
	        self._target = self._task["target"]
	        self._prev_similarity = compute_similarity(self._document, self._target)
	        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)

	        return DocEditObservation(
	            document=self._document,
	            target_description=self._task["description"],
	            similarity=self._prev_similarity,
	            task_name=self._task_name,
	            steps_remaining=self._max_steps,
	            done=False,
	            reward=0.0,
	        )

	    def step(self, action: DocEditAction, timeout_s: Optional[float] = None, **kwargs: Any) -> DocEditObservation:
	        """Apply one edit operation and return the resulting observation.

	        Supported operations (matched case-insensitively, surrounding
	        whitespace ignored):

	        * ``replace`` - replace the first occurrence of ``action.target``
	          with ``action.content``; requires a non-empty target.
	        * ``insert`` - insert ``action.content`` as a paragraph before line
	          index ``action.position``; a negative or out-of-range position
	          appends. Content not starting with ``<p>`` is wrapped in
	          ``<p>...</p>``.
	        * ``delete`` - remove every line that contains ``action.target``;
	          requires a non-empty target.

	        Any other operation, or a replace/delete with an empty target,
	        leaves the document unchanged but still consumes a step.

	        Args:
	            action: The edit to apply.
	            timeout_s: Accepted for interface compatibility; not used here.
	            **kwargs: Ignored.

	        Returns:
	            Observation with the updated document, its similarity to the
	            target, the remaining step budget, the ``done`` flag, the reward
	            rounded to 4 decimals, and metadata (``step``, ``operation``,
	            ``exact_match``).
	        """
	        self._state.step_count += 1
	        op = action.operation.lower().strip()

	        if op == "replace" and action.target:
	            self._document = self._document.replace(action.target, action.content, 1)
	        elif op == "insert":
	            paragraphs = self._document.split("\n")
	            new_para = action.content if action.content.startswith("<p>") else f"<p>{action.content}</p>"
	            pos = action.position
	            if pos < 0 or pos >= len(paragraphs):
	                paragraphs.append(new_para)
	            else:
	                paragraphs.insert(pos, new_para)
	            self._document = "\n".join(paragraphs)
	        elif op == "delete" and action.target:
	            # Drops every line containing the target text, not just the first.
	            kept = [line for line in self._document.split("\n") if action.target not in line]
	            self._document = "\n".join(kept)

	        new_sim = compute_similarity(self._document, self._target)
	        reward = new_sim - self._prev_similarity  # positive if improving
	        self._prev_similarity = new_sim

	        steps_left = self._max_steps - self._state.step_count

	        # Completion means the document IS the target. A similarity threshold
	        # such as 0.999 is not equivalent: on documents of roughly 500+
	        # characters a single stray character still scores above it, so an
	        # unfinished edit would end the episode and collect the bonus.
	        exact_match = self._document == self._target
	        done = exact_match or steps_left <= 0

	        # Bonus for completing the task
	        if exact_match:
	            reward += 0.5

	        return DocEditObservation(
	            document=self._document,
	            target_description=self._task["description"],
	            similarity=new_sim,
	            task_name=self._task_name,
	            steps_remaining=max(steps_left, 0),
	            done=done,
	            reward=round(reward, 4),
	            metadata={
	                "step": self._state.step_count,
	                "operation": op,
	                "exact_match": exact_match,
	            },
	        )

	    @property
	    def state(self) -> State:
	        """Current episode state (episode id and number of steps taken)."""
	        return self._state