Spaces:

madhurithika22
/

spec-guard

Sleeping

App Files Files Community

spec-guard / server /env.py

madhurithika22

Upload folder using huggingface_hub

5de2623 verified about 2 months ago

raw

history blame contribute delete

3.76 kB

	from uuid import uuid4
	from typing import List, Any, Dict
	import random

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	from .models import SpecGamingAction, SpecGamingObservation
	# Note: We import TASKS from .tasks to keep a single source of truth
	from .tasks import (
	TASKS,
	grade_data_cleaning,
	grade_financial,
	grade_instruction,
	normalize_score
	)

	# =========================
	# 🌍 ENVIRONMENT
	# =========================
	class SpecGamingEnvironment(Environment):
	    """OpenEnv-compliant environment that serves SpecGuard tasks one at a time.

	    Every ``reset()`` starts a new episode on the next entry of ``TASKS``
	    (round-robin). Every ``step()`` grades the submitted action with the
	    current task's grader and ends the episode (``done=True``), so episodes
	    are single-step.
	    """

	    SUPPORTS_CONCURRENT_SESSIONS: bool = True

	    # Class-level alias so the task list is reachable without an instance.
	    tasks = TASKS

	    # Reward reported on reset and whenever no usable grade can be produced.
	    _DEFAULT_REWARD: float = 0.10

	    def __init__(self):
	        """Create an environment with a fresh episode state and no active task."""
	        super().__init__()
	        self.tasks = TASKS

	        self._state = State(episode_id=str(uuid4()), step_count=0)
	        # Populated by reset(), or lazily by step() if reset() was never called.
	        self.current_task = None
	        self.task_index = 0

	        # Lets a task reference its grader by name (string) instead of by callable.
	        self._grader_map = {
	            "grade_data_cleaning": grade_data_cleaning,
	            "grade_financial": grade_financial,
	            "grade_instruction": grade_instruction,
	        }

	    # -------------------------
	    # INTERNAL HELPERS
	    # -------------------------
	    def _advance_task(self):
	        """Make the next task (round-robin over ``self.tasks``) current and return it."""
	        self.current_task = self.tasks[self.task_index % len(self.tasks)]
	        self.task_index += 1
	        return self.current_task

	    def _grade(self, action):
	        """Grade ``action`` against the current task.

	        Returns:
	            A ``(reward, reason)`` tuple. ``reward`` is always a ``float``.
	            When the task has no usable grader the default reward is returned;
	            when the grader raises, or returns something that cannot be
	            converted to ``float``, the default reward is returned together
	            with a ``"grader error: ..."`` reason.
	        """
	        try:
	            grader = self.current_task.get("grader")

	            # A string grader is resolved through the name map; unknown names
	            # resolve to None and fall through to the default reward below.
	            if isinstance(grader, str):
	                grader = self._grader_map.get(grader)

	            if callable(grader):
	                # Convert inside the try so a non-numeric grader result is
	                # reported as a grader error instead of crashing step().
	                reward = float(grader(action))
	            else:
	                reward = self._DEFAULT_REWARD
	        except Exception as e:
	            # Deliberately broad: a faulty grader must not take the episode down.
	            return self._DEFAULT_REWARD, f"grader error: {str(e)}"

	        return reward, "graded via task grader"

	    # -------------------------
	    # RESET
	    # -------------------------
	    def reset(self) -> SpecGamingObservation:
	        """Start a new episode on the next task and return its initial observation.

	        A new ``State`` (fresh episode id, ``step_count=0``) is created and the
	        task pointer advances by one, wrapping around the task list.
	        """
	        self._state = State(episode_id=str(uuid4()), step_count=0)

	        # Cycle through tasks so that repeated resets visit every task.
	        task = self._advance_task()

	        return SpecGamingObservation(
	            task=task["name"],
	            input_data=task["input"],
	            instruction=task["instruction"],
	            # Initial reward must be > 0.0 and < 1.0 for Phase 2 compliance
	            reward=self._DEFAULT_REWARD,
	            done=False,
	            metadata={
	                "task_id": task["id"],
	                "required_steps": task.get("required_steps", []),
	            },
	        )

	    # -------------------------
	    # STEP
	    # -------------------------
	    def step(self, action: SpecGamingAction) -> SpecGamingObservation:
	        """Grade ``action`` for the current task and finish the episode.

	        Args:
	            action: The agent's submission; its ``steps`` and ``output``
	                attributes are echoed back in the observation metadata.

	        Returns:
	            An observation with the graded reward, ``done=True`` and metadata
	            holding the grading ``reason``, the echoed ``steps``/``output``
	            and the current ``step_count``.
	        """
	        # step() can be reached on an instance that was never reset
	        # (NOTE(review): e.g. a server that builds a fresh environment per
	        # request - confirm against the OpenEnv server in use). Select the
	        # next task instead of failing on a None task further down.
	        if self.current_task is None:
	            self._advance_task()

	        self._state.step_count += 1
	        reward, reason = self._grade(action)
	        task = self.current_task

	        return SpecGamingObservation(
	            task=task["name"],
	            input_data=task["input"],
	            instruction=task["instruction"],
	            reward=reward,
	            done=True,
	            metadata={
	                "reason": reason,
	                "steps": action.steps,
	                "output": action.output,
	                "step_count": self._state.step_count,
	            },
	        )

	    # -------------------------
	    # STATE
	    # -------------------------
	    @property
	    def state(self) -> State:
	        """Return the current episode ``State`` (episode id and step count)."""
	        return self._state


	# =========================
	# 🔥 EXPORTS (CRITICAL)
	# =========================
	# Names re-exported by this module: the environment class and the task list.
	__all__ = ["SpecGamingEnvironment", "TASKS"]