Spaces:

rycerzes
/

qed-math-openenv

Sleeping

sourasishbasu

add .gitattributes and normalize line endings to LF

2e3721b about 2 months ago

2.68 kB

	"""
	Data models for the QED Math Environment.

	Defines action and observation types for mathematical proof submission
	and grading.
	"""

	from openenv.core.env_server.types import Action, Observation
	from pydantic import Field

	RewardValue = bool \| int \| float \| None


	class QEDMathAction(Action):
	    """Common parent type for actions of the QED Math environment.

	    Declares no fields of its own; it only extends ``Action``.
	    """


	class SubmitProof(QEDMathAction):
	    """Action that submits a proof attempt for the current problem."""

	    # Required field: ``...`` means no default, so the proof text must
	    # always be supplied.
	    proof: str = Field(
	        ...,
	        description="The proof text submitted by agent",
	    )
	    # Attempt counter; falls back to 1 when the caller leaves it unset.
	    attempt_number: int = Field(
	        default=1,
	        description="Attempt counter",
	    )


	class GetProblem(QEDMathAction):
	    """Action that asks for the current problem statement.

	    Carries no payload fields of its own.
	    """


	class GetGradingGuidelines(QEDMathAction):
	    """Action that asks for the grading guidelines (rubric) of the current problem.

	    Carries no payload fields of its own.
	    """


	class QEDMathObservation(Observation):
	    """Common parent type for observations of the QED Math environment.

	    Declares no fields of its own; it only extends ``Observation``.
	    """


	class ProblemObservation(QEDMathObservation):
	    """Observation that describes a problem and its grading context.

	    Every field has a default, so the model can be built with no arguments.
	    """

	    # --- Problem content -------------------------------------------------
	    problem: str = Field(
	        default="",
	        description="The mathematical problem",
	    )
	    reference_solution: str = Field(
	        default="",
	        description="Ground truth solution",
	    )
	    grading_guidelines: str = Field(
	        default="",
	        description="Rubric for grading (0-7 scale)",
	    )

	    # --- Provenance ------------------------------------------------------
	    problem_id: str = Field(
	        default="",
	        description="Unique problem identifier",
	    )
	    dataset_source: str = Field(
	        default="",
	        description="Source dataset name",
	    )

	    # --- Episode configuration -------------------------------------------
	    # Plain string; the description lists proof, answer and multi_step, but
	    # no validation of the value is declared here.
	    problem_type: str = Field(default="proof", description="Problem type: proof, answer, or multi_step")
	    max_attempts: int = Field(default=1, description="Maximum number of allowed submission attempts")


	class ProofSubmissionObservation(QEDMathObservation):
	    """Observation produced in response to a proof submission.

	    Holds the submitted text, the grading outcome, and attempt bookkeeping.
	    Every field has a default, so the model can be built with no arguments.
	    """

	    # --- Submission and grading outcome ----------------------------------
	    proof: str = Field(
	        default="",
	        description="The submitted proof",
	    )
	    # Described as a 0-7 rubric grade; no range constraint is declared here.
	    score: int = Field(
	        default=0,
	        description="Grade from rubric (0-7)",
	    )
	    feedback: str = Field(
	        default="",
	        description="Grader feedback",
	    )
	    # NOTE(review): presumably re-declares a field of the base Observation
	    # with a 0.0 default -- confirm against openenv's Observation.
	    reward: RewardValue = Field(default=0.0, description="Normalized reward (score/7)")
	    # Defaults to True, i.e. the episode is marked finished unless the
	    # producer of this observation sets it otherwise.
	    done: bool = Field(
	        default=True,
	        description="Episode ends after proof submission",
	    )
	    problem_type: str = Field(default="proof", description="Problem type used to evaluate this submission")

	    # --- Attempt bookkeeping ---------------------------------------------
	    attempt_number: int = Field(
	        default=1,
	        description="1-based submission attempt index",
	    )
	    attempts_remaining: int = Field(default=0, description="Remaining submission attempts in the current episode")
	    is_correct: bool = Field(default=False, description="Whether the submission is considered fully correct")