qed-math-openenv / models.py
sourasishbasu
add .gitattributes and normalize line endings to LF
2e3721b
"""
Data models for the QED Math Environment.
Defines action and observation types for mathematical proof submission
and grading.
"""
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
# Type alias for the ``reward`` field declared on ProofSubmissionObservation:
# the value may be a bool, an int, a float, or None.
RewardValue = bool | int | float | None
class QEDMathAction(Action):
    """Common base class for every action in the QED Math environment.

    Declares no fields of its own. SubmitProof, GetProblem and
    GetGradingGuidelines derive from it.
    """
class SubmitProof(QEDMathAction):
    """Action that carries a proof attempt for the current problem.

    ``proof`` has no default and must be supplied; ``attempt_number``
    falls back to 1 when omitted.
    """

    # The ellipsis default marks the field as required.
    proof: str = Field(
        ...,
        description="The proof text submitted by agent",
    )
    attempt_number: int = Field(
        default=1,
        description="Attempt counter",
    )
class GetProblem(QEDMathAction):
    """Action requesting the current problem statement.

    Carries no fields of its own; the action type is the whole request.
    """
class GetGradingGuidelines(QEDMathAction):
    """Action requesting the grading guidelines (rubric) for the current problem.

    Carries no fields of its own; the action type is the whole request.
    """
class QEDMathObservation(Observation):
    """Common base class for every observation in the QED Math environment.

    Declares no fields of its own. ProblemObservation and
    ProofSubmissionObservation derive from it.
    """
class ProblemObservation(QEDMathObservation):
    """Observation describing a problem and the material used to grade it.

    Every field has a default, so the model can be built with no arguments:
    text fields default to the empty string, ``problem_type`` to ``"proof"``
    and ``max_attempts`` to 1.
    """

    # --- Problem content -------------------------------------------------
    problem: str = Field(
        default="",
        description="The mathematical problem",
    )
    reference_solution: str = Field(
        default="",
        description="Ground truth solution",
    )
    grading_guidelines: str = Field(
        default="",
        description="Rubric for grading (0-7 scale)",
    )

    # --- Provenance ------------------------------------------------------
    problem_id: str = Field(
        default="",
        description="Unique problem identifier",
    )
    dataset_source: str = Field(
        default="",
        description="Source dataset name",
    )

    # --- Episode configuration -------------------------------------------
    # NOTE: problem_type is a free-form string; the values listed in its
    # description are not enforced by this model.
    problem_type: str = Field(
        default="proof",
        description="Problem type: proof, answer, or multi_step",
    )
    max_attempts: int = Field(
        default=1,
        description="Maximum number of allowed submission attempts",
    )
class ProofSubmissionObservation(QEDMathObservation):
    """Observation reporting the outcome of a proof submission.

    Holds the submitted proof, its grade and feedback, the reward and
    episode-termination flag, and attempt bookkeeping. Every field has a
    default. This model only stores the values; it does not compute the
    reward from the score or validate the score range.
    """

    # --- Grading result --------------------------------------------------
    proof: str = Field(
        default="",
        description="The submitted proof",
    )
    # The 0-7 range in the description is informational only; no bounds
    # are declared on the field.
    score: int = Field(
        default=0,
        description="Grade from rubric (0-7)",
    )
    feedback: str = Field(
        default="",
        description="Grader feedback",
    )

    # --- Episode signals -------------------------------------------------
    # NOTE(review): ``reward`` and ``done`` presumably redeclare fields of
    # the base Observation with environment-specific defaults; confirm
    # against the openenv Observation definition.
    reward: RewardValue = Field(
        default=0.0,
        description="Normalized reward (score/7)",
    )
    done: bool = Field(
        default=True,
        description="Episode ends after proof submission",
    )

    # --- Attempt bookkeeping ---------------------------------------------
    problem_type: str = Field(
        default="proof",
        description="Problem type used to evaluate this submission",
    )
    attempt_number: int = Field(
        default=1,
        description="1-based submission attempt index",
    )
    attempts_remaining: int = Field(
        default=0,
        description="Remaining submission attempts in the current episode",
    )
    is_correct: bool = Field(
        default=False,
        description="Whether the submission is considered fully correct",
    )