Spaces:

rycerzes
/

qed-math-openenv

Sleeping

App Files Files Community

sourasishbasu committed on Apr 2

Commit

9d852e5

1 Parent(s): 9542047

QED Math Environment

Browse files

Files changed (10) hide show

.dockerignore +13 -0
README.md +47 -0
__init__.py +29 -0
client.py +146 -0
models.py +87 -0
openenv.yaml +83 -0
pyproject.toml +35 -0
server/Dockerfile +68 -0
server/__init__.py +12 -0
uv.lock +17 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,13 @@

+.venv
+.git
+.gitignore
+.env
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.pyw
+*.pyz
+*.pywz
+*.pyzw
+*.pyzwz

README.md CHANGED Viewed

	@@ -0,0 +1,47 @@

+# QED Math Environment
+A mathematical proof generation and evaluation environment for OpenEnv.
+## Features
+- **MCP Tools**: Agent interacts via MCP (Model Context Protocol)
+  - `get_problem`: Get current problem statement and metadata
+  - `submit_proof`: Submit proof for LLM-judge rubric grading (0-7 scale)
+  - `get_grading_guidelines`: Get grading rubric for current problem
+- **LLM-Judge Rubric**: Proofs graded on 0-7 scale with normalized rewards
+- **Answer-mode verification**: Uses `math_verify` for fast `\boxed{}` checking
+- **Reward shaping**: Discount factor, length penalty, optional score thresholding
+- **Flexible datasets**: Local JSONL/JSON, Hugging Face Hub, or built-in bootstrap
+## Quick Start
+```bash
+# Install
+uv sync --all-extras
+# Run server
+uv run server
+# Or via Docker
+docker build -t qed-math-env:latest -f server/Dockerfile .
+docker run -p 8000:8000 -e OPENAI_API_KEY=$OPENAI_API_KEY qed-math-env:latest
+```
+## Usage
+```python
+from qed_math_env import QEDMathEnv
+with QEDMathEnv(base_url="http://localhost:8000") as env:
+    env.reset()
+    problem = env.call_tool("get_problem")
+    result = env.call_tool("submit_proof", proof="Let a=2m...")
+```
+## Testing
+```bash
+PYTHONPATH=src:envs uv run pytest tests/envs/test_qed_math_environment.py -v
+```

__init__.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""QED Math Environment."""
+from .client import QEDMathEnv
+from .models import (
+    GetGradingGuidelines,
+    GetProblem,
+    ProblemObservation,
+    ProofSubmissionObservation,
+    QEDMathAction,
+    QEDMathObservation,
+    SubmitProof,
+)
+__all__ = [
+    "QEDMathAction",
+    "QEDMathObservation",
+    "QEDMathEnv",
+    "SubmitProof",
+    "GetProblem",
+    "GetGradingGuidelines",
+    "ProblemObservation",
+    "ProofSubmissionObservation",
+]

client.py ADDED Viewed

	@@ -0,0 +1,146 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""QED Math Environment Client.
+Provides tool-calling style interactions with the QED Math environment
+via MCP (Model Context Protocol).
+Example:
+    >>> with QEDMathEnv(base_url="http://localhost:8000") as env:
+    ...     env.reset()
+    ...     tools = env.list_tools()
+    ...     print([t.name for t in tools])
+    ...     result = env.call_tool("get_problem")
+    ...     result = env.call_tool("submit_proof", proof="By induction...")
+"""
+from typing import Any, Mapping, Optional
+from openenv.core.client_types import StepResult
+from openenv.core.env_server.types import Observation, State
+from openenv.core.mcp_client import MCPToolClient
+from .models import ProblemObservation, ProofSubmissionObservation
+class QEDMathEnv(MCPToolClient):
+    """
+    Client for the QED Math Environment.
+    Inherits MCP tool-calling interface from MCPToolClient:
+    - ``list_tools()``: Discover available MCP tools
+    - ``call_tool(name, **kwargs)``: Call a tool by name
+    - ``reset(**kwargs)``: Reset the environment
+    Example:
+        >>> with QEDMathEnv(base_url="http://localhost:8000") as env:
+        ...     env.reset()
+        ...     result = env.call_tool("get_problem")
+        ...     result = env.call_tool("submit_proof", proof="By induction...")
+    """
+    @staticmethod
+    def _as_problem_observation(value: Any) -> ProblemObservation:
+        """Normalize tool/reset outputs into a ProblemObservation instance."""
+        if isinstance(value, ProblemObservation):
+            return value
+        if isinstance(value, Mapping):
+            return ProblemObservation(**dict(value))
+        if hasattr(value, "model_dump"):
+            return ProblemObservation(**value.model_dump())
+        raise TypeError(f"Unsupported problem observation payload type: {type(value).__name__}")
+    @staticmethod
+    def _as_proof_submission_observation(value: Any) -> ProofSubmissionObservation:
+        """Normalize tool outputs into a ProofSubmissionObservation instance."""
+        if isinstance(value, ProofSubmissionObservation):
+            return value
+        if isinstance(value, Mapping):
+            return ProofSubmissionObservation(**dict(value))
+        if hasattr(value, "model_dump"):
+            return ProofSubmissionObservation(**value.model_dump())
+        raise TypeError(f"Unsupported proof submission payload type: {type(value).__name__}")
+    async def reset(
+        self, problem_id: Optional[str] = None, **kwargs: Any
+    ) -> StepResult[Observation]:
+        """
+        Reset the environment, optionally selecting a specific problem.
+        Args:
+            problem_id: Optional problem identifier to load a specific problem.
+                        If None, a problem is chosen randomly from the dataset.
+            **kwargs: Additional reset parameters (e.g., seed).
+        Returns:
+            StepResult with a normalized ProblemObservation in `observation`.
+        """
+        if problem_id is not None:
+            kwargs["problem_id"] = problem_id
+        result = await super().reset(**kwargs)
+        observation = result.observation if isinstance(result, StepResult) else result
+        normalized_observation = self._as_problem_observation(observation)
+        return StepResult(
+            observation=normalized_observation,
+            reward=result.reward,
+            done=result.done,
+        )
+    async def submit_proof(self, proof: str) -> ProofSubmissionObservation:
+        """
+        Submit a proof attempt for the current problem.
+        Args:
+            proof: The proof text to submit for grading.
+        Returns:
+            ProofSubmissionObservation with score (0-7), feedback, and reward.
+        """
+        result = await self.call_tool("submit_proof", proof=proof)
+        return self._as_proof_submission_observation(result)
+    async def get_current_problem(self) -> ProblemObservation:
+        """
+        Retrieve the current problem statement without resetting.
+        Returns:
+            ProblemObservation for the active problem.
+        """
+        result = await self.call_tool("get_problem")
+        return self._as_problem_observation(result)
+    async def get_problem(self) -> ProblemObservation:
+        """Compatibility alias for get_current_problem()."""
+        return await self.get_current_problem()
+    async def get_grading_feedback(self) -> dict[str, Any]:
+        """
+        Retrieve the grading guidelines/rubric for the current problem.
+        Returns:
+            Tool payload containing grading_guidelines and problem metadata.
+        """
+        result = await self.call_tool("get_grading_guidelines")
+        if isinstance(result, Mapping):
+            return dict(result)
+        if hasattr(result, "model_dump"):
+            return result.model_dump()
+        raise TypeError(f"Unsupported grading feedback payload type: {type(result).__name__}")
+    async def get_state(self) -> State:
+        """Return current environment state (episode_id, step_count)."""
+        return await super().state()
+    def get_state_sync(self) -> State:
+        """Synchronous helper for code paths that do not use async/await."""
+        with self.sync() as client:
+            return client.state()

models.py ADDED Viewed

	@@ -0,0 +1,87 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Data models for the QED Math Environment.
+Defines action and observation types for mathematical proof submission
+and grading.
+"""
+from openenv.core.env_server.types import Action, Observation
+from pydantic import Field
+# Union of value types accepted for the ``reward`` field of ProofSubmissionObservation.
+RewardValue = bool | int | float | None
+class QEDMathAction(Action):
+    """Base action for the QED Math environment.
+
+    Declares no fields of its own; SubmitProof, GetProblem and
+    GetGradingGuidelines derive from it.
+    """
+class SubmitProof(QEDMathAction):
+    """Submit a proof attempt for the current problem."""
+    proof: str = Field(..., description="The proof text submitted by agent")
+    attempt_number: int = Field(default=1, description="Attempt counter")
+class GetProblem(QEDMathAction):
+    """Request the current problem statement (takes no parameters)."""
+class GetGradingGuidelines(QEDMathAction):
+    """Request the grading guidelines/rubric for the current problem (takes no parameters)."""
+class QEDMathObservation(Observation):
+    """Base observation for the QED Math environment.
+
+    Declares no fields of its own; ProblemObservation and
+    ProofSubmissionObservation derive from it.
+    """
+class ProblemObservation(QEDMathObservation):
+    """Observation containing the problem statement."""
+    problem: str = Field(default="", description="The mathematical problem")
+    reference_solution: str = Field(default="", description="Ground truth solution")
+    grading_guidelines: str = Field(
+        default="", description="Rubric for grading (0-7 scale)"
+    )
+    problem_id: str = Field(default="", description="Unique problem identifier")
+    dataset_source: str = Field(default="", description="Source dataset name")
+    problem_type: str = Field(
+        default="proof",
+        description="Problem type: proof, answer, or multi_step",
+    )
+    max_attempts: int = Field(
+        default=1,
+        description="Maximum number of allowed submission attempts",
+    )
+class ProofSubmissionObservation(QEDMathObservation):
+    """Observation returned after submitting a proof."""
+    proof: str = Field(default="", description="The submitted proof")
+    score: int = Field(default=0, description="Grade from rubric (0-7)")
+    feedback: str = Field(default="", description="Grader feedback")
+    reward: RewardValue = Field(
+        default=0.0,
+        description="Normalized reward (score/7)",
+    )
+    done: bool = Field(default=True, description="Episode ends after proof submission")
+    problem_type: str = Field(
+        default="proof",
+        description="Problem type used to evaluate this submission",
+    )
+    attempt_number: int = Field(default=1, description="1-based submission attempt index")
+    attempts_remaining: int = Field(
+        default=0,
+        description="Remaining submission attempts in the current episode",
+    )
+    is_correct: bool = Field(
+        default=False,
+        description="Whether the submission is considered fully correct",
+    )

openenv.yaml ADDED Viewed

	@@ -0,0 +1,83 @@

+spec_version: 1
+name: qed_math_env
+type: space
+runtime: fastapi
+app: server.app:app
+port: 8000
+description: >
+  Mathematical proof generation and evaluation environment.
+  Agents receive math problems, submit proofs, and receive LLM-based
+  rubric grading (0-7 scale) with normalized rewards.
+  Answer-mode uses a process-based verifier service for concurrent rollout
+  safety (timeouts, retries, backpressure, worker restart).
+version: "0.1.0"
+# Environment configuration defaults (overridable via QEDMathConfig)
+environment:
+  grader_model: gemini-3-pro
+  prompt_name: v2
+  custom_reward_threshold: false
+  discount_factor: 1.0
+  buffer_tokens: 0
+  max_tokens: 0
+  verifier_workers: 4
+  verifier_queue_size: 128
+  verifier_request_timeout_seconds: 5.0
+  verifier_max_retries: 1
+  verifier_strict: true
+  verifier_numeric_precision: 5
+  verifier_float_rounding: 10
+# Rubric definition
+rubric:
+  type: llm_judge
+  name: MathProofRubric
+  scale: "0-7"
+  normalization: "score / 7.0"
+  thresholding: "optional (collapses 1-5 -> 1)"
+# MCP tools exposed by this environment
+tools:
+  - name: get_problem
+    description: Return current problem statement and metadata
+  - name: submit_proof
+    description: Submit a proof for LLM-based rubric grading
+    parameters:
+      - proof (str, required)
+  - name: get_grading_guidelines
+    description: Return the rubric/marking scheme for the current problem
+# Verifier metrics emitted per grading call
+metrics:
+  verifier:
+    - verifier/rollouts/success
+    - verifier/rollouts/failure
+    - verifier/failures/timeout
+    - verifier/failures/rate_limit
+    - verifier/failures/no_input
+    - verifier/failures/no_score_tag
+    - verifier/failures/all_attempts_failed
+    - verifier/failures/num_retries
+    - verifier/runtime/latency_per_request
+    - verifier/requests/count
+    - verifier/requests/latency_ms
+    - verifier/requests/timeout_count
+    - verifier/requests/error_count
+    - verifier/queue/depth
+    - verifier/cache/hit_rate
+    - verifier/workers/restart_count
+    - verifier/workers/worker_restarted
+    - verifier/workers/heartbeat_lag_ms
+    - verifier/runtime/input_tokens
+    - verifier/runtime/output_tokens
+  reward:
+    - reward/base
+    - reward/shaped
+    - reward/score_raw
+    - reward/overlong_penalty
+  episode:
+    - episode/attempt_number
+    - episode/is_correct
+    - episode/problem_type
+    - episode/dataset_source

pyproject.toml ADDED Viewed

	@@ -0,0 +1,35 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-qed_math_env"
+version = "0.1.0"
+description = "Qed Math Env environment for OpenEnv"
+requires-python = ">=3.10"
+dependencies = [
+    "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
+    "datasets>=4.7.0",
+    "math-verify[antlr4_13_2]>=0.9.0",
+    "trackio>=0.19.0"
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+]
+[project.scripts]
+server = "qed_math_env.server.app:main"
+[tool.setuptools]
+include-package-data = true
+packages = ["qed_math_env", "qed_math_env.server"]
+package-dir = { "qed_math_env" = ".", "qed_math_env.server" = "server" }

server/Dockerfile ADDED Viewed

	@@ -0,0 +1,68 @@

+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local OpenEnv sources)
+# - Standalone environments (with openenv from PyPI/Git)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+#
+# Build from the env directory:
+#   docker build -t qed-math-env:latest -f server/Dockerfile .
+#
+# Or from the repo root:
+#   docker build -t qed-math-env:latest -f envs/qed_math_env/server/Dockerfile envs/qed_math_env
+#
+# Run:
+#   docker run -p 8000:8000 -e OPENAI_API_KEY=$OPENAI_API_KEY qed-math-env:latest
+# ---- Stage 1 (builder): install dependencies into /app/env/.venv ----
+# BASE_IMAGE is declared before the first FROM so both stages can use it.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# NOTE(review): BUILD_MODE is not referenced by any later instruction in this
+# file -- confirm whether the openenv build tooling still relies on it.
+ARG BUILD_MODE=in-repo
+# Copy the whole build context (the environment directory) into the image.
+COPY . /app/env
+WORKDIR /app/env
+# Install uv only when the base image does not already provide it.
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install git; presumably needed because pyproject.toml pulls openenv-core from a Git URL.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# First pass: install dependencies only (--no-install-project).
+# The lockfile is enforced with --frozen when uv.lock is present.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+# Second pass: install the project itself as a non-editable package.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# ---- Stage 2 (runtime): copy the prepared virtualenv and sources from the builder ----
+FROM ${BASE_IMAGE}
+WORKDIR /app
+COPY --from=builder /app/env/.venv /app/.venv
+COPY --from=builder /app/env /app/env
+# Put the virtualenv's executables first on PATH so `uvicorn` and `python` resolve to it.
+ENV PATH="/app/.venv/bin:$PATH"
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+ENV PYTHONUNBUFFERED=1
+# Mark the container unhealthy when a request to /health on port 8000 fails.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
+# Serve the app on 0.0.0.0:8000 with WebSocket ping interval 120 and ping timeout 600.
+CMD ["uvicorn", "qed_math_env.server.app:app", "--host", "0.0.0.0", "--port", "8000", "--ws-ping-interval", "120", "--ws-ping-timeout", "600"]

server/__init__.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""QED Math Environment server components."""
+from .qed_math_environment import QEDMathEnvironment
+__all__ = ["QEDMathEnvironment"]

uv.lock ADDED Viewed

	@@ -0,0 +1,17 @@

+version = 1
+version_hash = ""
+[[package]]
+name = "openenv-qed_math_env"
+version = "0.1.0"
+source = { git = "https://github.com/meta-pytorch/OpenEnv.git" }
+[package.dependencies]
+"datasets" = ">=4.7.0"
+"math-verify" = { extras = ["antlr4_13_2"], version = ">=0.9.0" }
+"openenv-core" = { extras = ["core"], git = "https://github.com/meta-pytorch/OpenEnv.git" }
+"trackio" = ">=0.19.0"
+[package.optional-dependencies]
+dev = ["pytest-cov>=4.0.0", "pytest>=8.0.0"]