Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +84 -0
- README.md +62 -10
- __init__.py +10 -0
- client.py +80 -0
- dataset_example.py +133 -0
- guidelines.txt +150 -0
- inference.py +282 -0
- models.py +38 -0
- openenv.yaml +25 -0
- openenv_constraint_env.egg-info/PKG-INFO +10 -0
- openenv_constraint_env.egg-info/SOURCES.txt +21 -0
- openenv_constraint_env.egg-info/dependency_links.txt +1 -0
- openenv_constraint_env.egg-info/entry_points.txt +2 -0
- openenv_constraint_env.egg-info/requires.txt +6 -0
- openenv_constraint_env.egg-info/top_level.txt +1 -0
- problem_statement.txt +64 -0
- pyproject.toml +46 -0
- server/__init__.py +11 -0
- server/app.py +126 -0
- server/constraint_env_environment.py +390 -0
- server/requirements.txt +6 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=constraint_env
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
ENV ENABLE_WEB_INTERFACE='true'
|
| 75 |
+
|
| 76 |
+
# HF Spaces uses port 7860 by default; override with PORT env var for local use
|
| 77 |
+
ENV PORT=7860
|
| 78 |
+
|
| 79 |
+
# Health check (uses the PORT variable)
|
| 80 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
|
| 81 |
+
CMD curl -f http://localhost:${PORT}/health || exit 1
|
| 82 |
+
|
| 83 |
+
# Run the FastAPI server – respects $PORT so it works on HF Spaces and locally
|
| 84 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port ${PORT:-7860}"]
|
README.md
CHANGED
|
@@ -1,10 +1,62 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Constraint
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Constraint Environment
|
| 3 |
+
emoji: 🧩
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
short_description: RL training env — natural language to constraint AST
|
| 10 |
+
base_path: /web
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Constraint Environment
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
An environment to convert to DSL from natural language
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
## Development & Testing
|
| 21 |
+
|
| 22 |
+
### Direct Environment Testing
|
| 23 |
+
|
| 24 |
+
Test the environment logic directly without starting the HTTP server:
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
# From the server directory
|
| 28 |
+
python3 server/constraint_env_environment.py
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
This verifies that:
|
| 32 |
+
- Environment resets correctly
|
| 33 |
+
- Step executes actions properly
|
| 34 |
+
- State tracking works
|
| 35 |
+
- Rewards are calculated correctly
|
| 36 |
+
|
| 37 |
+
### Running Locally
|
| 38 |
+
|
| 39 |
+
Run the server locally for development:
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
uvicorn server.app:app --reload
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
## Project Structure
|
| 46 |
+
|
| 47 |
+
```
|
| 48 |
+
constraint_env/
|
| 49 |
+
├── .dockerignore # Docker build exclusions
|
| 50 |
+
├── __init__.py # Module exports
|
| 51 |
+
├── README.md # This file
|
| 52 |
+
├── openenv.yaml # OpenEnv manifest
|
| 53 |
+
├── pyproject.toml # Project metadata and dependencies
|
| 54 |
+
├── uv.lock # Locked dependencies (generated)
|
| 55 |
+
├── client.py # ConstraintEnv client
|
| 56 |
+
├── models.py # Action and Observation models
|
| 57 |
+
└── server/
|
| 58 |
+
├── __init__.py # Server module exports
|
| 59 |
+
├── constraint_env_environment.py # Core environment logic
|
| 60 |
+
├── app.py # FastAPI application (HTTP + WebSocket endpoints)
|
| 61 |
+
└── Dockerfile # Container image definition
|
| 62 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constraint Env Environment."""
|
| 2 |
+
|
| 3 |
+
from .client import ConstraintEnv
|
| 4 |
+
from .models import ConstraintAction, ConstraintObservation
|
| 5 |
+
|
| 6 |
+
__all__ = [
|
| 7 |
+
"ConstraintAction",
|
| 8 |
+
"ConstraintObservation",
|
| 9 |
+
"ConstraintEnv",
|
| 10 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Constraint Env Environment Client."""
|
| 8 |
+
|
| 9 |
+
from typing import Dict
|
| 10 |
+
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
from openenv.core.env_server.types import State
|
| 14 |
+
|
| 15 |
+
from .models import ConstraintAction, ConstraintObservation, ConstraintState
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ConstraintEnv(
|
| 19 |
+
EnvClient[ConstraintAction, ConstraintObservation, ConstraintState]
|
| 20 |
+
):
|
| 21 |
+
"""
|
| 22 |
+
Client for the Constraint Env Environment.
|
| 23 |
+
|
| 24 |
+
This client maintains a persistent WebSocket connection to the environment server,
|
| 25 |
+
enabling efficient multi-step interactions with lower latency.
|
| 26 |
+
Each client instance has its own dedicated environment session on the server.
|
| 27 |
+
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
def _step_payload(self, action: ConstraintAction) -> Dict:
|
| 31 |
+
"""
|
| 32 |
+
Convert ConstraintAction to JSON payload for step message.
|
| 33 |
+
|
| 34 |
+
Args:
|
| 35 |
+
action: ConstraintAction instance
|
| 36 |
+
|
| 37 |
+
Returns:
|
| 38 |
+
Dictionary representation suitable for JSON encoding
|
| 39 |
+
"""
|
| 40 |
+
return {
|
| 41 |
+
"ast_output": action.ast_output,
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
def _parse_result(self, payload: Dict) -> StepResult[ConstraintObservation]:
|
| 45 |
+
"""
|
| 46 |
+
Parse server response into StepResult[ConstraintObservation].
|
| 47 |
+
|
| 48 |
+
Args:
|
| 49 |
+
payload: JSON response data from server
|
| 50 |
+
|
| 51 |
+
Returns:
|
| 52 |
+
StepResult with ConstraintObservation
|
| 53 |
+
"""
|
| 54 |
+
obs_data = payload.get("observation", {})
|
| 55 |
+
observation = ConstraintObservation(
|
| 56 |
+
prompt=obs_data.get("prompt", ""),
|
| 57 |
+
info=obs_data.get("info", 0),
|
| 58 |
+
done=payload.get("done", False),
|
| 59 |
+
reward=payload.get("reward"),
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
return StepResult(
|
| 63 |
+
observation=observation,
|
| 64 |
+
reward=payload.get("reward"),
|
| 65 |
+
done=payload.get("done", False),
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
def _parse_state(self, payload: Dict) -> ConstraintState:
|
| 69 |
+
"""
|
| 70 |
+
Parse server response into State object.
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
payload: JSON response from state request
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
State object with episode_id and step_count
|
| 77 |
+
"""
|
| 78 |
+
return ConstraintState(
|
| 79 |
+
episode_id=payload.get("episode_id"),
|
| 80 |
+
)
|
dataset_example.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Dataset for the Constraint Environment.
|
| 9 |
+
|
| 10 |
+
Three difficulty tiers (easy / medium / hard) with increasing structural
|
| 11 |
+
complexity for translating natural-language scheduling constraints into a
|
| 12 |
+
JSON-based AST DSL.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
dataset = {
|
| 16 |
+
# ------------------------------------------------------------------
|
| 17 |
+
# EASY – single quantifier, no WHERE clause, direct assert
|
| 18 |
+
# ------------------------------------------------------------------
|
| 19 |
+
"easy": [
|
| 20 |
+
{
|
| 21 |
+
"prompt": (
|
| 22 |
+
"No classes should be scheduled on Saturday."
|
| 23 |
+
),
|
| 24 |
+
"target_ast": {
|
| 25 |
+
"type": "hard",
|
| 26 |
+
"name": "no_saturday_classes",
|
| 27 |
+
"forall": [
|
| 28 |
+
{"var": "b", "domain": "branches"},
|
| 29 |
+
{"var": "sub", "domain": "subjects"},
|
| 30 |
+
{"var": "d", "domain": "days"},
|
| 31 |
+
{"var": "s", "domain": "slots"},
|
| 32 |
+
],
|
| 33 |
+
"where": "d == 5",
|
| 34 |
+
"assert": "schedule(b, sub, d, s) == 0",
|
| 35 |
+
},
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"prompt": (
|
| 39 |
+
"Every teacher must teach at least one subject."
|
| 40 |
+
),
|
| 41 |
+
"target_ast": {
|
| 42 |
+
"type": "soft",
|
| 43 |
+
"name": "teacher_teaches_one",
|
| 44 |
+
"forall": [
|
| 45 |
+
{"var": "t", "domain": "teachers"},
|
| 46 |
+
],
|
| 47 |
+
"assert": "SUM(teaches(t, sub)) >= 1",
|
| 48 |
+
},
|
| 49 |
+
},
|
| 50 |
+
],
|
| 51 |
+
|
| 52 |
+
# ------------------------------------------------------------------
|
| 53 |
+
# MEDIUM – two quantifiers, WHERE clause, combined assert
|
| 54 |
+
# ------------------------------------------------------------------
|
| 55 |
+
"medium": [
|
| 56 |
+
{
|
| 57 |
+
"prompt": (
|
| 58 |
+
"No non-online classes should be scheduled on Saturday."
|
| 59 |
+
),
|
| 60 |
+
"target_ast": {
|
| 61 |
+
"type": "hard",
|
| 62 |
+
"name": "no_non_online_saturday",
|
| 63 |
+
"forall": [
|
| 64 |
+
{"var": "b", "domain": "branches"},
|
| 65 |
+
{"var": "sub", "domain": "subjects"},
|
| 66 |
+
{"var": "d", "domain": "days"},
|
| 67 |
+
{"var": "s", "domain": "slots"},
|
| 68 |
+
],
|
| 69 |
+
"where": "subject_type(b, sub) != 'online' AND d == 5",
|
| 70 |
+
"assert": "schedule(b, sub, d, s) == 0",
|
| 71 |
+
},
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"prompt": (
|
| 75 |
+
"A teacher cannot be assigned to two different slots at the same time."
|
| 76 |
+
),
|
| 77 |
+
"target_ast": {
|
| 78 |
+
"type": "hard",
|
| 79 |
+
"name": "no_teacher_time_conflict",
|
| 80 |
+
"forall": [
|
| 81 |
+
{"var": "t", "domain": "teachers"},
|
| 82 |
+
{"var": "d", "domain": "days"},
|
| 83 |
+
{"var": "s", "domain": "slots"},
|
| 84 |
+
],
|
| 85 |
+
"assert": "COUNT(occupies_teacher(t, d, s)) <= 1",
|
| 86 |
+
},
|
| 87 |
+
},
|
| 88 |
+
],
|
| 89 |
+
|
| 90 |
+
# ------------------------------------------------------------------
|
| 91 |
+
# HARD – multiple quantifiers, nested WHERE + AND/OR, minimize
|
| 92 |
+
# ------------------------------------------------------------------
|
| 93 |
+
"hard": [
|
| 94 |
+
{
|
| 95 |
+
"prompt": (
|
| 96 |
+
"Minimize the number of occupied slots for each branch on any given day, "
|
| 97 |
+
"but only for subjects that are labelled as practical."
|
| 98 |
+
),
|
| 99 |
+
"target_ast": {
|
| 100 |
+
"type": "soft",
|
| 101 |
+
"name": "minimize_practical_slots",
|
| 102 |
+
"forall": [
|
| 103 |
+
{"var": "b", "domain": "branches"},
|
| 104 |
+
{"var": "sub", "domain": "subjects"},
|
| 105 |
+
{"var": "d", "domain": "days"},
|
| 106 |
+
{"var": "s", "domain": "slots"},
|
| 107 |
+
],
|
| 108 |
+
"where": "subject_type(b, sub) == 'practical'",
|
| 109 |
+
"minimize": "SUM(occupies(b, sub, d, s))",
|
| 110 |
+
},
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"prompt": (
|
| 114 |
+
"No branch may have more than four scheduled slots in a single day, "
|
| 115 |
+
"unless a subject is online, in which case it may occupy one extra slot."
|
| 116 |
+
),
|
| 117 |
+
"target_ast": {
|
| 118 |
+
"type": "hard",
|
| 119 |
+
"name": "daily_slot_cap",
|
| 120 |
+
"forall": [
|
| 121 |
+
{"var": "b", "domain": "branches"},
|
| 122 |
+
{"var": "d", "domain": "days"},
|
| 123 |
+
{"var": "sub", "domain": "subjects"},
|
| 124 |
+
{"var": "s", "domain": "slots"},
|
| 125 |
+
],
|
| 126 |
+
"assert": (
|
| 127 |
+
"SUM(schedule(b, sub, d, s)) <= 4 OR "
|
| 128 |
+
"(subject_type(b, sub) == 'online' AND SUM(schedule(b, sub, d, s)) <= 5)"
|
| 129 |
+
),
|
| 130 |
+
},
|
| 131 |
+
},
|
| 132 |
+
],
|
| 133 |
+
}
|
guidelines.txt
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
🚀 Hackathon Submission Guidelines (OpenEnv RL Challenge)
|
| 2 |
+
1. Project Structure
|
| 3 |
+
* Your inference script must be named inference.py
|
| 4 |
+
* It must be located in the root directory of your project
|
| 5 |
+
|
| 6 |
+
________________
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
2. LLM Usage Requirements
|
| 10 |
+
* You must use the OpenAI Client for all LLM calls
|
| 11 |
+
* Do not use alternative SDKs or direct HTTP calls
|
| 12 |
+
|
| 13 |
+
________________
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
3. Required Environment Variables
|
| 17 |
+
Your inference.py must read the following environment variables:
|
| 18 |
+
* API_BASE_URL
|
| 19 |
+
* Description: API endpoint for the LLM
|
| 20 |
+
* Requirement: Must include a default value
|
| 21 |
+
* MODEL_NAME
|
| 22 |
+
* Description: Model identifier used for inference
|
| 23 |
+
* Requirement: Must include a default value
|
| 24 |
+
* HF_TOKEN
|
| 25 |
+
* Description: Hugging Face API token
|
| 26 |
+
* Requirement: Mandatory (no default required)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
4. INFERENCE OUTPUT FORMAT
|
| 30 |
+
The script must emit exactly three line types to stdout, in this order:
|
| 31 |
+
[START] task=<task_name> env=<benchmark> model=<model_name>
|
| 32 |
+
[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
|
| 33 |
+
[END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
Rules:
|
| 41 |
+
- One [START] line at episode begin.
|
| 42 |
+
- One [STEP] line per step, immediately after env.step() returns.
|
| 43 |
+
- One [END] line after env.close(), always emitted (even on exception).
|
| 44 |
+
- reward and rewards are formatted to 2 decimal places.
|
| 45 |
+
- done and success are lowercase booleans: true or false.
|
| 46 |
+
- error is the raw last_action_error string, or null if none.
|
| 47 |
+
- All fields on a single line with no newlines within a line.
|
| 48 |
+
Example:
|
| 49 |
+
[START] task=click-test env=miniwob model=Qwen3-VL-30B
|
| 50 |
+
[STEP] step=1 action=click('123') reward=0.00 done=false error=null
|
| 51 |
+
[STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
|
| 52 |
+
[STEP] step=3 action=click('789') reward=1.00 done=true error=null
|
| 53 |
+
[END] success=true steps=3 rewards=0.00,0.00,1.00
|
| 54 |
+
✅ Example (inference.py)
|
| 55 |
+
import os
|
| 56 |
+
from openai import OpenAI
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# Read environment variables with defaults where required
|
| 60 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
|
| 61 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
|
| 62 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
if HF_TOKEN is None:
|
| 66 |
+
raise ValueError("HF_TOKEN environment variable is required")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# Initialize OpenAI client
|
| 70 |
+
client = OpenAI(
|
| 71 |
+
base_url=API_BASE_URL,
|
| 72 |
+
api_key=HF_TOKEN
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def run_inference(prompt: str):
|
| 77 |
+
response = client.chat.completions.create(
|
| 78 |
+
model=MODEL_NAME,
|
| 79 |
+
messages=[
|
| 80 |
+
{"role": "user", "content": prompt}
|
| 81 |
+
]
|
| 82 |
+
)
|
| 83 |
+
response = response.choices[0].message.content
|
| 84 |
+
# Print output based on above given format
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
if __name__ == "__main__":
|
| 90 |
+
print(run_inference("Hello from OpenEnv!"))
|
| 91 |
+
|
| 92 |
+
________________
|
| 93 |
+
4. Hugging Face Space Guidelines
|
| 94 |
+
* Building a Hugging Face Space can take significant time, especially if multiple spaces are active
|
| 95 |
+
* To avoid delays:
|
| 96 |
+
* Turn off all unnecessary spaces
|
| 97 |
+
* Keep only your primary submission space running
|
| 98 |
+
________________
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
5. Submission Validation Rules
|
| 102 |
+
* The system will check if your Hugging Face Space is live
|
| 103 |
+
* If your space is not in a running state, your submission will fail automatically
|
| 104 |
+
Before submitting:
|
| 105 |
+
* Ensure your space is fully built
|
| 106 |
+
* Confirm it is in the “Running” state
|
| 107 |
+
________________
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
6. Hardware Requirements
|
| 111 |
+
* Your solution will be executed inside a Docker container with limited resources
|
| 112 |
+
* It must run within the following constraints:
|
| 113 |
+
* 2 vCPU
|
| 114 |
+
* 8 GB RAM
|
| 115 |
+
👉 Ensure your model, dependencies, and runtime fit within these limits. Submissions exceeding these constraints may fail during evaluation.
|
| 116 |
+
________________
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
6. Resubmissions
|
| 120 |
+
* You are allowed to resubmit your project multiple times
|
| 121 |
+
* If your submission fails validation, you can:
|
| 122 |
+
* Fix the issues
|
| 123 |
+
* Ensure your Hugging Face Space is running
|
| 124 |
+
* Submit again
|
| 125 |
+
👉 There is no penalty for resubmitting, so iterate until your submission passes all checks.
|
| 126 |
+
________________
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
⚠️ Common Failure Cases (Avoid These)
|
| 130 |
+
* inference.py not in root directory
|
| 131 |
+
* Missing default values for API_BASE_URL or MODEL_NAME
|
| 132 |
+
* Missing HF_TOKEN
|
| 133 |
+
* Hugging Face Space still building during submission
|
| 134 |
+
* Space stopped due to multiple active deployments
|
| 135 |
+
________________
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
🚀 Reference projects to guide you
|
| 139 |
+
Here are some strong examples from the San Francisco edition to help you understand how to structure your environment:
|
| 140 |
+
* Calendar Environment Server
|
| 141 |
+
https://github.com/meta-pytorch/OpenEnv/tree/main/envs/calendar_env
|
| 142 |
+
* Reasoning Gym Environment Server
|
| 143 |
+
https://github.com/meta-pytorch/OpenEnv/tree/main/envs/reasoning_gym_env
|
| 144 |
+
* TB2 Environment Server
|
| 145 |
+
https://github.com/meta-pytorch/OpenEnv/tree/main/envs/tbench2_env
|
| 146 |
+
* CARLA Environment Server
|
| 147 |
+
https://github.com/meta-pytorch/OpenEnv/tree/main/envs/carla_env
|
| 148 |
+
* REPL Environment Server
|
| 149 |
+
https://github.com/meta-pytorch/OpenEnv/tree/main/envs/repl_env
|
| 150 |
+
Use these as direction, not as templates. Focus on understanding structure and approach.
|
inference.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
inference.py – Baseline evaluator for the Constraint Environment.
|
| 3 |
+
|
| 4 |
+
Reads environment variables:
|
| 5 |
+
API_BASE_URL – LLM endpoint (default: https://openrouter.ai/api/v1)
|
| 6 |
+
MODEL_NAME – Model id (default: openai/gpt-oss-120b)
|
| 7 |
+
HF_TOKEN – API key (REQUIRED – raises ValueError if missing)
|
| 8 |
+
|
| 9 |
+
Output format (stdout only):
|
| 10 |
+
[START] task=<task_name> env=constraint_env model=<model_name>
|
| 11 |
+
[STEP] step=<n> action=<ast_json> reward=<0.00> done=<true|false> error=<msg|null>
|
| 12 |
+
[END] success=<true|false> steps=<n> rewards=<r1,r2,...>
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
import json
|
| 18 |
+
import asyncio
|
| 19 |
+
|
| 20 |
+
from openai import OpenAI
|
| 21 |
+
import dotenv
|
| 22 |
+
dotenv.load_dotenv()
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
# Environment variables
|
| 25 |
+
# ---------------------------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
API_BASE_URL: str = os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1")
|
| 28 |
+
MODEL_NAME: str = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
|
| 29 |
+
HF_TOKEN: str | None = os.getenv("HF_TOKEN")
|
| 30 |
+
|
| 31 |
+
if HF_TOKEN is None:
|
| 32 |
+
raise ValueError(
|
| 33 |
+
"HF_TOKEN environment variable is required. "
|
| 34 |
+
"Export it before running:\n"
|
| 35 |
+
' $env:HF_TOKEN="sk-or-v1-..." (PowerShell)\n'
|
| 36 |
+
' export HF_TOKEN="sk-or-v1-..." (bash)'
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
# ---------------------------------------------------------------------------
|
| 40 |
+
# OpenAI client (routed through OpenRouter by default)
|
| 41 |
+
# ---------------------------------------------------------------------------
|
| 42 |
+
|
| 43 |
+
_llm = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
|
| 44 |
+
|
| 45 |
+
# ---------------------------------------------------------------------------
|
| 46 |
+
# System prompt shared across all tasks
|
| 47 |
+
# ---------------------------------------------------------------------------
|
| 48 |
+
|
| 49 |
+
_SYSTEM_PROMPT = """\
|
| 50 |
+
You are an expert constraint compiler for a university scheduling system.
|
| 51 |
+
Your job is to convert a natural-language scheduling constraint into a JSON AST \
|
| 52 |
+
(Abstract Syntax Tree) that precisely captures the logical structure of the rule.
|
| 53 |
+
|
| 54 |
+
════════════════════════════════════════
|
| 55 |
+
SCHEMA
|
| 56 |
+
════════════════════════════════════════
|
| 57 |
+
{
|
| 58 |
+
"type": "hard" | "soft", // hard = must not be violated; soft = penalised if violated
|
| 59 |
+
"name": "<snake_case_identifier>",
|
| 60 |
+
"forall": [ // quantifiers – declare every variable you use
|
| 61 |
+
{"var": "<letter>", "domain": "<domain>"},
|
| 62 |
+
...
|
| 63 |
+
],
|
| 64 |
+
"where": "<expression>", // OPTIONAL filter / guard condition
|
| 65 |
+
"assert": "<expression>" // the constraint body (use XOR with "minimize")
|
| 66 |
+
// OR
|
| 67 |
+
"minimize": "<expression>" // objective to minimise (use XOR with "assert")
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
════════════════════════════════════════
|
| 71 |
+
VALID DOMAINS
|
| 72 |
+
════════════════════════════════════════
|
| 73 |
+
teachers · subjects · branches · days · slots
|
| 74 |
+
|
| 75 |
+
════════════════════════════════════════
|
| 76 |
+
VALID FUNCTIONS (arity shown)
|
| 77 |
+
════════════════════════════════════════
|
| 78 |
+
subject_type(branch, subject) → 2 args returns 'online'|'practical'|...
|
| 79 |
+
schedule(branch, subject, day, slot) → 4 args returns 0 or 1
|
| 80 |
+
occupies(branch, subject, day, slot) → 4 args returns 0 or 1
|
| 81 |
+
occupies_teacher(teacher, day, slot) → 3 args returns 0 or 1
|
| 82 |
+
teaches(teacher, subject) → 2 args returns 0 or 1
|
| 83 |
+
SUM(expr) → 1 arg aggregate sum
|
| 84 |
+
COUNT(expr) → 1 arg aggregate count
|
| 85 |
+
|
| 86 |
+
════════════════════════════════════════
|
| 87 |
+
OPERATORS
|
| 88 |
+
════════════════════════════════════════
|
| 89 |
+
Logical : AND OR NOT IN
|
| 90 |
+
Compare : == != <= >= < >
|
| 91 |
+
|
| 92 |
+
Days are 0-indexed integers (Monday = 0 … Saturday = 5).
|
| 93 |
+
|
| 94 |
+
════════════════════════════════════════
|
| 95 |
+
RULES
|
| 96 |
+
════════════════════════════════════════
|
| 97 |
+
• Every variable referenced in "where", "assert", or "minimize" MUST be declared in "forall".
|
| 98 |
+
• "where" acts as a guard – only pairs/tuples satisfying it are considered.
|
| 99 |
+
• Use "hard" for absolute rules and "soft" for optimisation objectives.
|
| 100 |
+
• Nested calls like SUM(schedule(b, sub, d, s)) are allowed.
|
| 101 |
+
|
| 102 |
+
════════════════════════════════════════
|
| 103 |
+
EXAMPLE
|
| 104 |
+
═��══════════════════════════════════════
|
| 105 |
+
INPUT:
|
| 106 |
+
"There should not be any classes on saturday, except online classes."
|
| 107 |
+
|
| 108 |
+
OUTPUT:
|
| 109 |
+
{
|
| 110 |
+
"type": "hard",
|
| 111 |
+
"name": "no_non_online_saturday",
|
| 112 |
+
"forall": [
|
| 113 |
+
{"var": "b", "domain": "branches"},
|
| 114 |
+
{"var": "sub", "domain": "subjects"},
|
| 115 |
+
{"var": "d", "domain": "days"},
|
| 116 |
+
{"var": "s", "domain": "slots"}
|
| 117 |
+
],
|
| 118 |
+
"where": "subject_type(b, sub) != 'online' AND d == 5",
|
| 119 |
+
"assert": "schedule(b, sub, d, s) == 0"
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
EXPLANATION:
|
| 123 |
+
• type="hard" – absolute rule, no violation allowed.
|
| 124 |
+
• forall – all four variables appear in the expressions, so all four are declared.
|
| 125 |
+
• where – guard: only consider rows where the subject is NOT online AND the day is Saturday (index 5).
|
| 126 |
+
• assert – for every qualifying (b, sub, d, s) tuple, the schedule slot must be 0 (i.e., no class).
|
| 127 |
+
|
| 128 |
+
════════════════════════════════════════
|
| 129 |
+
Return ONLY the raw JSON object.
|
| 130 |
+
Do NOT wrap in markdown fences. Do NOT add any explanation outside the JSON.
|
| 131 |
+
Every key/value must appear on a single line in the final output if possible.
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# ---------------------------------------------------------------------------
|
| 136 |
+
# LLM call
|
| 137 |
+
# ---------------------------------------------------------------------------
|
| 138 |
+
|
| 139 |
+
def _call_llm(prompt: str) -> str:
    """Send *prompt* to the configured LLM and return its raw text reply.

    Decoding is deterministic (temperature 0.0) so baseline runs are
    reproducible.  Returns an empty string when the provider yields no
    content.
    """
    chat_messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    completion = _llm.chat.completions.create(
        model=MODEL_NAME,
        messages=chat_messages,
        temperature=0.0,
    )
    reply = completion.choices[0].message.content
    return reply if reply is not None else ""
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# ---------------------------------------------------------------------------
|
| 153 |
+
# Single-task evaluation (synchronous, one step per task)
|
| 154 |
+
# ---------------------------------------------------------------------------
|
| 155 |
+
|
| 156 |
+
def _run_task(task_id: str, env_url: str = "http://localhost:8000") -> None:
    """
    Run one episode for the given difficulty task.

    Uses the ConstraintEnv WebSocket client to interact with the live server,
    then prints [START], one [STEP], and [END] to stdout.

    Args:
        task_id: Difficulty identifier ("easy", "medium", or "hard") passed
            to the environment's reset().
        env_url: Base URL of the running environment server.
    """
    # Import fallbacks: works both when run from the package root (flat
    # modules) and when installed as the `constraint_env` package.
    try:
        from client import ConstraintEnv
    except ImportError:
        from constraint_env.client import ConstraintEnv  # type: ignore

    try:
        from models import ConstraintAction
    except ImportError:
        from constraint_env.models import ConstraintAction  # type: ignore

    # Mutable episode bookkeeping shared with the inner coroutine.
    # `rewards` is only mutated via append(), so it needs no `nonlocal`.
    rewards: list[float] = []
    step_count = 0
    last_error = None
    success = False

    # ------------------------------------------------------------------
    async def _run():
        nonlocal step_count, last_error, success

        async with ConstraintEnv(
            env_url,
            connect_timeout_s=30.0,
            message_timeout_s=120.0,
        ) as env:
            # Reset with the requested difficulty
            obs = await env.reset(task_id=task_id)

            _emit_start(task_id)

            prompt_text = obs.observation.prompt

            # Generate AST from LLM
            raw_output = _call_llm(prompt_text)

            # Sanitise: strip markdown fences if present
            raw_output = raw_output.strip()
            if raw_output.startswith("```"):
                raw_output = raw_output.split("\n", 1)[-1]
                raw_output = raw_output.rsplit("```", 1)[0].strip()

            # Compact the JSON to a single line (rule: no newlines inside [STEP])
            try:
                action_str = json.dumps(json.loads(raw_output), separators=(",", ":"))
            except (json.JSONDecodeError, TypeError):
                # Not valid JSON: fall back to flattening whitespace so the
                # [STEP] line stays single-line; the env will penalise it.
                action_str = raw_output.replace("\n", " ").replace("\r", "")

            action = ConstraintAction(ast_output=action_str)
            step_result = await env.step(action)

            # Single-step episode: one action, one grade.
            step_count = 1
            reward = float(step_result.reward or 0.0)
            done = bool(step_result.done)
            last_error = step_result.observation.info.get("error") if step_result.observation else None
            rewards.append(reward)

            _emit_step(step_count, action_str, reward, done, last_error)

            # Success means at least the JSON + structure stages passed
            # (matches the environment's reward weight breakdown).
            success = (reward >= (_W_JSON + _W_STRUCTURE)) if done else False

    asyncio.run(_run())
    _emit_end(success, step_count, rewards)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
# ---------------------------------------------------------------------------
|
| 227 |
+
# Reward weights (mirrored from environment for success threshold)
|
| 228 |
+
# ---------------------------------------------------------------------------
|
| 229 |
+
|
| 230 |
+
# Success-threshold components: an episode counts as a success when the
# reward covers at least the JSON-parse and structure stages.
# NOTE(review): these mirror the weights declared in
# server/constraint_env_environment.py — keep the two in sync.
_W_JSON = 1 / 8
_W_STRUCTURE = 2 / 8
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# ---------------------------------------------------------------------------
|
| 235 |
+
# Stdout emitters – the ONLY things that print to stdout
|
| 236 |
+
# ---------------------------------------------------------------------------
|
| 237 |
+
|
| 238 |
+
def _emit_start(task_id: str) -> None:
    """Print the [START] protocol banner for *task_id* to stdout."""
    banner = f"[START] task={task_id} env=constraint_env model={MODEL_NAME}"
    print(banner, flush=True)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def _emit_step(
|
| 243 |
+
step: int,
|
| 244 |
+
action: str,
|
| 245 |
+
reward: float,
|
| 246 |
+
done: bool,
|
| 247 |
+
error: str | None,
|
| 248 |
+
) -> None:
|
| 249 |
+
done_str = "true" if done else "false"
|
| 250 |
+
error_str = error if error else "null"
|
| 251 |
+
# Ensure action has no newlines (single line rule)
|
| 252 |
+
action_safe = action.replace("\n", " ").replace("\r", "")
|
| 253 |
+
print(
|
| 254 |
+
f"[STEP] step={step} action={action_safe} "
|
| 255 |
+
f"reward={reward:.2f} done={done_str} error={error_str}",
|
| 256 |
+
flush=True,
|
| 257 |
+
)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _emit_end(success: bool, steps: int, rewards: list[float]) -> None:
|
| 261 |
+
success_str = "true" if success else "false"
|
| 262 |
+
rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.00"
|
| 263 |
+
print(
|
| 264 |
+
f"[END] success={success_str} steps={steps} rewards={rewards_str}",
|
| 265 |
+
flush=True,
|
| 266 |
+
)
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
# ---------------------------------------------------------------------------
|
| 270 |
+
# Entry point
|
| 271 |
+
# ---------------------------------------------------------------------------
|
| 272 |
+
|
| 273 |
+
if __name__ == "__main__":
    # Server URL is configurable so the script can target a remote Space.
    SERVER_URL = os.getenv("ENV_SERVER_URL", "http://localhost:8000")

    # Run one episode per difficulty to produce the full baseline report.
    for difficulty in ("easy", "medium", "hard"):
        try:
            _run_task(task_id=difficulty, env_url=SERVER_URL)
        except Exception as exc:
            # Guarantee [END] is always emitted even on failure
            _emit_end(success=False, steps=0, rewards=[])
            print(f"# ERROR during {difficulty}: {exc}", file=sys.stderr, flush=True)
|
models.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Data models for the Constraint Env Environment.
|
| 9 |
+
|
| 10 |
+
The constraint_env environment is a simple test environment that echoes back messages.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 14 |
+
from typing import Dict, Any, Optional
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class ConstraintAction(Action):
    """Action carrying the LLM's constraint AST as a raw JSON string."""

    # JSON-serialised constraint AST produced by the agent; the environment
    # parses and grades this string in step().
    ast_output: str
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class ConstraintObservation(Observation):
    """Observation from the environment: the user prompt plus scoring data."""

    # Natural-language constraint the agent must translate into the AST DSL.
    prompt: str
    # False on reset; True once the single grading step has completed.
    done: bool
    # Scored reward for the step (0.0 immediately after reset).
    reward: float
    # Auxiliary data, e.g. difficulty, "error", "exact_match" keys.
    info: Dict[str, Any]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class ConstraintState(State):
    """Current state of the environment."""

    # Unique identifier of the active episode; None before the first reset().
    episode_id: Optional[str] = None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
openenv.yaml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: constraint_env
|
| 3 |
+
description: >
|
| 4 |
+
Natural-language to constraint-AST translation environment for RL training.
|
| 5 |
+
Agents translate scheduling constraints written in plain English into a
|
| 6 |
+
structured JSON DSL (Abstract Syntax Tree).
|
| 7 |
+
type: space
|
| 8 |
+
runtime: fastapi
|
| 9 |
+
app: server.app:app
|
| 10 |
+
port: 8000
|
| 11 |
+
tasks:
|
| 12 |
+
- id: easy
|
| 13 |
+
description: Single quantifier, direct assert, no WHERE clause
|
| 14 |
+
difficulty: easy
|
| 15 |
+
- id: medium
|
| 16 |
+
description: Two quantifiers with a WHERE filter clause and combined assert
|
| 17 |
+
difficulty: medium
|
| 18 |
+
- id: hard
|
| 19 |
+
description: Multiple quantifiers, nested WHERE with AND/OR, minimize objective
|
| 20 |
+
difficulty: hard
|
| 21 |
+
tags:
|
| 22 |
+
- openenv
|
| 23 |
+
- scheduling
|
| 24 |
+
- nlp-to-dsl
|
| 25 |
+
- constraint-satisfaction
|
openenv_constraint_env.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-constraint_env
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Constraint Env environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: dotenv>=0.9.9
|
| 7 |
+
Requires-Dist: openenv-core[core]>=0.2.2
|
| 8 |
+
Provides-Extra: dev
|
| 9 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 10 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_constraint_env.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
__init__.py
|
| 3 |
+
client.py
|
| 4 |
+
dataset_example.py
|
| 5 |
+
inference.py
|
| 6 |
+
models.py
|
| 7 |
+
pyproject.toml
|
| 8 |
+
./__init__.py
|
| 9 |
+
./client.py
|
| 10 |
+
./dataset_example.py
|
| 11 |
+
./inference.py
|
| 12 |
+
./models.py
|
| 13 |
+
openenv_constraint_env.egg-info/PKG-INFO
|
| 14 |
+
openenv_constraint_env.egg-info/SOURCES.txt
|
| 15 |
+
openenv_constraint_env.egg-info/dependency_links.txt
|
| 16 |
+
openenv_constraint_env.egg-info/entry_points.txt
|
| 17 |
+
openenv_constraint_env.egg-info/requires.txt
|
| 18 |
+
openenv_constraint_env.egg-info/top_level.txt
|
| 19 |
+
server/__init__.py
|
| 20 |
+
server/app.py
|
| 21 |
+
server/constraint_env_environment.py
|
openenv_constraint_env.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_constraint_env.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = constraint_env.server.app:main
|
openenv_constraint_env.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dotenv>=0.9.9
|
| 2 |
+
openenv-core[core]>=0.2.2
|
| 3 |
+
|
| 4 |
+
[dev]
|
| 5 |
+
pytest>=8.0.0
|
| 6 |
+
pytest-cov>=4.0.0
|
openenv_constraint_env.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
constraint_env
|
problem_statement.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Functional Requirements
|
| 2 |
+
1. Real-World Task Simulation
|
| 3 |
+
The environment must represent tasks that humans actually perform in real settings—no games or toy problems.
|
| 4 |
+
Examples include email triage, code review, data cleaning, scheduling, customer support, and content moderation.
|
| 5 |
+
________________
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
2. OpenEnv Specification Compliance
|
| 9 |
+
The environment must fully implement the OpenEnv interface, including:
|
| 10 |
+
* Typed Observation, Action, and Reward models using Pydantic
|
| 11 |
+
* step(action) → returns (observation, reward, done, info)
|
| 12 |
+
* reset() → returns the initial observation
|
| 13 |
+
* state() → returns the current state
|
| 14 |
+
* An openenv.yaml file containing metadata
|
| 15 |
+
The implementation must successfully pass validation via openenv validate.
|
| 16 |
+
________________
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
3. Minimum of Three Tasks with Agent Graders
|
| 20 |
+
* Provide at least three tasks, each with a clearly defined objective
|
| 21 |
+
* Tasks should span increasing difficulty: easy → medium → hard
|
| 22 |
+
* Each task must include a programmatic grader that assigns a score between 0.0 and 1.0
|
| 23 |
+
* Grading criteria must be clear, deterministic, and reproducible
|
| 24 |
+
________________
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
4. Meaningful Reward Function
|
| 28 |
+
* The reward function must provide feedback throughout the task trajectory, not just at completion
|
| 29 |
+
* It should reward incremental progress toward the objective
|
| 30 |
+
* It must penalize undesirable behaviors such as infinite loops or destructive actions
|
| 31 |
+
________________
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
5. Baseline Inference Script
|
| 35 |
+
* Include an inference script that uses the OpenAI API client to evaluate a model within the environment
|
| 36 |
+
* API credentials must be read from environment variables (HF_TOKEN)
|
| 37 |
+
* The script should produce a reproducible baseline score across all tasks
|
| 38 |
+
________________
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
Non-Functional Requirements
|
| 42 |
+
1. Deployment on Hugging Face Spaces
|
| 43 |
+
* The environment must be deployable as a containerized Hugging Face Space
|
| 44 |
+
* It should be tagged with openenv
|
| 45 |
+
________________
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
2. Containerized Execution
|
| 49 |
+
* Provide a working Dockerfile
|
| 50 |
+
* The environment must build and run successfully using:
|
| 51 |
+
* docker build
|
| 52 |
+
* docker run
|
| 53 |
+
________________
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
3. Documentation
|
| 57 |
+
The README must include:
|
| 58 |
+
* Environment overview and motivation
|
| 59 |
+
* Definitions of action and observation spaces
|
| 60 |
+
* Task descriptions with expected difficulty levels
|
| 61 |
+
* Setup and usage instructions
|
| 62 |
+
* Baseline performance scores
|
| 63 |
+
|
| 64 |
+
Additional Guideline: Meta OpenEnv Hackathon: Guidelines
|
pyproject.toml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-constraint_env"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Constraint Env environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
"dotenv>=0.9.9",
|
| 18 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 19 |
+
# install from github
|
| 20 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 21 |
+
"openenv-core[core]>=0.2.2",
|
| 22 |
+
# Environment-specific dependencies
|
| 23 |
+
# Add all dependencies needed for your environment here
|
| 24 |
+
# Examples:
|
| 25 |
+
# "numpy>=1.19.0",
|
| 26 |
+
# "torch>=2.0.0",
|
| 27 |
+
# "gymnasium>=0.29.0",
|
| 28 |
+
# "openspiel>=1.0.0",
|
| 29 |
+
# "smolagents>=1.22.0,<2",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
[project.optional-dependencies]
|
| 33 |
+
dev = [
|
| 34 |
+
"pytest>=8.0.0",
|
| 35 |
+
"pytest-cov>=4.0.0",
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
[project.scripts]
|
| 39 |
+
# Server entry point - enables running via: uv run --project . server
|
| 40 |
+
# or: python -m constraint_env.server.app
|
| 41 |
+
server = "constraint_env.server.app:main"
|
| 42 |
+
|
| 43 |
+
[tool.setuptools]
|
| 44 |
+
include-package-data = true
|
| 45 |
+
packages = ["constraint_env", "constraint_env.server"]
|
| 46 |
+
package-dir = { "constraint_env" = ".", "constraint_env.server" = "server" }
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Constraint Env environment server components."""
|
| 8 |
+
|
| 9 |
+
from .constraint_env_environment import ConstraintEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["ConstraintEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Constraint Env Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the ConstraintEnvironment
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Endpoints:
|
| 14 |
+
- POST /reset: Reset the environment
|
| 15 |
+
- POST /step: Execute an action
|
| 16 |
+
- GET /state: Get current environment state
|
| 17 |
+
- GET /schema: Get action/observation schemas
|
| 18 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Development (with auto-reload):
|
| 22 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
+
|
| 24 |
+
# Production:
|
| 25 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
+
|
| 27 |
+
# Or run directly:
|
| 28 |
+
python -m server.app
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from openenv.core.env_server.http_server import create_app
|
| 33 |
+
except Exception as e: # pragma: no cover
|
| 34 |
+
raise ImportError(
|
| 35 |
+
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
+
) from e
|
| 37 |
+
|
| 38 |
+
try:
|
| 39 |
+
from ..models import ConstraintAction, ConstraintObservation
|
| 40 |
+
from .constraint_env_environment import ConstraintEnvironment
|
| 41 |
+
except ImportError:
|
| 42 |
+
from constraint_env.models import ConstraintAction, ConstraintObservation
|
| 43 |
+
from constraint_env.server.constraint_env_environment import ConstraintEnvironment
|
| 44 |
+
|
| 45 |
+
# Load the dataset so the environment can be initialised without crashing.
|
| 46 |
+
try:
|
| 47 |
+
from dataset_example import dataset as _DATASET
|
| 48 |
+
except ImportError:
|
| 49 |
+
from constraint_env.dataset_example import dataset as _DATASET # type: ignore
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _make_env():
    """Build a ConstraintEnvironment wired to the pre-loaded module dataset.

    create_app() invokes this factory, so each session gets a fresh
    environment instance sharing the same dataset.
    """
    environment = ConstraintEnvironment(dataset=_DATASET)
    return environment
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Create the app – pass the factory so create_app calls _make_env() per session.
|
| 58 |
+
app = create_app(
|
| 59 |
+
_make_env,
|
| 60 |
+
ConstraintAction,
|
| 61 |
+
ConstraintObservation,
|
| 62 |
+
env_name="constraint_env",
|
| 63 |
+
max_concurrent_envs=1,
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# ---------------------------------------------------------------------------
|
| 68 |
+
# PWA manifest – browsers request this at root level for the web UI
|
| 69 |
+
# ---------------------------------------------------------------------------
|
| 70 |
+
|
| 71 |
+
from fastapi.responses import JSONResponse # noqa: E402
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@app.get("/manifest.json", include_in_schema=False)
async def web_manifest():
    """Serve the PWA manifest that browsers request at the site root.

    The payload is static, so it is served with a one-hour public cache.
    """
    manifest = {
        "name": "Constraint Environment",
        "short_name": "ConstraintEnv",
        "description": "RL training environment: natural-language → constraint AST",
        "start_url": "/web/",
        "display": "standalone",
        "background_color": "#1e1e2e",
        "theme_color": "#7c3aed",
        "icons": [
            {
                "src": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
                "sizes": "any",
                "type": "image/svg+xml",
            }
        ],
    }
    return JSONResponse(
        content=manifest,
        headers={"Cache-Control": "public, max-age=3600"},
    )
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def main(host: str = "localhost", port: int = 8000) -> None:
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 8001
        python -m constraint_env.server.app

    Args:
        host: Host address to bind to (default: "localhost")
        port: Port number to listen on (default: 8000)

    For production deployments, consider using uvicorn directly with
    multiple workers:
        uvicorn constraint_env.server.app:app --workers 4
    """
    # Imported lazily so importing this module (e.g. just for `app`)
    # does not require uvicorn to be installed at import time.
    import uvicorn

    uvicorn.run(app, host=host, port=port)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
if __name__ == "__main__":
    import argparse

    # Minimal CLI: only the bind address and port are configurable.
    cli = argparse.ArgumentParser(description="Constraint Env FastAPI Server")
    cli.add_argument("--host", type=str, default="localhost")
    cli.add_argument("--port", type=int, default=8000)
    opts = cli.parse_args()
    main(host=opts.host, port=opts.port)
|
server/constraint_env_environment.py
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Constraint Env Environment Implementation.
|
| 9 |
+
|
| 10 |
+
Evaluates an LLM's ability to convert natural-language scheduling
|
| 11 |
+
constraints into a JSON AST DSL.
|
| 12 |
+
|
| 13 |
+
Reward breakdown per step:
|
| 14 |
+
+0.125 valid JSON (1 / 8 of max)
|
| 15 |
+
+0.250 correct top-level structure (2 / 8 of max)
|
| 16 |
+
+0.625 exact match with target AST (5 / 8 of max)
|
| 17 |
+
──────
|
| 18 |
+
1.000 total maximum reward
|
| 19 |
+
|
| 20 |
+
Penalties:
|
| 21 |
+
-0.250 bad_structure (structure wrong but JSON parsed)
|
| 22 |
+
-0.250 invalid_json (cannot parse at all, replaces reward=0)
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import re
|
| 26 |
+
import json
|
| 27 |
+
from uuid import uuid4
|
| 28 |
+
from typing import Any, Dict, List, Optional
|
| 29 |
+
|
| 30 |
+
from openenv.core.env_server.interfaces import Environment
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
from ..models import ConstraintAction, ConstraintObservation, ConstraintState
|
| 34 |
+
except ImportError:
|
| 35 |
+
from models import ConstraintAction, ConstraintObservation, ConstraintState
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ---------------------------------------------------------------------------
|
| 39 |
+
# Domain knowledge
|
| 40 |
+
# ---------------------------------------------------------------------------
|
| 41 |
+
|
| 42 |
+
VALID_DOMAINS = {"teachers", "subjects", "branches", "days", "slots"}
|
| 43 |
+
|
| 44 |
+
VALID_FUNCTIONS: Dict[str, int] = {
|
| 45 |
+
"subject_type": 2,
|
| 46 |
+
"schedule": 4,
|
| 47 |
+
"occupies": 4,
|
| 48 |
+
"occupies_teacher": 3,
|
| 49 |
+
"teaches": 2,
|
| 50 |
+
"SUM": 1,
|
| 51 |
+
"COUNT": 1,
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
# Reward weights (must sum to 1.0)
|
| 55 |
+
_W_JSON = 1 / 8 # 0.125
|
| 56 |
+
_W_STRUCTURE = 2 / 8 # 0.250
|
| 57 |
+
_W_MATCH = 5 / 8 # 0.625
|
| 58 |
+
_PENALTY_BAD_STRUCTURE = -0.250
|
| 59 |
+
_PENALTY_INVALID_JSON = -0.250
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ---------------------------------------------------------------------------
|
| 63 |
+
# Environment
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
|
| 66 |
+
class ConstraintEnvironment(Environment):
|
| 67 |
+
"""
|
| 68 |
+
OpenEnv environment for natural-language → constraint-AST translation.
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
dataset: Dict with keys "easy", "medium", "hard", each a list of
|
| 72 |
+
{"prompt": str, "target_ast": dict} entries.
|
| 73 |
+
If omitted, the built-in dataset_example is used.
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
    def __init__(self, dataset: Optional[Dict[str, List[Dict]]] = None):
        """Initialise the environment with a (possibly default) dataset.

        Args:
            dataset: Mapping of difficulty ("easy"/"medium"/"hard") to lists
                of {"prompt": str, "target_ast": dict} samples.  When None,
                the bundled dataset_example module is loaded instead.
        """
        if dataset is None:
            # Fallback import chain mirrors server/app.py: flat layout first,
            # then the installed-package layout.
            try:
                from dataset_example import dataset as _ds
            except ImportError:
                from constraint_env.dataset_example import dataset as _ds  # type: ignore
            dataset = _ds

        self._dataset = dataset
        # Active difficulty; reset() may change it per episode.
        self._difficulty: str = "easy"
        # Per-difficulty cursor used to cycle through samples round-robin.
        self._indexes: Dict[str, int] = {k: 0 for k in dataset}
        # Sample being graded; None until the first reset().
        self._current_sample: Optional[Dict] = None
        self._state = ConstraintState(episode_id=None)
|
| 89 |
+
|
| 90 |
+
# ------------------------------------------------------------------
|
| 91 |
+
# OpenEnv interface
|
| 92 |
+
# ------------------------------------------------------------------
|
| 93 |
+
|
| 94 |
+
    def reset(self, task_id: Optional[str] = None):
        """
        Reset the environment for a new episode.

        Args:
            task_id: One of "easy", "medium", or "hard".
                Defaults to cycling through "easy".

        Returns:
            ConstraintObservation with the sample prompt, done=False and
            zero reward.
        """
        if task_id and task_id in self._dataset:
            self._difficulty = task_id
        elif task_id is None:
            # Default: cycle through easy samples
            self._difficulty = "easy"
        # NOTE(review): a task_id that is neither None nor a dataset key is
        # silently ignored and the previous difficulty is reused — consider
        # raising ValueError to surface typos.

        # Round-robin through the pool so repeated resets see every sample.
        pool = self._dataset[self._difficulty]
        idx = self._indexes[self._difficulty]
        self._current_sample = pool[idx]
        self._indexes[self._difficulty] = (idx + 1) % len(pool)
        self._state = ConstraintState(episode_id=str(uuid4()))

        return ConstraintObservation(
            prompt=self._current_sample["prompt"],
            done=False,
            reward=0.0,
            info={"difficulty": self._difficulty},
        )
|
| 120 |
+
|
| 121 |
+
def step(self, action: ConstraintAction):
|
| 122 |
+
"""
|
| 123 |
+
Evaluate the agent's AST output and return a scored observation.
|
| 124 |
+
Reward is normalised to [−0.25, 1.0].
|
| 125 |
+
"""
|
| 126 |
+
reward = 0.0
|
| 127 |
+
info: Dict[str, Any] = {"difficulty": self._difficulty}
|
| 128 |
+
|
| 129 |
+
# ── 1. Parse JSON ────────────────────────────────────────────
|
| 130 |
+
try:
|
| 131 |
+
ast = json.loads(action.ast_output)
|
| 132 |
+
reward += _W_JSON
|
| 133 |
+
except (json.JSONDecodeError, TypeError):
|
| 134 |
+
reward += _PENALTY_INVALID_JSON
|
| 135 |
+
return ConstraintObservation(
|
| 136 |
+
prompt=self._current_sample["prompt"],
|
| 137 |
+
done=True,
|
| 138 |
+
reward=round(reward, 4),
|
| 139 |
+
info={**info, "error": "invalid_json"},
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
# ── 2. Validate structure ─────────────────────────────────────
|
| 143 |
+
if self._validate_structure(ast):
|
| 144 |
+
reward += _W_STRUCTURE
|
| 145 |
+
else:
|
| 146 |
+
reward += _PENALTY_BAD_STRUCTURE
|
| 147 |
+
info["error"] = "bad_structure"
|
| 148 |
+
|
| 149 |
+
# ── 3. Logic match (ignores "name" – user-chosen identifier) ──────
|
| 150 |
+
if "target_ast" in self._current_sample:
|
| 151 |
+
if self._logic_match(ast, self._current_sample["target_ast"]):
|
| 152 |
+
reward += _W_MATCH
|
| 153 |
+
info["exact_match"] = True
|
| 154 |
+
else:
|
| 155 |
+
info["exact_match"] = False
|
| 156 |
+
|
| 157 |
+
return ConstraintObservation(
|
| 158 |
+
prompt=self._current_sample["prompt"],
|
| 159 |
+
done=True,
|
| 160 |
+
reward=round(reward, 4),
|
| 161 |
+
info=info,
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
    @property
    def state(self):
        """Return the current ConstraintState (episode_id of the active run)."""
        return self._state
|
| 167 |
+
|
| 168 |
+
# ------------------------------------------------------------------
|
| 169 |
+
# Validation helpers
|
| 170 |
+
# ------------------------------------------------------------------
|
| 171 |
+
|
| 172 |
+
@staticmethod
|
| 173 |
+
def _logic_match(ast: Dict[str, Any], target: Dict[str, Any]) -> bool:
|
| 174 |
+
"""
|
| 175 |
+
Compare two ASTs on every logically meaningful field, ignoring "name".
|
| 176 |
+
|
| 177 |
+
Fields compared:
|
| 178 |
+
• type – hard / soft
|
| 179 |
+
• forall – same variable declarations (order-independent)
|
| 180 |
+
• where – optional guard expression (string equality)
|
| 181 |
+
• assert – constraint body } exactly one
|
| 182 |
+
• minimize – objective body }
|
| 183 |
+
|
| 184 |
+
"name" is intentionally excluded — it is a free-form user-chosen
|
| 185 |
+
snake_case identifier that has no effect on the constraint's semantics.
|
| 186 |
+
"""
|
| 187 |
+
_LOGIC_KEYS = {"type", "forall", "where", "assert", "minimize"}
|
| 188 |
+
|
| 189 |
+
# Collect only logic keys present in either dict
|
| 190 |
+
all_keys = (set(ast.keys()) | set(target.keys())) & _LOGIC_KEYS
|
| 191 |
+
|
| 192 |
+
for key in all_keys:
|
| 193 |
+
a_val = ast.get(key)
|
| 194 |
+
t_val = target.get(key)
|
| 195 |
+
|
| 196 |
+
if key == "forall":
|
| 197 |
+
# Order of variable declarations doesn't matter;
|
| 198 |
+
# compare as a frozenset of (var, domain) pairs.
|
| 199 |
+
try:
|
| 200 |
+
a_set = frozenset(
|
| 201 |
+
(d["var"], d["domain"]) for d in (a_val or [])
|
| 202 |
+
)
|
| 203 |
+
t_set = frozenset(
|
| 204 |
+
(d["var"], d["domain"]) for d in (t_val or [])
|
| 205 |
+
)
|
| 206 |
+
if a_set != t_set:
|
| 207 |
+
return False
|
| 208 |
+
except (TypeError, KeyError):
|
| 209 |
+
return False
|
| 210 |
+
else:
|
| 211 |
+
if a_val != t_val:
|
| 212 |
+
return False
|
| 213 |
+
|
| 214 |
+
return True
|
| 215 |
+
|
| 216 |
+
def _validate_structure(self, ast: Dict[str, Any]) -> bool:
|
| 217 |
+
"""Return True if AST follows the expected schema."""
|
| 218 |
+
|
| 219 |
+
# 1. Top-level type field must be exactly "hard" or "soft"
|
| 220 |
+
if ast.get("type") not in {"hard", "soft"}:
|
| 221 |
+
return False
|
| 222 |
+
|
| 223 |
+
# 2. Must have forall
|
| 224 |
+
if "forall" not in ast:
|
| 225 |
+
return False
|
| 226 |
+
|
| 227 |
+
if not isinstance(ast["forall"], list) or len(ast["forall"]) == 0:
|
| 228 |
+
return False
|
| 229 |
+
|
| 230 |
+
# 3. Build variable scope from forall declarations
|
| 231 |
+
scope: Dict[str, str] = {}
|
| 232 |
+
for var_decl in ast["forall"]:
|
| 233 |
+
var = var_decl.get("var")
|
| 234 |
+
domain = var_decl.get("domain")
|
| 235 |
+
|
| 236 |
+
if not var or not domain:
|
| 237 |
+
return False
|
| 238 |
+
|
| 239 |
+
if domain not in VALID_DOMAINS:
|
| 240 |
+
return False
|
| 241 |
+
|
| 242 |
+
scope[var] = domain
|
| 243 |
+
|
| 244 |
+
# 4. Validate optional WHERE clause
|
| 245 |
+
if "where" in ast:
|
| 246 |
+
if not self._validate_expression(ast["where"], scope):
|
| 247 |
+
return False
|
| 248 |
+
|
| 249 |
+
# 5. Validate payload: assert OR minimize (one required)
|
| 250 |
+
if "assert" in ast:
|
| 251 |
+
return self._validate_expression(ast["assert"], scope)
|
| 252 |
+
elif "minimize" in ast:
|
| 253 |
+
return self._validate_expression(ast["minimize"], scope)
|
| 254 |
+
else:
|
| 255 |
+
return False
|
| 256 |
+
|
| 257 |
+
def _validate_expression(self, expr: str, scope: Dict[str, str]) -> bool:
|
| 258 |
+
"""
|
| 259 |
+
Validate that all identifiers in an expression are in scope or known.
|
| 260 |
+
|
| 261 |
+
Follows the grammar:
|
| 262 |
+
boolean_expr ::= … AND | OR | NOT … | comparison | predicate | "(" … ")"
|
| 263 |
+
comparison ::= arithmetic_expr comp_op arithmetic_expr
|
| 264 |
+
arithmetic_expr ::= … + | - | * … | function_call | number | identifier
|
| 265 |
+
number ::= digit { digit } (multi-digit allowed)
|
| 266 |
+
|
| 267 |
+
Extensions beyond the BNF (accepted but not spec'd):
|
| 268 |
+
• String literals `'online'`, `'practical'` – stripped before tokenising
|
| 269 |
+
• IN keyword – used in Python-style membership tests
|
| 270 |
+
"""
|
| 271 |
+
# Strip quoted string literals (grammar extension – not in BNF)
|
| 272 |
+
expr_stripped = re.sub(r"'[^']*'", "", expr)
|
| 273 |
+
expr_stripped = re.sub(r'"[^"]*"', "", expr_stripped)
|
| 274 |
+
|
| 275 |
+
# Strip all numbers (grammar: number ::= digit { digit })
|
| 276 |
+
expr_stripped = re.sub(r"\b\d+\b", "", expr_stripped)
|
| 277 |
+
|
| 278 |
+
tokens = self._extract_identifiers(expr_stripped)
|
| 279 |
+
|
| 280 |
+
# Keywords defined in the grammar + accepted extensions
|
| 281 |
+
GRAMMAR_KEYWORDS = {"AND", "OR", "NOT"} # boolean_expr operators
|
| 282 |
+
EXTENSIONS = {"IN", "true", "false", "null"} # beyond BNF
|
| 283 |
+
|
| 284 |
+
for token in tokens:
|
| 285 |
+
if token in scope:
|
| 286 |
+
continue
|
| 287 |
+
if token in VALID_FUNCTIONS:
|
| 288 |
+
continue
|
| 289 |
+
if token in GRAMMAR_KEYWORDS or token in EXTENSIONS:
|
| 290 |
+
continue
|
| 291 |
+
# Unknown identifier
|
| 292 |
+
return False
|
| 293 |
+
|
| 294 |
+
return self._validate_function_calls(expr)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
@staticmethod
|
| 298 |
+
def _validate_function_calls(expr: str) -> bool:
|
| 299 |
+
"""Validate that all function calls use known functions with correct arity.
|
| 300 |
+
|
| 301 |
+
Handles nested calls like SUM(occupies(b, sub, d, s)) by counting only
|
| 302 |
+
top-level commas (those not inside inner parentheses) to determine arity.
|
| 303 |
+
"""
|
| 304 |
+
# Find every function call by name; walk the string to pair parens properly
|
| 305 |
+
func_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)\(")
|
| 306 |
+
|
| 307 |
+
i = 0
|
| 308 |
+
while i < len(expr):
|
| 309 |
+
m = func_pattern.search(expr, i)
|
| 310 |
+
if not m:
|
| 311 |
+
break
|
| 312 |
+
|
| 313 |
+
func_name = m.group(1)
|
| 314 |
+
# Position after the opening '('
|
| 315 |
+
start = m.end()
|
| 316 |
+
|
| 317 |
+
# Walk to find the matching closing ')'
|
| 318 |
+
depth = 1
|
| 319 |
+
j = start
|
| 320 |
+
while j < len(expr) and depth > 0:
|
| 321 |
+
if expr[j] == "(":
|
| 322 |
+
depth += 1
|
| 323 |
+
elif expr[j] == ")":
|
| 324 |
+
depth -= 1
|
| 325 |
+
j += 1
|
| 326 |
+
|
| 327 |
+
# args_str is everything between the outer parens
|
| 328 |
+
args_str = expr[start : j - 1]
|
| 329 |
+
|
| 330 |
+
if func_name in VALID_FUNCTIONS:
|
| 331 |
+
# Count top-level commas (depth-0 within args_str)
|
| 332 |
+
if args_str.strip() == "":
|
| 333 |
+
top_level_args = 0
|
| 334 |
+
else:
|
| 335 |
+
top_level_args = 1
|
| 336 |
+
inner_depth = 0
|
| 337 |
+
for ch in args_str:
|
| 338 |
+
if ch == "(":
|
| 339 |
+
inner_depth += 1
|
| 340 |
+
elif ch == ")":
|
| 341 |
+
inner_depth -= 1
|
| 342 |
+
elif ch == "," and inner_depth == 0:
|
| 343 |
+
top_level_args += 1
|
| 344 |
+
|
| 345 |
+
expected = VALID_FUNCTIONS[func_name]
|
| 346 |
+
if top_level_args != expected:
|
| 347 |
+
return False
|
| 348 |
+
else:
|
| 349 |
+
# Not a DSL function — unknown call
|
| 350 |
+
return False
|
| 351 |
+
|
| 352 |
+
i = j # advance past this call
|
| 353 |
+
|
| 354 |
+
return True
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
@staticmethod
|
| 358 |
+
def _extract_identifiers(expr: str):
|
| 359 |
+
return set(re.findall(r"[a-zA-Z_][a-zA-Z0-9_]*", expr))
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
# ---------------------------------------------------------------------------
|
| 363 |
+
# Quick smoke-test
|
| 364 |
+
# ---------------------------------------------------------------------------
|
| 365 |
+
|
| 366 |
+
if __name__ == "__main__":
    import json as _json

    # Resolve the example dataset both when run as part of the package and
    # when run as a standalone module.
    try:
        from ..dataset_example import dataset as _ds
    except ImportError:
        from constraint_env.dataset_example import dataset as _ds  # type: ignore

    env = ConstraintEnvironment(_ds)

    # Exercise each difficulty tier once with a perfect answer and once with
    # malformed JSON, printing the resulting rewards for manual inspection.
    for difficulty in ("easy", "medium", "hard"):
        obs = env.reset(task_id=difficulty)
        print(f"\n[{difficulty.upper()}] prompt: {obs.prompt}")

        # Send a perfect answer: echo the reference AST back verbatim.
        target = env._current_sample["target_ast"]
        action = ConstraintAction(ast_output=_json.dumps(target))
        result = env.step(action)
        print(f" reward={result.reward} done={result.done} info={result.info}")

        # Send bad JSON -- should terminate with the invalid-JSON penalty.
        obs2 = env.reset(task_id=difficulty)
        bad = ConstraintAction(ast_output="this is not json")
        res2 = env.step(bad)
        print(f" [bad JSON] reward={res2.reward} info={res2.info}")
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|