DecentSanage commited on
Commit
5b7b694
·
verified ·
1 Parent(s): cb62358

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local OpenEnv sources)
10
+ # - Standalone environments (with openenv from PyPI/Git)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
+ FROM ${BASE_IMAGE} AS builder
15
+
16
+ WORKDIR /app
17
+
18
+ # Ensure git is available (required for installing dependencies from VCS)
19
+ RUN apt-get update && \
20
+ apt-get install -y --no-install-recommends git && \
21
+ rm -rf /var/lib/apt/lists/*
22
+
23
+ # Build argument to control whether we're building standalone or in-repo
24
+ ARG BUILD_MODE=in-repo
25
+ ARG ENV_NAME=constraint_env
26
+
27
+ # Copy environment code (always at root of build context)
28
+ COPY . /app/env
29
+
30
+ # For in-repo builds, openenv is already vendored in the build context
31
+ # For standalone builds, openenv will be installed via pyproject.toml
32
+ WORKDIR /app/env
33
+
34
+ # Ensure uv is available (for local builds where base image lacks it)
35
+ RUN if ! command -v uv >/dev/null 2>&1; then \
36
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
37
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
38
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
39
+ fi
40
+
41
+ # Install dependencies using uv sync
42
+ # If uv.lock exists, use it; otherwise resolve on the fly
43
+ RUN --mount=type=cache,target=/root/.cache/uv \
44
+ if [ -f uv.lock ]; then \
45
+ uv sync --frozen --no-install-project --no-editable; \
46
+ else \
47
+ uv sync --no-install-project --no-editable; \
48
+ fi
49
+
50
+ RUN --mount=type=cache,target=/root/.cache/uv \
51
+ if [ -f uv.lock ]; then \
52
+ uv sync --frozen --no-editable; \
53
+ else \
54
+ uv sync --no-editable; \
55
+ fi
56
+
57
+ # Final runtime stage
58
+ FROM ${BASE_IMAGE}
59
+
60
+ WORKDIR /app
61
+
62
+ # Copy the virtual environment from builder
63
+ COPY --from=builder /app/env/.venv /app/.venv
64
+
65
+ # Copy the environment code
66
+ COPY --from=builder /app/env /app/env
67
+
68
+ # Set PATH to use the virtual environment
69
+ ENV PATH="/app/.venv/bin:$PATH"
70
+
71
+ # Set PYTHONPATH so imports work correctly
72
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
73
+
74
+ ENV ENABLE_WEB_INTERFACE='true'
75
+
76
+ # HF Spaces uses port 7860 by default; override with PORT env var for local use
77
+ ENV PORT=7860
78
+
79
+ # Health check (uses the PORT variable)
80
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
81
+ CMD curl -f http://localhost:${PORT}/health || exit 1
82
+
83
+ # Run the FastAPI server – respects $PORT so it works on HF Spaces and locally
84
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md CHANGED
@@ -1,10 +1,62 @@
1
- ---
2
- title: Constraint Env
3
- emoji: 🏢
4
- colorFrom: gray
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Constraint Environment
3
+ emoji: 🧩
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ short_description: RL training env — natural language to constraint AST
10
+ base_path: /web
11
+ ---
12
+
13
+ # Constraint Environment
14
+
15
+
16
+ An environment that converts natural-language scheduling constraints into a DSL (a JSON constraint AST)
17
+
18
+
19
+
20
+ ## Development & Testing
21
+
22
+ ### Direct Environment Testing
23
+
24
+ Test the environment logic directly without starting the HTTP server:
25
+
26
+ ```bash
27
+ # From the server directory
28
+ python3 server/constraint_env_environment.py
29
+ ```
30
+
31
+ This verifies that:
32
+ - Environment resets correctly
33
+ - Step executes actions properly
34
+ - State tracking works
35
+ - Rewards are calculated correctly
36
+
37
+ ### Running Locally
38
+
39
+ Run the server locally for development:
40
+
41
+ ```bash
42
+ uvicorn server.app:app --reload
43
+ ```
44
+
45
+ ## Project Structure
46
+
47
+ ```
48
+ constraint_env/
49
+ ├── .dockerignore # Docker build exclusions
50
+ ├── __init__.py # Module exports
51
+ ├── README.md # This file
52
+ ├── openenv.yaml # OpenEnv manifest
53
+ ├── pyproject.toml # Project metadata and dependencies
54
+ ├── uv.lock # Locked dependencies (generated)
55
+ ├── client.py # ConstraintEnv client
56
+ ├── models.py # Action and Observation models
57
+ └── server/
58
+ ├── __init__.py # Server module exports
59
+ ├── constraint_env_environment.py # Core environment logic
60
+ ├── app.py # FastAPI application (HTTP + WebSocket endpoints)
61
+ └── Dockerfile # Container image definition
62
+ ```
__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constraint Env Environment."""
2
+
3
+ from .client import ConstraintEnv
4
+ from .models import ConstraintAction, ConstraintObservation
5
+
6
+ __all__ = [
7
+ "ConstraintAction",
8
+ "ConstraintObservation",
9
+ "ConstraintEnv",
10
+ ]
client.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Constraint Env Environment Client."""
8
+
9
+ from typing import Dict
10
+
11
+ from openenv.core import EnvClient
12
+ from openenv.core.client_types import StepResult
13
+ from openenv.core.env_server.types import State
14
+
15
+ from .models import ConstraintAction, ConstraintObservation, ConstraintState
16
+
17
+
18
class ConstraintEnv(
    EnvClient[ConstraintAction, ConstraintObservation, ConstraintState]
):
    """
    Client for the Constraint Env Environment.

    This client maintains a persistent WebSocket connection to the environment
    server, enabling efficient multi-step interactions with lower latency.
    Each client instance has its own dedicated environment session on the
    server.
    """

    def _step_payload(self, action: ConstraintAction) -> Dict:
        """
        Convert ConstraintAction to JSON payload for step message.

        Args:
            action: ConstraintAction instance

        Returns:
            Dictionary representation suitable for JSON encoding
        """
        return {
            "ast_output": action.ast_output,
        }

    def _parse_result(self, payload: Dict) -> StepResult[ConstraintObservation]:
        """
        Parse server response into StepResult[ConstraintObservation].

        Args:
            payload: JSON response data from server

        Returns:
            StepResult with ConstraintObservation
        """
        obs_data = payload.get("observation", {})
        reward = payload.get("reward")
        observation = ConstraintObservation(
            prompt=obs_data.get("prompt", ""),
            # info is declared as Dict[str, Any] on the model; default to an
            # empty dict. (The previous default of 0 broke callers that do
            # observation.info.get("error") and failed model validation.)
            info=obs_data.get("info") or {},
            done=payload.get("done", False),
            # reward is a non-optional float on the model; coerce a missing
            # value to 0.0 instead of passing None.
            reward=reward if reward is not None else 0.0,
        )

        return StepResult(
            observation=observation,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: Dict) -> ConstraintState:
        """
        Parse server response into a ConstraintState object.

        Args:
            payload: JSON response from state request

        Returns:
            ConstraintState carrying the episode_id (if any)
        """
        return ConstraintState(
            episode_id=payload.get("episode_id"),
        )
+ )
dataset_example.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Dataset for the Constraint Environment.
9
+
10
+ Three difficulty tiers (easy / medium / hard) with increasing structural
11
+ complexity for translating natural-language scheduling constraints into a
12
+ JSON-based AST DSL.
13
+ """
14
+
15
# Difficulty tier -> list of {"prompt", "target_ast"} examples used by the
# environment to score an agent's natural-language -> AST translations.
dataset = {
    # ------------------------------------------------------------------
    # EASY – single quantifier block, simple (or no) WHERE guard,
    # direct assert
    # ------------------------------------------------------------------
    "easy": [
        {
            "prompt": (
                "No classes should be scheduled on Saturday."
            ),
            "target_ast": {
                "type": "hard",
                "name": "no_saturday_classes",
                "forall": [
                    {"var": "b", "domain": "branches"},
                    {"var": "sub", "domain": "subjects"},
                    {"var": "d", "domain": "days"},
                    {"var": "s", "domain": "slots"},
                ],
                # Days are 0-indexed; 5 == Saturday.
                "where": "d == 5",
                "assert": "schedule(b, sub, d, s) == 0",
            },
        },
        {
            "prompt": (
                "Every teacher must teach at least one subject."
            ),
            "target_ast": {
                "type": "soft",
                "name": "teacher_teaches_one",
                "forall": [
                    {"var": "t", "domain": "teachers"},
                ],
                # NOTE(review): `sub` is referenced below but not declared in
                # "forall" — this violates the DSL rule that every referenced
                # variable must be quantified; verify against the evaluator.
                "assert": "SUM(teaches(t, sub)) >= 1",
            },
        },
    ],

    # ------------------------------------------------------------------
    # MEDIUM – two quantifiers, WHERE clause, combined assert
    # ------------------------------------------------------------------
    "medium": [
        {
            "prompt": (
                "No non-online classes should be scheduled on Saturday."
            ),
            "target_ast": {
                "type": "hard",
                "name": "no_non_online_saturday",
                "forall": [
                    {"var": "b", "domain": "branches"},
                    {"var": "sub", "domain": "subjects"},
                    {"var": "d", "domain": "days"},
                    {"var": "s", "domain": "slots"},
                ],
                "where": "subject_type(b, sub) != 'online' AND d == 5",
                "assert": "schedule(b, sub, d, s) == 0",
            },
        },
        {
            "prompt": (
                "A teacher cannot be assigned to two different slots at the same time."
            ),
            "target_ast": {
                "type": "hard",
                "name": "no_teacher_time_conflict",
                "forall": [
                    {"var": "t", "domain": "teachers"},
                    {"var": "d", "domain": "days"},
                    {"var": "s", "domain": "slots"},
                ],
                "assert": "COUNT(occupies_teacher(t, d, s)) <= 1",
            },
        },
    ],

    # ------------------------------------------------------------------
    # HARD – multiple quantifiers, nested WHERE + AND/OR, minimize
    # ------------------------------------------------------------------
    "hard": [
        {
            "prompt": (
                "Minimize the number of occupied slots for each branch on any given day, "
                "but only for subjects that are labelled as practical."
            ),
            "target_ast": {
                "type": "soft",
                "name": "minimize_practical_slots",
                "forall": [
                    {"var": "b", "domain": "branches"},
                    {"var": "sub", "domain": "subjects"},
                    {"var": "d", "domain": "days"},
                    {"var": "s", "domain": "slots"},
                ],
                "where": "subject_type(b, sub) == 'practical'",
                # "minimize" replaces "assert" for optimisation objectives.
                "minimize": "SUM(occupies(b, sub, d, s))",
            },
        },
        {
            "prompt": (
                "No branch may have more than four scheduled slots in a single day, "
                "unless a subject is online, in which case it may occupy one extra slot."
            ),
            "target_ast": {
                "type": "hard",
                "name": "daily_slot_cap",
                "forall": [
                    {"var": "b", "domain": "branches"},
                    {"var": "d", "domain": "days"},
                    {"var": "sub", "domain": "subjects"},
                    {"var": "s", "domain": "slots"},
                ],
                "assert": (
                    "SUM(schedule(b, sub, d, s)) <= 4 OR "
                    "(subject_type(b, sub) == 'online' AND SUM(schedule(b, sub, d, s)) <= 5)"
                ),
            },
        },
    ],
}
guidelines.txt ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 🚀 Hackathon Submission Guidelines (OpenEnv RL Challenge)
2
+ 1. Project Structure
3
+ * Your inference script must be named inference.py
4
+ * It must be located in the root directory of your project
5
+
6
+ ________________
7
+
8
+
9
+ 2. LLM Usage Requirements
10
+ * You must use the OpenAI Client for all LLM calls
11
+ * Do not use alternative SDKs or direct HTTP calls
12
+
13
+ ________________
14
+
15
+
16
+ 3. Required Environment Variables
17
+ Your inference.py must read the following environment variables:
18
+ * API_BASE_URL
19
+ * Description: API endpoint for the LLM
20
+ * Requirement: Must include a default value
21
+ * MODEL_NAME
22
+ * Description: Model identifier used for inference
23
+ * Requirement: Must include a default value
24
+ * HF_TOKEN
25
+ * Description: Hugging Face API token
26
+ * Requirement: Mandatory (no default required)
27
+
28
+
29
+ 4. INFERENCE OUTPUT FORMAT
30
+ The script must emit exactly three line types to stdout, in this order:
31
+ [START] task=<task_name> env=<benchmark> model=<model_name>
32
+ [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
33
+ [END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
34
+
35
+
36
+
37
+
38
+
39
+
40
+ Rules:
41
+ - One [START] line at episode begin.
42
+ - One [STEP] line per step, immediately after env.step() returns.
43
+ - One [END] line after env.close(), always emitted (even on exception).
44
+ - reward and rewards are formatted to 2 decimal places.
45
+ - done and success are lowercase booleans: true or false.
46
+ - error is the raw last_action_error string, or null if none.
47
+ - All fields on a single line with no newlines within a line.
48
+ Example:
49
+ [START] task=click-test env=miniwob model=Qwen3-VL-30B
50
+ [STEP] step=1 action=click('123') reward=0.00 done=false error=null
51
+ [STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
52
+ [STEP] step=3 action=click('789') reward=1.00 done=true error=null
53
+ [END] success=true steps=3 rewards=0.00,0.00,1.00
54
+ ✅ Example (inference.py)
55
+ import os
56
+ from openai import OpenAI
57
+
58
+
59
+ # Read environment variables with defaults where required
60
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
61
+ MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
62
+ HF_TOKEN = os.getenv("HF_TOKEN")
63
+
64
+
65
+ if HF_TOKEN is None:
66
+ raise ValueError("HF_TOKEN environment variable is required")
67
+
68
+
69
+ # Initialize OpenAI client
70
+ client = OpenAI(
71
+ base_url=API_BASE_URL,
72
+ api_key=HF_TOKEN
73
+ )
74
+
75
+
76
+ def run_inference(prompt: str):
77
+ response = client.chat.completions.create(
78
+ model=MODEL_NAME,
79
+ messages=[
80
+ {"role": "user", "content": prompt}
81
+ ]
82
+ )
83
+ response = response.choices[0].message.content
84
+ # Print output based on above given format
85
+
86
+
87
+
88
+
89
+ if __name__ == "__main__":
90
+ print(run_inference("Hello from OpenEnv!"))
91
+
92
+ ________________
93
+ 4. Hugging Face Space Guidelines
94
+ * Building a Hugging Face Space can take significant time, especially if multiple spaces are active
95
+ * To avoid delays:
96
+ * Turn off all unnecessary spaces
97
+ * Keep only your primary submission space running
98
+ ________________
99
+
100
+
101
+ 5. Submission Validation Rules
102
+ * The system will check if your Hugging Face Space is live
103
+ * If your space is not in a running state, your submission will fail automatically
104
+ Before submitting:
105
+ * Ensure your space is fully built
106
+ * Confirm it is in the “Running” state
107
+ ________________
108
+
109
+
110
+ 6. Hardware Requirements
111
+ * Your solution will be executed inside a Docker container with limited resources
112
+ * It must run within the following constraints:
113
+ * 2 vCPU
114
+ * 8 GB RAM
115
+ 👉 Ensure your model, dependencies, and runtime fit within these limits. Submissions exceeding these constraints may fail during evaluation.
116
+ ________________
117
+
118
+
119
+ 7. Resubmissions
120
+ * You are allowed to resubmit your project multiple times
121
+ * If your submission fails validation, you can:
122
+ * Fix the issues
123
+ * Ensure your Hugging Face Space is running
124
+ * Submit again
125
+ 👉 There is no penalty for resubmitting, so iterate until your submission passes all checks.
126
+ ________________
127
+
128
+
129
+ ⚠️ Common Failure Cases (Avoid These)
130
+ * inference.py not in root directory
131
+ * Missing default values for API_BASE_URL or MODEL_NAME
132
+ * Missing HF_TOKEN
133
+ * Hugging Face Space still building during submission
134
+ * Space stopped due to multiple active deployments
135
+ ________________
136
+
137
+
138
+ 🚀 Reference projects to guide you
139
+ Here are some strong examples from the San Francisco edition to help you understand how to structure your environment:
140
+ * Calendar Environment Server
141
+ https://github.com/meta-pytorch/OpenEnv/tree/main/envs/calendar_env
142
+ * Reasoning Gym Environment Server
143
+ https://github.com/meta-pytorch/OpenEnv/tree/main/envs/reasoning_gym_env
144
+ * TB2 Environment Server
145
+ https://github.com/meta-pytorch/OpenEnv/tree/main/envs/tbench2_env
146
+ * CARLA Environment Server
147
+ https://github.com/meta-pytorch/OpenEnv/tree/main/envs/carla_env
148
+ * REPL Environment Server
149
+ https://github.com/meta-pytorch/OpenEnv/tree/main/envs/repl_env
150
+ Use these as direction, not as templates. Focus on understanding structure and approach.
inference.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ inference.py – Baseline evaluator for the Constraint Environment.
3
+
4
+ Reads environment variables:
5
+ API_BASE_URL – LLM endpoint (default: https://openrouter.ai/api/v1)
6
+ MODEL_NAME – Model id (default: openai/gpt-oss-120b)
7
+ HF_TOKEN – API key (REQUIRED – raises ValueError if missing)
8
+
9
+ Output format (stdout only):
10
+ [START] task=<task_name> env=constraint_env model=<model_name>
11
+ [STEP] step=<n> action=<ast_json> reward=<0.00> done=<true|false> error=<msg|null>
12
+ [END] success=<true|false> steps=<n> rewards=<r1,r2,...>
13
+ """
14
+
15
+ import os
16
+ import sys
17
+ import json
18
+ import asyncio
19
+
20
+ from openai import OpenAI
21
+ import dotenv
22
+ dotenv.load_dotenv()
23
+ # ---------------------------------------------------------------------------
24
+ # Environment variables
25
+ # ---------------------------------------------------------------------------
26
+
27
+ API_BASE_URL: str = os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1")
28
+ MODEL_NAME: str = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
29
+ HF_TOKEN: str | None = os.getenv("HF_TOKEN")
30
+
31
+ if HF_TOKEN is None:
32
+ raise ValueError(
33
+ "HF_TOKEN environment variable is required. "
34
+ "Export it before running:\n"
35
+ ' $env:HF_TOKEN="sk-or-v1-..." (PowerShell)\n'
36
+ ' export HF_TOKEN="sk-or-v1-..." (bash)'
37
+ )
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # OpenAI client (routed through OpenRouter by default)
41
+ # ---------------------------------------------------------------------------
42
+
43
+ _llm = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # System prompt shared across all tasks
47
+ # ---------------------------------------------------------------------------
48
+
49
+ _SYSTEM_PROMPT = """\
50
+ You are an expert constraint compiler for a university scheduling system.
51
+ Your job is to convert a natural-language scheduling constraint into a JSON AST \
52
+ (Abstract Syntax Tree) that precisely captures the logical structure of the rule.
53
+
54
+ ════════════════════════════════════════
55
+ SCHEMA
56
+ ════════════════════════════════════════
57
+ {
58
+ "type": "hard" | "soft", // hard = must not be violated; soft = penalised if violated
59
+ "name": "<snake_case_identifier>",
60
+ "forall": [ // quantifiers – declare every variable you use
61
+ {"var": "<letter>", "domain": "<domain>"},
62
+ ...
63
+ ],
64
+ "where": "<expression>", // OPTIONAL filter / guard condition
65
+ "assert": "<expression>" // the constraint body (use XOR with "minimize")
66
+ // OR
67
+ "minimize": "<expression>" // objective to minimise (use XOR with "assert")
68
+ }
69
+
70
+ ════════════════════════════════════════
71
+ VALID DOMAINS
72
+ ════════════════════════════════════════
73
+ teachers · subjects · branches · days · slots
74
+
75
+ ════════════════════════════════════════
76
+ VALID FUNCTIONS (arity shown)
77
+ ════════════════════════════════════════
78
+ subject_type(branch, subject) → 2 args returns 'online'|'practical'|...
79
+ schedule(branch, subject, day, slot) → 4 args returns 0 or 1
80
+ occupies(branch, subject, day, slot) → 4 args returns 0 or 1
81
+ occupies_teacher(teacher, day, slot) → 3 args returns 0 or 1
82
+ teaches(teacher, subject) → 2 args returns 0 or 1
83
+ SUM(expr) → 1 arg aggregate sum
84
+ COUNT(expr) → 1 arg aggregate count
85
+
86
+ ════════════════════════════════════════
87
+ OPERATORS
88
+ ════════════════════════════════════════
89
+ Logical : AND OR NOT IN
90
+ Compare : == != <= >= < >
91
+
92
+ Days are 0-indexed integers (Monday = 0 … Saturday = 5).
93
+
94
+ ════════════════════════════════════════
95
+ RULES
96
+ ════════════════════════════════════════
97
+ • Every variable referenced in "where", "assert", or "minimize" MUST be declared in "forall".
98
+ • "where" acts as a guard – only pairs/tuples satisfying it are considered.
99
+ • Use "hard" for absolute rules and "soft" for optimisation objectives.
100
+ • Nested calls like SUM(schedule(b, sub, d, s)) are allowed.
101
+
102
+ ════════════════════════════════════════
103
+ EXAMPLE
104
+ ════════════════════════════════════════
105
+ INPUT:
106
+ "There should not be any classes on saturday, except online classes."
107
+
108
+ OUTPUT:
109
+ {
110
+ "type": "hard",
111
+ "name": "no_non_online_saturday",
112
+ "forall": [
113
+ {"var": "b", "domain": "branches"},
114
+ {"var": "sub", "domain": "subjects"},
115
+ {"var": "d", "domain": "days"},
116
+ {"var": "s", "domain": "slots"}
117
+ ],
118
+ "where": "subject_type(b, sub) != 'online' AND d == 5",
119
+ "assert": "schedule(b, sub, d, s) == 0"
120
+ }
121
+
122
+ EXPLANATION:
123
+ • type="hard" – absolute rule, no violation allowed.
124
+ • forall – all four variables appear in the expressions, so all four are declared.
125
+ • where – guard: only consider rows where the subject is NOT online AND the day is Saturday (index 5).
126
+ • assert – for every qualifying (b, sub, d, s) tuple, the schedule slot must be 0 (i.e., no class).
127
+
128
+ ════════════════════════════════════════
129
+ Return ONLY the raw JSON object.
130
+ Do NOT wrap in markdown fences. Do NOT add any explanation outside the JSON.
131
+ Every key/value must appear on a single line in the final output if possible.
132
+ """
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # LLM call
137
+ # ---------------------------------------------------------------------------
138
+
139
def _call_llm(prompt: str) -> str:
    """Send *prompt* to the configured model and return its raw text reply."""
    chat = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    completion = _llm.chat.completions.create(
        model=MODEL_NAME,
        messages=chat,
        temperature=0.0,
    )
    # The SDK may return None for the content field; normalise to "".
    text = completion.choices[0].message.content
    return text or ""
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # Single-task evaluation (synchronous, one step per task)
154
+ # ---------------------------------------------------------------------------
155
+
156
def _run_task(task_id: str, env_url: str = "http://localhost:8000") -> None:
    """
    Run one episode for the given difficulty task.

    Connects to the live environment server via the ConstraintEnv WebSocket
    client, asks the LLM for a constraint AST, submits it as a single step,
    and prints [START], one [STEP], and [END] to stdout.

    Args:
        task_id: Difficulty identifier ("easy" | "medium" | "hard").
        env_url: Base URL of the environment server.
    """
    try:
        from client import ConstraintEnv
    except ImportError:
        from constraint_env.client import ConstraintEnv  # type: ignore

    try:
        from models import ConstraintAction
    except ImportError:
        from constraint_env.models import ConstraintAction  # type: ignore

    rewards: list[float] = []
    step_count = 0
    last_error = None
    success = False

    # Emit [START] before any network work so the output contract (exactly
    # one [START] per episode, always first) holds even when the connection
    # or reset fails and the caller's handler emits [END].
    _emit_start(task_id)

    # ------------------------------------------------------------------
    async def _run():
        nonlocal step_count, last_error, success

        async with ConstraintEnv(
            env_url,
            connect_timeout_s=30.0,
            message_timeout_s=120.0,
        ) as env:
            # Reset with the requested difficulty
            obs = await env.reset(task_id=task_id)

            prompt_text = obs.observation.prompt

            # Generate AST from LLM
            raw_output = _call_llm(prompt_text)

            # Sanitise: strip markdown fences if present
            raw_output = raw_output.strip()
            if raw_output.startswith("```"):
                raw_output = raw_output.split("\n", 1)[-1]
                raw_output = raw_output.rsplit("```", 1)[0].strip()

            # Compact the JSON to a single line (rule: no newlines inside [STEP])
            try:
                action_str = json.dumps(json.loads(raw_output), separators=(",", ":"))
            except (json.JSONDecodeError, TypeError):
                action_str = raw_output.replace("\n", " ").replace("\r", "")

            action = ConstraintAction(ast_output=action_str)
            step_result = await env.step(action)

            step_count = 1
            reward = float(step_result.reward or 0.0)
            done = bool(step_result.done)
            # info may not be a dict (older servers / omitted field) — guard
            # the .get() call instead of crashing mid-episode.
            info = getattr(step_result.observation, "info", None)
            last_error = info.get("error") if isinstance(info, dict) else None
            rewards.append(reward)

            _emit_step(step_count, action_str, reward, done, last_error)

            # Success threshold: at least valid JSON plus correct structure.
            success = (reward >= (_W_JSON + _W_STRUCTURE)) if done else False

    asyncio.run(_run())
    _emit_end(success, step_count, rewards)
224
+
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # Reward weights (mirrored from environment for success threshold)
228
+ # ---------------------------------------------------------------------------
229
+
230
# Reward weight for "output is valid JSON" (mirrors the server's scoring).
_W_JSON = 1 / 8
# Reward weight for "correct top-level AST structure" (mirrors the server).
_W_STRUCTURE = 2 / 8
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # Stdout emitters – the ONLY things that print to stdout
236
+ # ---------------------------------------------------------------------------
237
+
238
def _emit_start(task_id: str) -> None:
    """Print the single [START] line that opens an episode."""
    line = f"[START] task={task_id} env=constraint_env model={MODEL_NAME}"
    print(line, flush=True)
240
+
241
+
242
+ def _emit_step(
243
+ step: int,
244
+ action: str,
245
+ reward: float,
246
+ done: bool,
247
+ error: str | None,
248
+ ) -> None:
249
+ done_str = "true" if done else "false"
250
+ error_str = error if error else "null"
251
+ # Ensure action has no newlines (single line rule)
252
+ action_safe = action.replace("\n", " ").replace("\r", "")
253
+ print(
254
+ f"[STEP] step={step} action={action_safe} "
255
+ f"reward={reward:.2f} done={done_str} error={error_str}",
256
+ flush=True,
257
+ )
258
+
259
+
260
+ def _emit_end(success: bool, steps: int, rewards: list[float]) -> None:
261
+ success_str = "true" if success else "false"
262
+ rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.00"
263
+ print(
264
+ f"[END] success={success_str} steps={steps} rewards={rewards_str}",
265
+ flush=True,
266
+ )
267
+
268
+
269
+ # ---------------------------------------------------------------------------
270
+ # Entry point
271
+ # ---------------------------------------------------------------------------
272
+
273
+ if __name__ == "__main__":
274
+ SERVER_URL = os.getenv("ENV_SERVER_URL", "http://localhost:8000")
275
+
276
+ for difficulty in ("easy", "medium", "hard"):
277
+ try:
278
+ _run_task(task_id=difficulty, env_url=SERVER_URL)
279
+ except Exception as exc:
280
+ # Guarantee [END] is always emitted even on failure
281
+ _emit_end(success=False, steps=0, rewards=[])
282
+ print(f"# ERROR during {difficulty}: {exc}", file=sys.stderr, flush=True)
models.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for the Constraint Env Environment.
9
+
10
+ The constraint_env environment is a simple test environment that echoes back messages.
11
+ """
12
+
13
+ from openenv.core.env_server.types import Action, Observation, State
14
+ from typing import Dict, Any, Optional
15
+
16
+
17
class ConstraintAction(Action):
    """Action submitted by the agent: the constraint AST produced by the LLM."""

    # JSON-encoded constraint AST, expected as a single line of text.
    ast_output: str
21
+
22
+
23
class ConstraintObservation(Observation):
    """Observation from the environment: the task prompt plus step outcome."""

    # Natural-language constraint the agent must translate into the AST DSL.
    prompt: str
    # Whether the episode has finished.
    done: bool
    # Reward for the most recent step.
    reward: float
    # Extra diagnostics from the server — assumes keys like "error" are
    # present on failures; verify against the server implementation.
    info: Dict[str, Any]
30
+
31
+
32
class ConstraintState(State):
    """Current state of an environment session."""
    # Identifier of the active episode, or None before the first reset.
    episode_id: Optional[str] = None
35
+
36
+
37
+
38
+
openenv.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: constraint_env
3
+ description: >
4
+ Natural-language to constraint-AST translation environment for RL training.
5
+ Agents translate scheduling constraints written in plain English into a
6
+ structured JSON DSL (Abstract Syntax Tree).
7
+ type: space
8
+ runtime: fastapi
9
+ app: server.app:app
10
+ port: 8000
11
+ tasks:
12
+ - id: easy
13
+ description: Single quantifier, direct assert, no WHERE clause
14
+ difficulty: easy
15
+ - id: medium
16
+ description: Two quantifiers with a WHERE filter clause and combined assert
17
+ difficulty: medium
18
+ - id: hard
19
+ description: Multiple quantifiers, nested WHERE with AND/OR, minimize objective
20
+ difficulty: hard
21
+ tags:
22
+ - openenv
23
+ - scheduling
24
+ - nlp-to-dsl
25
+ - constraint-satisfaction
openenv_constraint_env.egg-info/PKG-INFO ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: openenv-constraint_env
3
+ Version: 0.1.0
4
+ Summary: Constraint Env environment for OpenEnv
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: dotenv>=0.9.9
7
+ Requires-Dist: openenv-core[core]>=0.2.2
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
10
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
openenv_constraint_env.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ README.md
2
+ __init__.py
3
+ client.py
4
+ dataset_example.py
5
+ inference.py
6
+ models.py
7
+ pyproject.toml
8
+ ./__init__.py
9
+ ./client.py
10
+ ./dataset_example.py
11
+ ./inference.py
12
+ ./models.py
13
+ openenv_constraint_env.egg-info/PKG-INFO
14
+ openenv_constraint_env.egg-info/SOURCES.txt
15
+ openenv_constraint_env.egg-info/dependency_links.txt
16
+ openenv_constraint_env.egg-info/entry_points.txt
17
+ openenv_constraint_env.egg-info/requires.txt
18
+ openenv_constraint_env.egg-info/top_level.txt
19
+ server/__init__.py
20
+ server/app.py
21
+ server/constraint_env_environment.py
openenv_constraint_env.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
openenv_constraint_env.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ server = constraint_env.server.app:main
openenv_constraint_env.egg-info/requires.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ dotenv>=0.9.9
2
+ openenv-core[core]>=0.2.2
3
+
4
+ [dev]
5
+ pytest>=8.0.0
6
+ pytest-cov>=4.0.0
openenv_constraint_env.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ constraint_env
problem_statement.txt ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Functional Requirements
2
+ 1. Real-World Task Simulation
3
+ The environment must represent tasks that humans actually perform in real settings—no games or toy problems.
4
+ Examples include email triage, code review, data cleaning, scheduling, customer support, and content moderation.
5
+ ________________
6
+
7
+
8
+ 2. OpenEnv Specification Compliance
9
+ The environment must fully implement the OpenEnv interface, including:
10
+ * Typed Observation, Action, and Reward models using Pydantic
11
+ * step(action) → returns (observation, reward, done, info)
12
+ * reset() → returns the initial observation
13
+ * state() → returns the current state
14
+ * An openenv.yaml file containing metadata
15
+ The implementation must successfully pass validation via openenv validate.
16
+ ________________
17
+
18
+
19
+ 3. Minimum of Three Tasks with Agent Graders
20
+ * Provide at least three tasks, each with a clearly defined objective
21
+ * Tasks should span increasing difficulty: easy → medium → hard
22
+ * Each task must include a programmatic grader that assigns a score between 0.0 and 1.0
23
+ * Grading criteria must be clear, deterministic, and reproducible
24
+ ________________
25
+
26
+
27
+ 4. Meaningful Reward Function
28
+ * The reward function must provide feedback throughout the task trajectory, not just at completion
29
+ * It should reward incremental progress toward the objective
30
+ * It must penalize undesirable behaviors such as infinite loops or destructive actions
31
+ ________________
32
+
33
+
34
+ 5. Baseline Inference Script
35
+ * Include an inference script that uses the OpenAI API client to evaluate a model within the environment
36
+ * API credentials must be read from an environment variable (HF_TOKEN)
37
+ * The script should produce a reproducible baseline score across all tasks
38
+ ________________
39
+
40
+
41
+ Non-Functional Requirements
42
+ 1. Deployment on Hugging Face Spaces
43
+ * The environment must be deployable as a containerized Hugging Face Space
44
+ * It should be tagged with openenv
45
+ ________________
46
+
47
+
48
+ 2. Containerized Execution
49
+ * Provide a working Dockerfile
50
+ * The environment must build and run successfully using:
51
+ * docker build
52
+ * docker run
53
+ ________________
54
+
55
+
56
+ 3. Documentation
57
+ The README must include:
58
+ * Environment overview and motivation
59
+ * Definitions of action and observation spaces
60
+ * Task descriptions with expected difficulty levels
61
+ * Setup and usage instructions
62
+ * Baseline performance scores
63
+
64
+ Additional Guideline: Meta OpenEnv Hackathon: Guidelines
pyproject.toml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-constraint_env"
13
+ version = "0.1.0"
14
+ description = "Constraint Env environment for OpenEnv"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ "dotenv>=0.9.9",
18
+ # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
19
+ # install from github
20
+ # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
21
+ "openenv-core[core]>=0.2.2",
22
+ # Environment-specific dependencies
23
+ # Add all dependencies needed for your environment here
24
+ # Examples:
25
+ # "numpy>=1.19.0",
26
+ # "torch>=2.0.0",
27
+ # "gymnasium>=0.29.0",
28
+ # "openspiel>=1.0.0",
29
+ # "smolagents>=1.22.0,<2",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ dev = [
34
+ "pytest>=8.0.0",
35
+ "pytest-cov>=4.0.0",
36
+ ]
37
+
38
+ [project.scripts]
39
+ # Server entry point - enables running via: uv run --project . server
40
+ # or: python -m constraint_env.server.app
41
+ server = "constraint_env.server.app:main"
42
+
43
+ [tool.setuptools]
44
+ include-package-data = true
45
+ packages = ["constraint_env", "constraint_env.server"]
46
+ package-dir = { "constraint_env" = ".", "constraint_env.server" = "server" }
server/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Constraint Env environment server components."""
8
+
9
+ from .constraint_env_environment import ConstraintEnvironment
10
+
11
+ __all__ = ["ConstraintEnvironment"]
server/app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the Constraint Env Environment.
9
+
10
+ This module creates an HTTP server that exposes the ConstraintEnvironment
11
+ over HTTP and WebSocket endpoints, compatible with EnvClient.
12
+
13
+ Endpoints:
14
+ - POST /reset: Reset the environment
15
+ - POST /step: Execute an action
16
+ - GET /state: Get current environment state
17
+ - GET /schema: Get action/observation schemas
18
+ - WS /ws: WebSocket endpoint for persistent sessions
19
+
20
+ Usage:
21
+ # Development (with auto-reload):
22
+ uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
23
+
24
+ # Production:
25
+ uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
26
+
27
+ # Or run directly:
28
+ python -m server.app
29
+ """
30
+
31
+ try:
32
+ from openenv.core.env_server.http_server import create_app
33
+ except Exception as e: # pragma: no cover
34
+ raise ImportError(
35
+ "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
36
+ ) from e
37
+
38
+ try:
39
+ from ..models import ConstraintAction, ConstraintObservation
40
+ from .constraint_env_environment import ConstraintEnvironment
41
+ except ImportError:
42
+ from constraint_env.models import ConstraintAction, ConstraintObservation
43
+ from constraint_env.server.constraint_env_environment import ConstraintEnvironment
44
+
45
+ # Load the dataset so the environment can be initialised without crashing.
46
+ try:
47
+ from dataset_example import dataset as _DATASET
48
+ except ImportError:
49
+ from constraint_env.dataset_example import dataset as _DATASET # type: ignore
50
+
51
+
52
def _make_env():
    """Factory that passes the pre-loaded dataset into ConstraintEnvironment.

    Passed to create_app() so a fresh environment instance is created per
    session, all sharing the module-level _DATASET.
    """
    return ConstraintEnvironment(dataset=_DATASET)
55
+
56
+
57
+ # Create the app – pass the factory so create_app calls _make_env() per session.
58
+ app = create_app(
59
+ _make_env,
60
+ ConstraintAction,
61
+ ConstraintObservation,
62
+ env_name="constraint_env",
63
+ max_concurrent_envs=1,
64
+ )
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # PWA manifest – browsers request this at root level for the web UI
69
+ # ---------------------------------------------------------------------------
70
+
71
+ from fastapi.responses import JSONResponse # noqa: E402
72
+
73
+
74
@app.get("/manifest.json", include_in_schema=False)
async def web_manifest():
    """Serve a static PWA manifest for the browser-based web UI."""
    manifest = {
        "name": "Constraint Environment",
        "short_name": "ConstraintEnv",
        "description": "RL training environment: natural-language → constraint AST",
        "start_url": "/web/",
        "display": "standalone",
        "background_color": "#1e1e2e",
        "theme_color": "#7c3aed",
        "icons": [
            {
                "src": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
                "sizes": "any",
                "type": "image/svg+xml",
            }
        ],
    }
    # Static content — let clients cache it for an hour.
    cache_headers = {"Cache-Control": "public, max-age=3600"}
    return JSONResponse(content=manifest, headers=cache_headers)
95
+
96
+
97
def main(host: str = "localhost", port: int = 8000):
    """
    Run the FastAPI app with uvicorn (entry point for direct execution).

    Enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 8001
        python -m constraint_env.server.app

    Args:
        host: Interface to bind to (default: "localhost")
        port: TCP port to listen on (default: 8000)

    For production deployments, prefer invoking uvicorn directly with
    multiple workers:
        uvicorn constraint_env.server.app:app --workers 4
    """
    # Imported lazily so importing this module never requires uvicorn.
    import uvicorn

    uvicorn.run(app, host=host, port=port)
117
+
118
+
119
if __name__ == "__main__":
    import argparse

    # Minimal CLI: override bind address/port, then delegate to main().
    cli = argparse.ArgumentParser(description="Constraint Env FastAPI Server")
    cli.add_argument("--host", type=str, default="localhost")
    cli.add_argument("--port", type=int, default=8000)
    ns = cli.parse_args()
    main(host=ns.host, port=ns.port)
server/constraint_env_environment.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Constraint Env Environment Implementation.
9
+
10
+ Evaluates an LLM's ability to convert natural-language scheduling
11
+ constraints into a JSON AST DSL.
12
+
13
+ Reward breakdown per step:
14
+ +0.125 valid JSON (1 / 8 of max)
15
+ +0.250 correct top-level structure (2 / 8 of max)
16
+ +0.625 exact match with target AST (5 / 8 of max)
17
+ ──────
18
+ 1.000 total maximum reward
19
+
20
+ Penalties:
21
+ -0.250 bad_structure (structure wrong but JSON parsed)
22
+ -0.250 invalid_json (cannot parse at all, replaces reward=0)
23
+ """
24
+
25
+ import re
26
+ import json
27
+ from uuid import uuid4
28
+ from typing import Any, Dict, List, Optional
29
+
30
+ from openenv.core.env_server.interfaces import Environment
31
+
32
+ try:
33
+ from ..models import ConstraintAction, ConstraintObservation, ConstraintState
34
+ except ImportError:
35
+ from models import ConstraintAction, ConstraintObservation, ConstraintState
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Domain knowledge
40
+ # ---------------------------------------------------------------------------
41
+
42
# Identifier domains that may appear in a "forall" variable declaration.
VALID_DOMAINS = {"teachers", "subjects", "branches", "days", "slots"}

# DSL function name -> required arity (number of call arguments).
VALID_FUNCTIONS: Dict[str, int] = {
    "subject_type": 2,
    "schedule": 4,
    "occupies": 4,
    "occupies_teacher": 3,
    "teaches": 2,
    "SUM": 1,
    "COUNT": 1,
}

# Reward weights (must sum to 1.0)
_W_JSON = 1 / 8  # 0.125 — awarded when the output parses as JSON
_W_STRUCTURE = 2 / 8  # 0.250 — awarded when the AST matches the schema
_W_MATCH = 5 / 8  # 0.625 — awarded on a logic match with the target AST
_PENALTY_BAD_STRUCTURE = -0.250  # JSON parsed but schema violated
_PENALTY_INVALID_JSON = -0.250  # output could not be parsed at all
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Environment
64
+ # ---------------------------------------------------------------------------
65
+
66
class ConstraintEnvironment(Environment):
    """
    OpenEnv environment for natural-language → constraint-AST translation.

    Args:
        dataset: Dict with keys "easy", "medium", "hard", each a list of
            {"prompt": str, "target_ast": dict} entries.
            If omitted, the built-in dataset_example is used.
    """

    def __init__(self, dataset: Optional[Dict[str, List[Dict]]] = None):
        if dataset is None:
            try:
                from dataset_example import dataset as _ds
            except ImportError:
                from constraint_env.dataset_example import dataset as _ds  # type: ignore
            dataset = _ds

        self._dataset = dataset
        self._difficulty: str = "easy"
        # Per-difficulty cursor so reset() cycles deterministically.
        self._indexes: Dict[str, int] = {k: 0 for k in dataset}
        self._current_sample: Optional[Dict] = None
        self._state = ConstraintState(episode_id=None)

    # ------------------------------------------------------------------
    # OpenEnv interface
    # ------------------------------------------------------------------

    def reset(self, task_id: Optional[str] = None):
        """
        Reset the environment for a new episode.

        Args:
            task_id: One of "easy", "medium", or "hard".
                Defaults to cycling through "easy".
        """
        if task_id and task_id in self._dataset:
            self._difficulty = task_id
        elif task_id is None:
            # Default: cycle through easy samples
            self._difficulty = "easy"

        pool = self._dataset[self._difficulty]
        idx = self._indexes[self._difficulty]
        self._current_sample = pool[idx]
        self._indexes[self._difficulty] = (idx + 1) % len(pool)
        self._state = ConstraintState(episode_id=str(uuid4()))

        return ConstraintObservation(
            prompt=self._current_sample["prompt"],
            done=False,
            reward=0.0,
            info={"difficulty": self._difficulty},
        )

    def step(self, action: ConstraintAction):
        """
        Evaluate the agent's AST output and return a scored observation.
        Reward is normalised to [−0.25, 1.0].

        Raises:
            RuntimeError: if called before reset().
        """
        # FIX: calling step() before reset() previously crashed with an
        # opaque TypeError ("'NoneType' object is not subscriptable").
        if self._current_sample is None:
            raise RuntimeError("step() called before reset(); call reset() first")

        reward = 0.0
        info: Dict[str, Any] = {"difficulty": self._difficulty}

        # ── 1. Parse JSON ────────────────────────────────────────────
        try:
            ast = json.loads(action.ast_output)
            reward += _W_JSON
        except (json.JSONDecodeError, TypeError):
            reward += _PENALTY_INVALID_JSON
            return ConstraintObservation(
                prompt=self._current_sample["prompt"],
                done=True,
                reward=round(reward, 4),
                info={**info, "error": "invalid_json"},
            )

        # ── 2. Validate structure ─────────────────────────────────────
        if self._validate_structure(ast):
            reward += _W_STRUCTURE
        else:
            reward += _PENALTY_BAD_STRUCTURE
            info["error"] = "bad_structure"

        # ── 3. Logic match (ignores "name" – user-chosen identifier) ──────
        if "target_ast" in self._current_sample:
            if self._logic_match(ast, self._current_sample["target_ast"]):
                reward += _W_MATCH
                info["exact_match"] = True
            else:
                info["exact_match"] = False

        return ConstraintObservation(
            prompt=self._current_sample["prompt"],
            done=True,
            reward=round(reward, 4),
            info=info,
        )

    @property
    def state(self):
        """Current ConstraintState (episode_id of the active episode)."""
        return self._state

    # ------------------------------------------------------------------
    # Validation helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _logic_match(ast: Dict[str, Any], target: Dict[str, Any]) -> bool:
        """
        Compare two ASTs on every logically meaningful field, ignoring "name".

        Fields compared:
          • type      – hard / soft
          • forall    – same variable declarations (order-independent)
          • where     – optional guard expression (string equality)
          • assert    – constraint body  } exactly one
          • minimize  – objective body   }

        "name" is intentionally excluded — it is a free-form user-chosen
        snake_case identifier that has no effect on the constraint's semantics.
        """
        # FIX: valid JSON that is not an object (list, string, number, …)
        # previously raised AttributeError on .keys()/.get(); treat it as a
        # non-match instead of crashing the request.
        if not isinstance(ast, dict) or not isinstance(target, dict):
            return False

        _LOGIC_KEYS = {"type", "forall", "where", "assert", "minimize"}

        # Collect only logic keys present in either dict
        all_keys = (set(ast.keys()) | set(target.keys())) & _LOGIC_KEYS

        for key in all_keys:
            a_val = ast.get(key)
            t_val = target.get(key)

            if key == "forall":
                # Order of variable declarations doesn't matter;
                # compare as a frozenset of (var, domain) pairs.
                try:
                    a_set = frozenset(
                        (d["var"], d["domain"]) for d in (a_val or [])
                    )
                    t_set = frozenset(
                        (d["var"], d["domain"]) for d in (t_val or [])
                    )
                    if a_set != t_set:
                        return False
                except (TypeError, KeyError):
                    return False
            else:
                if a_val != t_val:
                    return False

        return True

    def _validate_structure(self, ast: Dict[str, Any]) -> bool:
        """Return True if AST follows the expected schema."""

        # FIX: top-level JSON may be a list/string/number; previously this
        # raised AttributeError on .get() — reject it as bad structure.
        if not isinstance(ast, dict):
            return False

        # 1. Top-level type field must be exactly "hard" or "soft"
        if ast.get("type") not in {"hard", "soft"}:
            return False

        # 2. Must have forall
        if "forall" not in ast:
            return False

        if not isinstance(ast["forall"], list) or len(ast["forall"]) == 0:
            return False

        # 3. Build variable scope from forall declarations
        scope: Dict[str, str] = {}
        for var_decl in ast["forall"]:
            # FIX: non-dict entries (e.g. strings) previously raised
            # AttributeError on .get(); reject them as bad structure.
            if not isinstance(var_decl, dict):
                return False

            var = var_decl.get("var")
            domain = var_decl.get("domain")

            if not var or not domain:
                return False

            if domain not in VALID_DOMAINS:
                return False

            scope[var] = domain

        # 4. Validate optional WHERE clause
        if "where" in ast:
            if not self._validate_expression(ast["where"], scope):
                return False

        # 5. Validate payload: assert OR minimize (one required)
        if "assert" in ast:
            return self._validate_expression(ast["assert"], scope)
        elif "minimize" in ast:
            return self._validate_expression(ast["minimize"], scope)
        else:
            return False

    def _validate_expression(self, expr: str, scope: Dict[str, str]) -> bool:
        """
        Validate that all identifiers in an expression are in scope or known.

        Follows the grammar:
            boolean_expr    ::= … AND | OR | NOT … | comparison | predicate | "(" … ")"
            comparison      ::= arithmetic_expr comp_op arithmetic_expr
            arithmetic_expr ::= … + | - | * … | function_call | number | identifier
            number          ::= digit { digit }   (multi-digit allowed)

        Extensions beyond the BNF (accepted but not spec'd):
          • String literals `'online'`, `'practical'` – stripped before tokenising
          • IN keyword – used in Python-style membership tests
        """
        # FIX: JSON values are not guaranteed to be strings; a non-string
        # expression previously raised TypeError inside re.sub (surfacing as
        # a server error). Grade it as invalid instead.
        if not isinstance(expr, str):
            return False

        # Strip quoted string literals (grammar extension – not in BNF)
        expr_stripped = re.sub(r"'[^']*'", "", expr)
        expr_stripped = re.sub(r'"[^"]*"', "", expr_stripped)

        # Strip all numbers (grammar: number ::= digit { digit })
        expr_stripped = re.sub(r"\b\d+\b", "", expr_stripped)

        tokens = self._extract_identifiers(expr_stripped)

        # Keywords defined in the grammar + accepted extensions
        GRAMMAR_KEYWORDS = {"AND", "OR", "NOT"}  # boolean_expr operators
        EXTENSIONS = {"IN", "true", "false", "null"}  # beyond BNF

        for token in tokens:
            if token in scope:
                continue
            if token in VALID_FUNCTIONS:
                continue
            if token in GRAMMAR_KEYWORDS or token in EXTENSIONS:
                continue
            # Unknown identifier
            return False

        return self._validate_function_calls(expr)

    @staticmethod
    def _validate_function_calls(expr: str) -> bool:
        """Validate that all function calls use known functions with correct arity.

        Handles nested calls like SUM(occupies(b, sub, d, s)) by counting only
        top-level commas (those not inside inner parentheses) to determine arity.
        """
        # Find every function call by name; walk the string to pair parens properly
        func_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)\(")

        i = 0
        while i < len(expr):
            m = func_pattern.search(expr, i)
            if not m:
                break

            func_name = m.group(1)
            # Position after the opening '('
            start = m.end()

            # Walk to find the matching closing ')'
            depth = 1
            j = start
            while j < len(expr) and depth > 0:
                if expr[j] == "(":
                    depth += 1
                elif expr[j] == ")":
                    depth -= 1
                j += 1

            # FIX: an unbalanced call (no matching ')') previously truncated
            # the argument list silently; reject the expression instead.
            if depth != 0:
                return False

            # args_str is everything between the outer parens
            args_str = expr[start : j - 1]

            if func_name in VALID_FUNCTIONS:
                # Count top-level commas (depth-0 within args_str)
                if args_str.strip() == "":
                    top_level_args = 0
                else:
                    top_level_args = 1
                    inner_depth = 0
                    for ch in args_str:
                        if ch == "(":
                            inner_depth += 1
                        elif ch == ")":
                            inner_depth -= 1
                        elif ch == "," and inner_depth == 0:
                            top_level_args += 1

                expected = VALID_FUNCTIONS[func_name]
                if top_level_args != expected:
                    return False
            else:
                # Not a DSL function — unknown call
                return False

            i = j  # advance past this call

        return True

    @staticmethod
    def _extract_identifiers(expr: str):
        # Return the set of identifier-shaped tokens appearing in expr.
        return set(re.findall(r"[a-zA-Z_][a-zA-Z0-9_]*", expr))
360
+
361
+
362
+ # ---------------------------------------------------------------------------
363
+ # Quick smoke-test
364
+ # ---------------------------------------------------------------------------
365
+
366
if __name__ == "__main__":
    import json as _json

    # Resolve the example dataset whether run inside or outside the package.
    try:
        from ..dataset_example import dataset as _ds
    except ImportError:
        from constraint_env.dataset_example import dataset as _ds  # type: ignore

    env = ConstraintEnvironment(_ds)

    # Smoke-test every difficulty with one perfect and one malformed answer.
    for difficulty in ("easy", "medium", "hard"):
        obs = env.reset(task_id=difficulty)
        print(f"\n[{difficulty.upper()}] prompt: {obs.prompt}")

        # send perfect answer
        target = env._current_sample["target_ast"]
        action = ConstraintAction(ast_output=_json.dumps(target))
        result = env.step(action)
        print(f" reward={result.reward} done={result.done} info={result.info}")

        # send bad JSON
        obs2 = env.reset(task_id=difficulty)
        bad = ConstraintAction(ast_output="this is not json")
        res2 = env.step(bad)
        print(f" [bad JSON] reward={res2.reward} info={res2.info}")
server/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openenv-core[core]>=0.2.2
2
+ fastapi>=0.115.0
3
+ uvicorn>=0.24.0
4
+
5
+
6
+
uv.lock ADDED
The diff for this file is too large to render. See raw diff