Sandeep Suresh committed on
Commit
d95de8e
·
1 Parent(s): 0394a5e

feat: Update class names and enhance documentation for CoEnv integration

Browse files
COEnv_Project_Documentation.md CHANGED
@@ -387,7 +387,7 @@ coenv/
387
  ├── openenv.yaml # OpenEnv manifest
388
  ├── pyproject.toml # Project metadata and dependencies
389
  ├── uv.lock # Locked dependencies
390
- ├── client.py # CoenvEnv client / inference-side runner
391
  ├── models.py # Shared action and observation models
392
  ├── config.json # Cluster defaults and simulation params
393
  ├── mkdocs.yml # Docs site configuration
 
387
  ├── openenv.yaml # OpenEnv manifest
388
  ├── pyproject.toml # Project metadata and dependencies
389
  ├── uv.lock # Locked dependencies
390
+ ├── client.py # CoEnv client / inference-side runner
391
  ├── models.py # Shared action and observation models
392
  ├── config.json # Cluster defaults and simulation params
393
  ├── mkdocs.yml # Docs site configuration
README.md CHANGED
@@ -19,14 +19,14 @@ The environment exposes realistic cluster state (nodes, pods, deployments, servi
19
 
20
  ## Quick Start
21
 
22
- The simplest way to use the Coenv environment is through the `CoenvEnv` class:
23
 
24
  ```python
25
- from coenv import CoenvAction, CoenvEnv
26
 
27
  try:
28
  # Create environment from Docker image
29
- coenvenv = CoenvEnv.from_docker_image("coenv-env:latest")
30
 
31
  # Reset with a task
32
  result = coenvenv.reset(task="pod_recovery")
@@ -50,7 +50,7 @@ finally:
50
  coenvenv.close()
51
  ```
52
 
53
- That's it! The `CoenvEnv.from_docker_image()` method handles:
54
  - Starting the Docker container
55
  - Waiting for the server to be ready
56
  - Connecting to the environment
@@ -157,10 +157,10 @@ Reward is task-dependent and based on service health progression:
157
  If you already have a Coenv environment server running, you can connect directly:
158
 
159
  ```python
160
- from coenv import CoenvAction, CoenvEnv
161
 
162
  # Connect to existing server
163
- coenvenv = CoenvEnv(base_url="<ENV_HTTP_URL_HERE>")
164
 
165
  # Use as normal
166
  result = coenvenv.reset(task="incident")
@@ -176,10 +176,10 @@ Note: When connecting to an existing server, `coenvenv.close()` will NOT stop th
176
  The client supports context manager usage for automatic connection management:
177
 
178
  ```python
179
- from coenv import CoenvAction, CoenvEnv
180
 
181
  # Connect with context manager (auto-connects and closes)
182
- with CoenvEnv(base_url="http://localhost:8000") as env:
183
  result = env.reset(task="autoscaling")
184
  print(f"Reset objective: {result.observation.objective}")
185
  # Multiple steps with low latency
@@ -213,11 +213,11 @@ app = create_app(
213
  Then multiple clients can connect simultaneously:
214
 
215
  ```python
216
- from coenv import CoenvAction, CoenvEnv
217
  from concurrent.futures import ThreadPoolExecutor
218
 
219
  def run_episode(client_id: int):
220
- with CoenvEnv(base_url="http://localhost:8000") as env:
221
  result = env.reset(task="pod_recovery")
222
  for i in range(10):
223
  result = env.step(
@@ -265,7 +265,7 @@ coenv/
265
  ├── openenv.yaml # OpenEnv manifest
266
  ├── pyproject.toml # Project metadata and dependencies
267
  ├── uv.lock # Locked dependencies (generated)
268
- ├── client.py # CoenvEnv client
269
  ├── models.py # Action and Observation models
270
  └── server/
271
  ├── __init__.py # Server module exports
 
19
 
20
  ## Quick Start
21
 
22
+ The simplest way to use the Coenv environment is through the `CoEnv` class:
23
 
24
  ```python
25
+ from coenv import CoenvAction, CoEnv
26
 
27
  try:
28
  # Create environment from Docker image
29
+ coenvenv = CoEnv.from_docker_image("coenv-env:latest")
30
 
31
  # Reset with a task
32
  result = coenvenv.reset(task="pod_recovery")
 
50
  coenvenv.close()
51
  ```
52
 
53
+ That's it! The `CoEnv.from_docker_image()` method handles:
54
  - Starting the Docker container
55
  - Waiting for the server to be ready
56
  - Connecting to the environment
 
157
  If you already have a Coenv environment server running, you can connect directly:
158
 
159
  ```python
160
+ from coenv import CoenvAction, CoEnv
161
 
162
  # Connect to existing server
163
+ coenvenv = CoEnv(base_url="<ENV_HTTP_URL_HERE>")
164
 
165
  # Use as normal
166
  result = coenvenv.reset(task="incident")
 
176
  The client supports context manager usage for automatic connection management:
177
 
178
  ```python
179
+ from coenv import CoenvAction, CoEnv
180
 
181
  # Connect with context manager (auto-connects and closes)
182
+ with CoEnv(base_url="http://localhost:8000") as env:
183
  result = env.reset(task="autoscaling")
184
  print(f"Reset objective: {result.observation.objective}")
185
  # Multiple steps with low latency
 
213
  Then multiple clients can connect simultaneously:
214
 
215
  ```python
216
+ from coenv import CoenvAction, CoEnv
217
  from concurrent.futures import ThreadPoolExecutor
218
 
219
  def run_episode(client_id: int):
220
+ with CoEnv(base_url="http://localhost:8000") as env:
221
  result = env.reset(task="pod_recovery")
222
  for i in range(10):
223
  result = env.step(
 
265
  ├── openenv.yaml # OpenEnv manifest
266
  ├── pyproject.toml # Project metadata and dependencies
267
  ├── uv.lock # Locked dependencies (generated)
268
+ ├── client.py # CoEnv client
269
  ├── models.py # Action and Observation models
270
  └── server/
271
  ├── __init__.py # Server module exports
__init__.py CHANGED
@@ -6,11 +6,11 @@
6
 
7
  """Coenv Environment."""
8
 
9
- from .client import CoenvEnv
10
  from .models import CoenvAction, CoenvObservation
11
 
12
  __all__ = [
13
  "CoenvAction",
14
  "CoenvObservation",
15
- "CoenvEnv",
16
  ]
 
6
 
7
  """Coenv Environment."""
8
 
9
+ from .client import CoEnv
10
  from .models import CoenvAction, CoenvObservation
11
 
12
  __all__ = [
13
  "CoenvAction",
14
  "CoenvObservation",
15
+ "CoEnv",
16
  ]
client.py CHANGED
@@ -12,10 +12,10 @@ from openenv.core import EnvClient
12
  from openenv.core.client_types import StepResult
13
  from openenv.core.env_server.types import State
14
 
15
- from .models import CoenvAction, CoenvObservation
16
 
17
 
18
- class CoenvEnv(
19
  EnvClient[CoenvAction, CoenvObservation, State]
20
  ):
21
  """
@@ -27,7 +27,7 @@ class CoenvEnv(
27
 
28
  Example:
29
  >>> # Connect to a running server
30
- >>> with CoenvEnv(base_url="http://localhost:8000") as client:
31
  ... result = client.reset()
32
  ... print(result.observation.echoed_message)
33
  ...
@@ -36,7 +36,7 @@ class CoenvEnv(
36
 
37
  Example with Docker:
38
  >>> # Automatically start container and connect
39
- >>> client = CoenvEnv.from_docker_image("coenv-env:latest")
40
  >>> try:
41
  ... result = client.reset()
42
  ... result = client.step(CoenvAction(message="Test"))
 
12
  from openenv.core.client_types import StepResult
13
  from openenv.core.env_server.types import State
14
 
15
# Import the shared models both when this module is loaded as part of the
# package (``from .client import CoEnv``) and when it is imported as a
# top-level module from the project root (``from client import CoEnv``).
# A relative import raises ImportError when there is no parent package,
# which triggers the absolute fallback.
try:
    from .models import CoenvAction, CoenvObservation
except ImportError:
    from models import CoenvAction, CoenvObservation
16
 
17
 
18
+ class CoEnv(
19
  EnvClient[CoenvAction, CoenvObservation, State]
20
  ):
21
  """
 
27
 
28
  Example:
29
  >>> # Connect to a running server
30
+ >>> with CoEnv(base_url="http://localhost:8000") as client:
31
  ... result = client.reset()
32
  ... print(result.observation.echoed_message)
33
  ...
 
36
 
37
  Example with Docker:
38
  >>> # Automatically start container and connect
39
+ >>> client = CoEnv.from_docker_image("coenv-env:latest")
40
  >>> try:
41
  ... result = client.reset()
42
  ... result = client.step(CoenvAction(message="Test"))
inference.py CHANGED
@@ -1,86 +1,188 @@
1
  """
2
- coenv Inference Script
3
- Used by validators to run episodes with LLMs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
 
 
6
  import os
7
- import sys
8
- import json
9
- import argparse
10
- import requests
11
- from typing import Dict, Any, Optional
12
-
13
- API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
14
- MODEL_NAME = os.getenv("MODEL_NAME", "Qwen3-30B")
15
- HF_TOKEN = os.getenv("HF_TOKEN")
16
-
17
-
18
- def main():
19
- parser = argparse.ArgumentParser(description='Run coenv inference')
20
- parser.add_argument('--api-base-url', type=str, default=API_BASE_URL, help='Base URL for the coenv API')
21
- parser.add_argument('--model-name', type=str, default=MODEL_NAME, help='Name of the model to use')
22
- parser.add_argument('--hf-token', type=str, default=HF_TOKEN, help='Hugging Face token (if needed)')
23
- parser.add_argument('--task-id', type=str, default='pod_recovery', help='Task ID to run')
24
- parser.add_argument('--max-steps', type=int, default=15, help='Maximum steps per episode')
25
-
26
- args = parser.parse_args()
27
-
28
- api_base_url = args.api_base_url.rstrip('/')
29
- model_name = args.model_name
30
- hf_token = args.hf_token or HF_TOKEN
31
- task_id = args.task_id
32
- max_steps = args.max_steps
33
-
34
- print(f"[START] task={task_id} env=coenv model={model_name}")
35
-
36
- reset_url = f"{api_base_url}/reset"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  try:
38
- response = requests.post(reset_url, json={"task": task_id})
39
- response.raise_for_status()
40
- observation = response.json()
41
- except Exception as e:
42
- print(f"[ERROR] Failed to reset environment: {e}")
43
- return 1
44
-
45
- total_reward = []
46
-
47
- for step in range(1, max_steps + 1):
48
- action = {
49
- "action_type": "describe",
50
- "resource_type": "deployment",
51
- "name": "frontend"
52
- }
53
- action_str = f"describe('deployment','frontend')"
54
-
55
- step_url = f"{api_base_url}/step"
56
- try:
57
- response = requests.post(step_url, json={"action": action})
58
- response.raise_for_status()
59
- result = response.json()
60
-
61
- reward = result.get('reward', 0.0)
62
- done = result.get('done', False)
63
- info = result.get('info', {})
64
- error_str = "null"
65
-
66
- if 'error' in info and info['error']:
67
- error_str = f"\"{info['error']}\""
68
-
69
- total_reward.append(reward)
70
-
71
- print(f"[STEP] step={step} action={action_str} reward={reward:.2f} done={done} error={error_str}")
72
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  if done:
74
- print(f"[END] success={str(done).lower()} steps={step} rewards={total_reward}")
75
- return 0
76
-
 
 
 
 
 
 
77
  except Exception as e:
78
- print(f"[ERROR] Failed to step environment: {e}")
79
- print(f"[STEP] step={step} action={action_str} reward=0.00 done=false error=\"{str(e)}\"")
80
-
81
- print(f"[END] success=false steps={max_steps} rewards={total_reward}")
82
- return 0
83
 
84
 
85
  if __name__ == "__main__":
86
- sys.exit(main())
 
1
  """
2
+ Inference Script Example
3
+ ===================================
4
+ MANDATORY
5
+ - Before submitting, ensure the following variables are defined in your environment configuration:
6
+ API_BASE_URL The API endpoint for the LLM.
7
+ MODEL_NAME The model identifier to use for inference.
8
+ HF_TOKEN Your Hugging Face / API key.
9
+ LOCAL_IMAGE_NAME The name of the local image to use for the environment if you are using from_docker_image()
10
+ method
11
+
12
+ - Defaults are set only for API_BASE_URL and MODEL_NAME
13
+ (and should reflect your active inference setup):
14
+ API_BASE_URL = os.getenv("API_BASE_URL", "<your-active-endpoint>")
15
+ MODEL_NAME = os.getenv("MODEL_NAME", "<your-active-model>")
16
+
17
+ - The inference script must be named `inference.py` and placed in the root directory of the project
18
+ - Participants must use OpenAI Client for all LLM calls using above variables
19
+
20
+ STDOUT FORMAT
21
+ - The script must emit exactly three line types to stdout, in this order:
22
+
23
+ [START] task=<task_name> env=<benchmark> model=<model_name>
24
+ [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
25
+ [END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
26
+
27
+ Rules:
28
+ - One [START] line at episode begin.
29
+ - One [STEP] line per step, immediately after env.step() returns.
30
+ - One [END] line after env.close(), always emitted (even on exception).
31
+ - reward and rewards are formatted to 2 decimal places.
32
+ - done and success are lowercase booleans: true or false.
33
+ - error is the raw last_action_error string, or null if none.
34
+ - All fields on a single line with no newlines within a line.
35
+ - Each task should return a score in [0, 1]
36
+
37
+ Example:
38
+ [START] task=click-test env=miniwob model=Qwen3-VL-30B
39
+ [STEP] step=1 action=click('123') reward=0.00 done=false error=null
40
+ [STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
41
+ [STEP] step=3 action=click('789') reward=1.00 done=true error=null
42
+ [END] success=true steps=3 score=1.00 rewards=0.00,0.00,1.00
43
  """
44
 
45
+ import asyncio
46
  import os
47
+ import textwrap
48
+ from typing import List, Optional
49
+
50
+ from openai import OpenAI
51
+ from models import CoenvAction
52
+ from client import CoEnv
53
+ IMAGE_NAME = os.getenv("IMAGE_NAME")
54
+ API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
55
+
56
+ API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
57
+ MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
58
+ TASK_NAME = os.getenv("MY_ENV_V4_TASK", "echo")
59
+ BENCHMARK = os.getenv("MY_ENV_V4_BENCHMARK", "my_env_v4")
60
+ MAX_STEPS = 8
61
+ TEMPERATURE = 0.7
62
+ MAX_TOKENS = 150
63
+ SUCCESS_SCORE_THRESHOLD = 0.1 # normalized score in [0, 1]
64
+
65
+ # Max possible reward: each token contributes 0.1, across all steps
66
+ _MAX_REWARD_PER_STEP = MAX_TOKENS * 0.1
67
+ MAX_TOTAL_REWARD = MAX_STEPS * _MAX_REWARD_PER_STEP
68
+
69
+ SYSTEM_PROMPT = textwrap.dedent(
70
+ """
71
+ You are interacting with a simple echo environment.
72
+ Each turn you must send a message. The environment will echo it back.
73
+ Reward is proportional to message length: reward = len(message) * 0.1
74
+ Your goal is to maximize total reward by sending meaningful, substantive messages.
75
+ Reply with exactly one message string — no quotes, no prefixes, just the message text.
76
+ """
77
+ ).strip()
78
+
79
+
80
def log_start(task: str, env: str, model: str) -> None:
    """Print the single ``[START]`` record that opens an episode.

    Args:
        task: Task name reported in the ``task=`` field.
        env: Benchmark/environment name reported in the ``env=`` field.
        model: Model identifier reported in the ``model=`` field.
    """
    start_line = f"[START] task={task} env={env} model={model}"
    # Flush immediately so the record is visible even if the process dies.
    print(start_line, flush=True)
82
+
83
+
84
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` record on a single stdout line.

    Args:
        step: 1-based step index.
        action: Action text sent to the environment.
        reward: Reward for this step, printed with two decimals.
        done: Whether the episode finished; printed as ``true``/``false``.
        error: Error text for the step, or ``None``/empty for ``null``.
    """
    # The action is raw model output and may contain line breaks; the output
    # contract requires each record to stay on one line, so join the pieces
    # with spaces. Single-line text passes through unchanged.
    action_val = " ".join(str(action).splitlines())
    error_val = " ".join(str(error).splitlines()) if error else ""
    error_val = error_val or "null"
    done_val = str(done).lower()
    print(
        f"[STEP] step={step} action={action_val} reward={reward:.2f} done={done_val} error={error_val}",
        flush=True,
    )
91
+
92
+
93
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the closing ``[END]`` record for an episode.

    Args:
        success: Episode outcome, printed as ``true``/``false``.
        steps: Number of steps taken.
        score: Final score, printed with three decimals.
        rewards: Per-step rewards, each printed with two decimals and
            joined by commas (empty list prints an empty field).
    """
    formatted_rewards = [f"{value:.2f}" for value in rewards]
    rewards_str = ",".join(formatted_rewards)
    success_val = str(success).lower()
    print(f"[END] success={success_val} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
96
+
97
+
98
def build_user_prompt(step: int, last_echoed: str, last_reward: float, history: List[str]) -> str:
    """Build the per-step user prompt shown to the model.

    Args:
        step: Current 1-based step index.
        last_echoed: Message echoed by the previous step (shown via ``repr``).
        last_reward: Reward from the previous step, printed with two decimals.
        history: Earlier step summaries; only the last four are included.

    Returns:
        The prompt text with no leading or trailing whitespace.
    """
    history_block = "\n".join(history[-4:]) if history else "None"
    # Assemble the prompt line by line instead of dedenting an f-string:
    # the interpolated history lines start at column 0, which would leave
    # dedent with no common margin and keep the template lines indented
    # whenever two or more history entries are present.
    prompt_lines = [
        f"Step: {step}",
        f"Last echoed message: {last_echoed!r}",
        f"Last reward: {last_reward:.2f}",
        "Previous steps:",
        history_block,
        "Send your next message.",
    ]
    return "\n".join(prompt_lines).strip()
110
+
111
+
112
def get_model_message(client: OpenAI, step: int, last_echoed: str, last_reward: float, history: List[str]) -> str:
    """Ask the model for the next message, falling back to ``"hello"``.

    Args:
        client: OpenAI client used for the chat completion request.
        step: Current 1-based step index.
        last_echoed: Message echoed by the previous step.
        last_reward: Reward from the previous step.
        history: Earlier step summaries passed on to the prompt builder.

    Returns:
        The stripped model reply, or ``"hello"`` when the reply is empty or
        the request raises.
    """
    fallback = "hello"
    chat_messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_prompt(step, last_echoed, last_reward, history)},
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=chat_messages,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            stream=False,
        )
        # ``content`` may be None; treat that the same as an empty reply.
        reply = (completion.choices[0].message.content or "").strip()
    except Exception as exc:
        # Best effort: a failed request must not abort the episode.
        print(f"[DEBUG] Model request failed: {exc}", flush=True)
        return fallback
    return reply or fallback
130
+
131
+
132
async def main() -> None:
    """Run one episode against a Docker-launched CoEnv and log it to stdout.

    Emits ``[START]``, one ``[STEP]`` per environment step, and ``[END]``.
    The environment is created inside the ``try`` block so that ``[END]`` is
    printed even when the container fails to start; any exception raised
    during the episode still propagates after the ``[END]`` line.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    history: List[str] = []
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False
    env = None  # stays None when the container never comes up

    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)

    try:
        env = await CoEnv.from_docker_image(IMAGE_NAME)

        # NOTE(review): ``message=`` and ``echoed_message`` follow the echo
        # environment described in SYSTEM_PROMPT -- confirm these fields
        # exist on CoenvAction / CoenvObservation.
        result = await env.reset()
        last_echoed = result.observation.echoed_message
        last_reward = 0.0

        for step in range(1, MAX_STEPS + 1):
            if result.done:
                break

            message = get_model_message(client, step, last_echoed, last_reward, history)

            result = await env.step(CoenvAction(message=message))
            obs = result.observation

            # A missing (None) reward is logged and accumulated as 0.0.
            reward = result.reward or 0.0
            done = result.done
            error = None

            rewards.append(reward)
            steps_taken = step
            last_echoed = obs.echoed_message
            last_reward = reward

            log_step(step=step, action=message, reward=reward, done=done, error=error)

            history.append(f"Step {step}: {message!r} -> reward {reward:+.2f}")

            if done:
                break

        score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
        score = min(max(score, 0.0), 1.0)  # clamp to [0, 1]
        success = score >= SUCCESS_SCORE_THRESHOLD

    finally:
        if env is not None:
            try:
                await env.close()
            except Exception as e:
                # Cleanup is best effort; the [END] line must still be printed.
                print(f"[DEBUG] env.close() error (container cleanup): {e}", flush=True)
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
 
 
 
185
 
186
 
187
  if __name__ == "__main__":
188
+ asyncio.run(main())
models.py CHANGED
@@ -11,7 +11,7 @@ These models define the public OpenEnv action/observation schema for the
11
  Kubernetes simulation.
12
  """
13
 
14
- from openenv.core.env_server.types import Action, Observation
15
  from pydantic import Field
16
  from typing import Dict, Any, Optional, Literal, List
17
 
@@ -74,3 +74,9 @@ class CoenvObservation(Observation):
74
  events: List[ClusterEvent] = Field(default_factory=list)
75
  step: int = Field(default=0)
76
  objective: str = Field(default="")
 
 
 
 
 
 
 
11
  Kubernetes simulation.
12
  """
13
 
14
+ from openenv.core.env_server.types import Action, Observation, State
15
  from pydantic import Field
16
  from typing import Dict, Any, Optional, Literal, List
17
 
 
74
  events: List[ClusterEvent] = Field(default_factory=list)
75
  step: int = Field(default=0)
76
  objective: str = Field(default="")
77
+
78
+ class CoenvState(State):
79
+ """State model for the Kubernetes simulator."""
80
+
81
+ episode_id: str = Field(default="")
82
+ step_count: int = Field(default=0)
pre-submission.sh ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ # validate-submission.sh — OpenEnv Submission Validator
4
+ #
5
+ # Checks that your HF Space is live, Docker image builds, and openenv validate passes.
6
+ #
7
+ # Prerequisites:
8
+ # - Docker: https://docs.docker.com/get-docker/
9
+ # - openenv-core: pip install openenv-core
10
+ # - curl (usually pre-installed)
11
+ #
12
+ # Run:
13
+ # curl -fsSL https://raw.githubusercontent.com/<owner>/<repo>/main/scripts/validate-submission.sh | bash -s -- <ping_url> [repo_dir]
14
+ #
15
+ # Or download and run locally:
16
+ # chmod +x validate-submission.sh
17
+ # ./validate-submission.sh <ping_url> [repo_dir]
18
+ #
19
+ # Arguments:
20
+ # ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)
21
+ # repo_dir Path to your repo (default: current directory)
22
+ #
23
+ # Examples:
24
+ # ./validate-submission.sh https://my-team.hf.space
25
+ # ./validate-submission.sh https://my-team.hf.space ./my-repo
26
+ #
27
+
28
+ set -uo pipefail
29
+
30
+ DOCKER_BUILD_TIMEOUT=600
31
+ if [ -t 1 ]; then
32
+ RED='\033[0;31m'
33
+ GREEN='\033[0;32m'
34
+ YELLOW='\033[1;33m'
35
+ BOLD='\033[1m'
36
+ NC='\033[0m'
37
+ else
38
+ RED='' GREEN='' YELLOW='' BOLD='' NC=''
39
+ fi
40
+
41
+ run_with_timeout() {
42
+ local secs="$1"; shift
43
+ if command -v timeout &>/dev/null; then
44
+ timeout "$secs" "$@"
45
+ elif command -v gtimeout &>/dev/null; then
46
+ gtimeout "$secs" "$@"
47
+ else
48
+ "$@" &
49
+ local pid=$!
50
+ ( sleep "$secs" && kill "$pid" 2>/dev/null ) &
51
+ local watcher=$!
52
+ wait "$pid" 2>/dev/null
53
+ local rc=$?
54
+ kill "$watcher" 2>/dev/null
55
+ wait "$watcher" 2>/dev/null
56
+ return $rc
57
+ fi
58
+ }
59
+
60
+ portable_mktemp() {
61
+ local prefix="${1:-validate}"
62
+ mktemp "${TMPDIR:-/tmp}/${prefix}-XXXXXX" 2>/dev/null || mktemp
63
+ }
64
+
65
+ CLEANUP_FILES=()
66
+ cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
67
+ trap cleanup EXIT
68
+
69
+ PING_URL="${1:-}"
70
+ REPO_DIR="${2:-.}"
71
+
72
+ if [ -z "$PING_URL" ]; then
73
+ printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
74
+ printf "\n"
75
+ printf " ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
76
+ printf " repo_dir Path to your repo (default: current directory)\n"
77
+ exit 1
78
+ fi
79
+
80
+ if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
81
+ printf "Error: directory '%s' not found\n" "${2:-.}"
82
+ exit 1
83
+ fi
84
+ PING_URL="${PING_URL%/}"
85
+ export PING_URL
86
+ PASS=0
87
+
88
+ log() { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
89
+ pass() { log "${GREEN}PASSED${NC} -- $1"; PASS=$((PASS + 1)); }
90
+ fail() { log "${RED}FAILED${NC} -- $1"; }
91
+ hint() { printf " ${YELLOW}Hint:${NC} %b\n" "$1"; }
92
+ stop_at() {
93
+ printf "\n"
94
+ printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
95
+ exit 1
96
+ }
97
+
98
+ printf "\n"
99
+ printf "${BOLD}========================================${NC}\n"
100
+ printf "${BOLD} OpenEnv Submission Validator${NC}\n"
101
+ printf "${BOLD}========================================${NC}\n"
102
+ log "Repo: $REPO_DIR"
103
+ log "Ping URL: $PING_URL"
104
+ printf "\n"
105
+
106
+ log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
107
+
108
# Probe the Space's /reset endpoint and keep only the HTTP status code.
CURL_OUTPUT=$(portable_mktemp "validate-curl")
CLEANUP_FILES+=("$CURL_OUTPUT")
# curl's -w already prints "000" when the request fails, so appending a second
# "000" on a non-zero exit would produce "000000" and skip the unreachable
# branch below. Fall back to "000" only when nothing was printed at all
# (for example when curl itself is missing). stderr is discarded rather than
# redirected onto the same file that -o writes the response body to.
HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
  -H "Content-Type: application/json" -d '{}' \
  "$PING_URL/reset" --max-time 30 2>/dev/null || true)
HTTP_CODE="${HTTP_CODE:-000}"

if [ "$HTTP_CODE" = "200" ]; then
  pass "HF Space is live and responds to /reset"
elif [ "$HTTP_CODE" = "000" ]; then
  fail "HF Space not reachable (connection failed or timed out)"
  hint "Check your network connection and that the Space is running."
  # hint prints its argument through %b, not as a printf format string,
  # so the percent sign must not be doubled here.
  hint "Try: curl -s -o /dev/null -w '%{http_code}' -X POST $PING_URL/reset"
  stop_at "Step 1"
else
  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
  hint "Make sure your Space is running and the URL is correct."
  hint "Try opening $PING_URL in your browser first."
  stop_at "Step 1"
fi
127
+
128
+ log "${BOLD}Step 2/3: Running docker build${NC} ..."
129
+
130
+ if ! command -v docker &>/dev/null; then
131
+ fail "docker command not found"
132
+ hint "Install Docker: https://docs.docker.com/get-docker/"
133
+ stop_at "Step 2"
134
+ fi
135
+
136
+ if [ -f "$REPO_DIR/Dockerfile" ]; then
137
+ DOCKER_CONTEXT="$REPO_DIR"
138
+ elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
139
+ DOCKER_CONTEXT="$REPO_DIR/server"
140
+ else
141
+ fail "No Dockerfile found in repo root or server/ directory"
142
+ stop_at "Step 2"
143
+ fi
144
+
145
+ log " Found Dockerfile in $DOCKER_CONTEXT"
146
+
147
+ BUILD_OK=false
148
+ BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1) && BUILD_OK=true
149
+
150
+ if [ "$BUILD_OK" = true ]; then
151
+ pass "Docker build succeeded"
152
+ else
153
+ fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
154
+ printf "%s\n" "$BUILD_OUTPUT" | tail -20
155
+ stop_at "Step 2"
156
+ fi
157
+
158
+ log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
159
+
160
+ if ! command -v openenv &>/dev/null; then
161
+ fail "openenv command not found"
162
+ hint "Install it: pip install openenv-core"
163
+ stop_at "Step 3"
164
+ fi
165
+
166
+ VALIDATE_OK=false
167
+ VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1) && VALIDATE_OK=true
168
+
169
+ if [ "$VALIDATE_OK" = true ]; then
170
+ pass "openenv validate passed"
171
+ [ -n "$VALIDATE_OUTPUT" ] && log " $VALIDATE_OUTPUT"
172
+ else
173
+ fail "openenv validate failed"
174
+ printf "%s\n" "$VALIDATE_OUTPUT"
175
+ stop_at "Step 3"
176
+ fi
177
+
178
+ printf "\n"
179
+ printf "${BOLD}========================================${NC}\n"
180
+ printf "${GREEN}${BOLD} All 3/3 checks passed!${NC}\n"
181
+ printf "${GREEN}${BOLD} Your submission is ready to submit.${NC}\n"
182
+ printf "${BOLD}========================================${NC}\n"
183
+ printf "\n"
184
+
185
+ exit 0
server/Dockerfile CHANGED
@@ -17,19 +17,33 @@ COPY . /app/env
17
 
18
  WORKDIR /app/env
19
 
20
- # Install dependencies with uv (locked when uv.lock is available).
 
 
21
  RUN --mount=type=cache,target=/root/.cache/uv \
22
- if [ -f uv.lock ]; then \
23
- uv sync --frozen --no-install-project --no-editable; \
 
 
 
 
 
 
 
24
  else \
25
- uv sync --no-install-project --no-editable; \
 
26
  fi
27
 
28
  RUN --mount=type=cache,target=/root/.cache/uv \
29
- if [ -f uv.lock ]; then \
30
- uv sync --frozen --no-editable; \
 
 
 
 
31
  else \
32
- uv sync --no-editable; \
33
  fi
34
 
35
  FROM ${BASE_IMAGE}
 
17
 
18
  WORKDIR /app/env
19
 
20
+ # Install dependencies with uv.
21
+ # Use pyproject/uv.lock when present, otherwise fall back to requirements.txt
22
+ # because submission validators often build from server/ as context.
23
  RUN --mount=type=cache,target=/root/.cache/uv \
24
+ if [ -f pyproject.toml ]; then \
25
+ if [ -f uv.lock ]; then \
26
+ uv sync --frozen --no-install-project --no-editable; \
27
+ else \
28
+ uv sync --no-install-project --no-editable; \
29
+ fi; \
30
+ elif [ -f requirements.txt ]; then \
31
+ uv venv .venv; \
32
+ uv pip install --python .venv/bin/python -r requirements.txt; \
33
  else \
34
+ echo "No pyproject.toml or requirements.txt found in build context" >&2; \
35
+ exit 2; \
36
  fi
37
 
38
  RUN --mount=type=cache,target=/root/.cache/uv \
39
+ if [ -f pyproject.toml ]; then \
40
+ if [ -f uv.lock ]; then \
41
+ uv sync --frozen --no-editable; \
42
+ else \
43
+ uv sync --no-editable; \
44
+ fi; \
45
  else \
46
+ true; \
47
  fi
48
 
49
  FROM ${BASE_IMAGE}
server/simulation_service.py CHANGED
@@ -16,9 +16,9 @@ except ImportError:
16
  from coenv_environment import World
17
 
18
  try:
19
- from ..models import CoenvAction, CoenvObservation
20
  except ImportError:
21
- from models import CoenvAction, CoenvObservation
22
 
23
 
24
  def load_config() -> Dict[str, Any]:
@@ -147,6 +147,7 @@ class CoenvEnvironment(Environment):
147
 
148
  def __init__(self):
149
  self.config: Dict[str, Any] = load_config()
 
150
  self.world = World(self.config, seed=self.config.get("seed"))
151
  self.current_task = "pod_recovery"
152
  self.current_objective = get_objective_for_task(self.current_task)
@@ -225,14 +226,16 @@ class CoenvEnvironment(Environment):
225
  done = True
226
 
227
  return self._observation(done=done, reward=reward, info=info)
228
-
229
- def state(self, **_: Any) -> Dict[str, Any]:
230
- """Return lightweight environment state metadata."""
231
- return {
232
- "step": self.world.step_count,
233
- "task": self.current_task,
234
- "objective": self.current_objective,
235
- }
 
 
236
 
237
  def _observation(self, done: bool, reward: float, info: Dict[str, Any]) -> CoenvObservation:
238
  obs = self.world.get_observation(self.current_objective)
 
16
  from coenv_environment import World
17
 
18
  try:
19
+ from ..models import CoenvAction, CoenvObservation, CoenvState
20
  except ImportError:
21
+ from models import CoenvAction, CoenvObservation, CoenvState
22
 
23
 
24
  def load_config() -> Dict[str, Any]:
 
147
 
148
  def __init__(self):
149
  self.config: Dict[str, Any] = load_config()
150
+ self.episode_id = f"episode-{os.getpid()}-{int(os.times()[4] * 1000)}"
151
  self.world = World(self.config, seed=self.config.get("seed"))
152
  self.current_task = "pod_recovery"
153
  self.current_objective = get_objective_for_task(self.current_task)
 
226
  done = True
227
 
228
  return self._observation(done=done, reward=reward, info=info)
229
+
230
@property
def state(self) -> CoenvState:
    """Return episode metadata without applying an action.

    Returns:
        A ``CoenvState`` carrying this environment's ``episode_id`` and the
        world's current ``step_count``.
    """
    # Reward and completion are not part of CoenvState, so they are not
    # computed here; the previous version calculated both and discarded them.
    return CoenvState(
        episode_id=self.episode_id,
        step_count=self.world.step_count,
    )
239
 
240
  def _observation(self, done: bool, reward: float, info: Dict[str, Any]) -> CoenvObservation:
241
  obs = self.world.get_observation(self.current_objective)
tests/test_server.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..client import CoEnv
2
+ from ..models import CoenvAction
3
+ import pytest
4
+
5
+ @pytest.mark.asyncio
6
+ async def test_client_step_and_state():
7
+ async with CoEnv(base_url="http://localhost:8000") as client:
8
+ # Test reset and initial state
9
+ reset_result = await client.reset()
10
+ assert hasattr(reset_result.observation, "step")
11
+ assert hasattr(reset_result.observation, "done")
12
+ assert reset_result.observation.step == 0
13
+ assert reset_result.done is False
14
+
15
+ # Test step with a sample action
16
+ action = CoenvAction(action_type="describe", resource_type="pod", name="test-pod")
17
+ step_result = await client.step(action)
18
+ assert hasattr(step_result.observation, "step")
19
+ assert step_result.observation.step == 1
20
+
21
+ # Test state retrieval
22
+
23
+ step_result = await client.step(action)
24
+
25
+ state_attr = getattr(client, "state")
26
+ state = await state_attr() if callable(state_attr) else state_attr
27
+ if isinstance(state, dict):
28
+ step_count = state.get("step_count", state.get("step", -1))
29
+ else:
30
+ step_count = getattr(state, "step_count", getattr(state, "step", -1))
31
+ assert step_count == 2