Revanth-ml committed on
Commit
2709f05
·
verified ·
1 Parent(s): ccd9f86

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. inference.py +134 -195
  2. validate_submission.sh +158 -0
inference.py CHANGED
@@ -2,14 +2,12 @@
2
  """
3
  AgentOps Gym — Baseline inference script.
4
 
5
- Connects to the environment server via HTTP (no WebSocket client needed).
6
- The validator sets IMAGE_NAME and starts the container; this script connects
7
- to it on the expected port using plain HTTP requests + OpenAI client.
8
 
9
  Environment variables:
10
  IMAGE_NAME Docker image name (set by validator)
11
- HF_TOKEN HuggingFace API key (or OPENAI_API_KEY)
12
- OPENAI_API_KEY OpenAI API key
13
  API_BASE_URL LLM endpoint (default: https://router.huggingface.co/v1)
14
  MODEL_NAME Model name (default: Qwen/Qwen2.5-72B-Instruct)
15
  ENV_BASE_URL Server URL (default: http://localhost:8000)
@@ -24,7 +22,6 @@ import json
24
  import os
25
  import re
26
  import sys
27
- import time
28
  from typing import Dict, List, Optional
29
 
30
  # Load .env if present
@@ -34,27 +31,38 @@ try:
34
  except ImportError:
35
  pass
36
 
37
- import requests
38
  from openai import OpenAI
39
 
 
 
 
 
 
 
 
 
 
 
 
40
  # ---------------------------------------------------------------------------
41
  # Configuration
42
  # ---------------------------------------------------------------------------
43
 
44
  IMAGE_NAME = os.getenv("IMAGE_NAME")
45
  API_KEY = (
46
- os.getenv("HF_TOKEN") or
47
- os.getenv("OPENAI_API_KEY")
48
  or os.getenv("API_KEY")
49
  )
50
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
51
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
52
- BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
53
 
54
- BENCHMARK = "agentops-gym"
55
- MAX_STEPS = 10
56
- TEMPERATURE = 0.3
57
- MAX_TOKENS = 600
 
58
 
59
  ALL_TASKS = ["task_1", "task_2", "task_3", "task_4"]
60
 
@@ -85,7 +93,7 @@ Example:
85
  """
86
 
87
  # ---------------------------------------------------------------------------
88
- # Stdout log helpers (mandatory OpenEnv format)
89
  # ---------------------------------------------------------------------------
90
 
91
  def log_start(task: str, env: str, model: str) -> None:
@@ -95,121 +103,42 @@ def log_start(task: str, env: str, model: str) -> None:
95
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
96
  err_val = error if error else "null"
97
  print(
98
- f"[STEP] step={step} action={str(action).replace(chr(10),' ')[:200]} "
99
  f"reward={reward:.2f} done={str(done).lower()} error={err_val}",
100
  flush=True,
101
  )
102
 
103
 
104
- def log_end(success: bool, steps: int, rewards: List[float]) -> None:
 
105
  print(
106
  f"[END] success={str(success).lower()} steps={steps} "
107
- f"rewards={','.join(f'{r:.2f}' for r in rewards)}",
108
  flush=True,
109
  )
110
 
111
  # ---------------------------------------------------------------------------
112
- # HTTP helpers
113
  # ---------------------------------------------------------------------------
114
 
115
- def wait_for_server(base_url: str, retries: int = 20, delay: float = 3.0) -> bool:
116
- """Poll /health until the server is ready."""
117
- for i in range(retries):
118
- try:
119
- r = requests.get(f"{base_url}/health", timeout=5)
120
- if r.status_code == 200:
121
- print(f"[DEBUG] Server ready at {base_url}", flush=True)
122
- return True
123
- except Exception:
124
- pass
125
- print(f"[DEBUG] Waiting for server... ({i+1}/{retries})", flush=True)
126
- time.sleep(delay)
127
- return False
128
-
129
-
130
- def http_reset(base_url: str, task_id: str) -> Dict:
131
- r = requests.post(f"{base_url}/reset", json={"task_id": task_id}, timeout=30)
132
- r.raise_for_status()
133
- return r.json()
134
-
135
-
136
- def http_step(base_url: str, tool: str, parameters: Dict, reasoning: str = "") -> Dict:
137
- body = {"action": {"tool": tool, "parameters": parameters, "reasoning": reasoning}}
138
- r = requests.post(f"{base_url}/step", json=body, timeout=30)
139
- r.raise_for_status()
140
- return r.json()
141
-
142
-
143
- def http_grader(base_url: str) -> Dict:
144
- try:
145
- r = requests.get(f"{base_url}/grader", timeout=10)
146
- if r.status_code == 200:
147
- return r.json()
148
- except Exception:
149
- pass
150
- return {}
151
-
152
- # ---------------------------------------------------------------------------
153
- # Docker helpers (start container if IMAGE_NAME is set)
154
- # ---------------------------------------------------------------------------
155
-
156
- def start_container(image_name: str, port: int = 8000) -> Optional[str]:
157
- """Start the Docker container and return the container ID."""
158
- import subprocess
159
- try:
160
- result = subprocess.run(
161
- ["docker", "run", "-d", "--rm", "-p", f"{port}:{port}", image_name],
162
- capture_output=True, text=True, timeout=120,
163
- )
164
- if result.returncode == 0:
165
- cid = result.stdout.strip()
166
- print(f"[DEBUG] Container started: {cid[:12]}", flush=True)
167
- return cid
168
- else:
169
- print(f"[DEBUG] docker run failed: {result.stderr.strip()}", flush=True)
170
- except Exception as e:
171
- print(f"[DEBUG] Could not start container: {e}", flush=True)
172
- return None
173
-
174
-
175
- def stop_container(container_id: str) -> None:
176
- """Stop the Docker container, ignoring timeouts."""
177
- import subprocess
178
- try:
179
- subprocess.run(
180
- ["docker", "stop", "--time", "5", container_id],
181
- timeout=15, capture_output=True,
182
- )
183
- print(f"[DEBUG] Container stopped: {container_id[:12]}", flush=True)
184
- except Exception as e:
185
- print(f"[DEBUG] Container stop skipped: {e}", flush=True)
186
-
187
- # ---------------------------------------------------------------------------
188
- # Prompt builder
189
- # ---------------------------------------------------------------------------
190
-
191
- def build_prompt(obs: Dict) -> str:
192
- parts = [f"TASK: {obs.get('task_description', '')}"]
193
- parts.append(f"\nVisible files: {obs.get('visible_files', [])}")
194
- last = obs.get("last_tool_result")
195
  if last:
196
  parts.append(f"\nLast tool result:\n{str(last)[:1500]}")
197
- history = obs.get("action_history", [])
198
  if history:
199
  parts.append(f"\nHistory (last 3): {history[-3:]}")
200
- if obs.get("message"):
201
- parts.append(f"\nEnv message: {obs['message']}")
202
- meta = obs.get("metadata", {})
203
  parts.append(
204
- f"\nStep {obs.get('step_count', 0)}, "
205
  f"steps remaining: {meta.get('steps_remaining', '?')}"
206
  )
207
  parts.append("\nRespond with a single JSON tool call:")
208
  return "\n".join(parts)
209
 
210
- # ---------------------------------------------------------------------------
211
- # JSON extraction
212
- # ---------------------------------------------------------------------------
213
 
214
  def extract_tool_call(text: str) -> Optional[Dict]:
215
  text = text.strip()
@@ -235,89 +164,120 @@ def extract_tool_call(text: str) -> Optional[Dict]:
235
  pass
236
  return None
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  # ---------------------------------------------------------------------------
239
- # Episode runner
240
  # ---------------------------------------------------------------------------
241
 
242
- def run_episode(base_url: str, client: OpenAI, task_id: str) -> Dict:
243
- log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
244
 
 
 
 
 
 
 
 
245
  rewards: List[float] = []
246
  steps_taken = 0
247
  score = 0.0
248
  success = False
 
249
 
250
- try:
251
- reset_resp = http_reset(base_url, task_id)
252
- obs = reset_resp.get("observation", {})
253
- done = reset_resp.get("done", False)
254
-
255
- for step in range(1, MAX_STEPS + 1):
256
- if done or obs.get("done", False):
257
- break
258
 
259
- # Ask LLM
260
- try:
261
- completion = client.chat.completions.create(
262
- model=MODEL_NAME,
263
- messages=[
264
- {"role": "system", "content": SYSTEM_PROMPT},
265
- {"role": "user", "content": build_prompt(obs)},
266
- ],
267
- max_tokens=MAX_TOKENS,
268
- temperature=TEMPERATURE,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  )
270
- raw = (completion.choices[0].message.content or "").strip()
271
- except Exception as e:
272
- log_step(step=step, action="(llm_error)", reward=0.0, done=True, error=str(e))
273
- break
274
 
275
- tool_call = extract_tool_call(raw) or {
276
- "tool": "Grep",
277
- "parameters": {"pattern": "def "},
278
- "reasoning": "fallback",
279
- }
280
-
281
- tool = tool_call.get("tool", "Grep")
282
- params = tool_call.get("parameters", {})
283
- reasoning = tool_call.get("reasoning", "")
284
- action_str = f"{tool}({json.dumps(params)})"
285
-
286
- try:
287
- step_resp = http_step(base_url, tool, params, reasoning)
288
- except requests.HTTPError as e:
289
- log_step(step=step, action=action_str, reward=0.0, done=True, error=str(e))
290
- break
291
 
292
- obs = step_resp.get("observation", {})
293
- reward = float(step_resp.get("reward") or 0.0)
294
- done = bool(step_resp.get("done", False))
295
 
296
- rewards.append(reward)
297
- steps_taken = step
298
- log_step(step=step, action=action_str, reward=reward, done=done, error=None)
299
 
300
- if done:
301
- break
302
 
303
- # Score from grader endpoint (set when episode ends)
304
- grader = http_grader(base_url)
305
- raw_score = float(grader.get("score") or 0.0)
306
- # Fallback: use cumulative reward from last obs metadata
307
- if raw_score == 0.0:
308
- raw_score = float(obs.get("metadata", {}).get("grader_score") or 0.0)
309
-
310
- # Clamp score to be strictly between 0 and 1
311
- score = max(0.001, min(0.999, raw_score))
312
- success = score >= 0.5
313
 
314
  except Exception as e:
315
- print(f"[DEBUG] Episode error for {task_id}: {e}", flush=True)
316
- # Clamp score to be strictly between 0 and 1
317
- score = 0.001
318
 
319
  finally:
320
- log_end(success=success, steps=steps_taken, rewards=rewards)
321
 
322
  return {
323
  "task_id": task_id,
@@ -336,39 +296,18 @@ def main() -> None:
336
  print("ERROR: Set HF_TOKEN, OPENAI_API_KEY, or API_KEY.", file=sys.stderr)
337
  sys.exit(1)
338
 
339
- container_id = None
340
-
341
- # If IMAGE_NAME is set, start the container ourselves
342
- if IMAGE_NAME:
343
- container_id = start_container(IMAGE_NAME, port=8000)
344
- if container_id is None:
345
- print("[DEBUG] Could not start container — assuming server already running.", flush=True)
346
-
347
- # Wait for server to be ready
348
- if not wait_for_server(BASE_URL, retries=40, delay=3.0):
349
- print("ERROR: Server never became ready.", file=sys.stderr)
350
- if container_id:
351
- stop_container(container_id)
352
- sys.exit(1)
353
-
354
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
355
 
356
  print("=" * 60, flush=True)
357
  print("AgentOps Gym — Baseline Inference", flush=True)
358
- print(f"Model: {MODEL_NAME} | Server: {BASE_URL}", flush=True)
359
  print("=" * 60, flush=True)
360
 
361
  results = []
362
- try:
363
- for task_id in ALL_TASKS:
364
- print("─" * 40, flush=True)
365
- results.append(run_episode(BASE_URL, client, task_id))
366
- finally:
367
- # Always stop container we started, even if something crashed
368
- if container_id:
369
- stop_container(container_id)
370
 
371
- # Summary
372
  total = sum(r["score"] for r in results)
373
  solved = sum(1 for r in results if r["success"])
374
  avg = total / len(results) if results else 0.0
 
2
  """
3
  AgentOps Gym — Baseline inference script.
4
 
5
+ Uses the synchronous OpenEnv client pattern (env.sync()) matching the
6
+ hackathon sample inference.py. No async/await needed.
 
7
 
8
  Environment variables:
9
  IMAGE_NAME Docker image name (set by validator)
10
+ HF_TOKEN HuggingFace / API key (or OPENAI_API_KEY)
 
11
  API_BASE_URL LLM endpoint (default: https://router.huggingface.co/v1)
12
  MODEL_NAME Model name (default: Qwen/Qwen2.5-72B-Instruct)
13
  ENV_BASE_URL Server URL (default: http://localhost:8000)
 
22
  import os
23
  import re
24
  import sys
 
25
  from typing import Dict, List, Optional
26
 
27
  # Load .env if present
 
31
  except ImportError:
32
  pass
33
 
 
34
  from openai import OpenAI
35
 
36
+ # Ensure package is importable when run from inside the package dir
37
+ import pathlib, sys as _sys
38
+ _root = pathlib.Path(__file__).resolve().parent
39
+ _parent = _root.parent
40
+ for _p in (_root, _parent):
41
+ if str(_p) not in _sys.path:
42
+ _sys.path.insert(0, str(_p))
43
+
44
+ from agentops_gym.client import AgentOpsEnv
45
+ from agentops_gym.models import ToolCall
46
+
47
  # ---------------------------------------------------------------------------
48
  # Configuration
49
  # ---------------------------------------------------------------------------
50
 
51
  IMAGE_NAME = os.getenv("IMAGE_NAME")
52
  API_KEY = (
53
+ os.getenv("HF_TOKEN")
54
+ or os.getenv("OPENAI_API_KEY")
55
  or os.getenv("API_KEY")
56
  )
57
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
58
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
59
+ ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
60
 
61
+ BENCHMARK = "agentops-gym"
62
+ MAX_STEPS = 10
63
+ TEMPERATURE = 0.5
64
+ MAX_TOKENS = 1024
65
+ SUCCESS_SCORE_THRESHOLD = 0.5
66
 
67
  ALL_TASKS = ["task_1", "task_2", "task_3", "task_4"]
68
 
 
93
  """
94
 
95
  # ---------------------------------------------------------------------------
96
+ # Stdout log helpers must match spec exactly
97
  # ---------------------------------------------------------------------------
98
 
99
  def log_start(task: str, env: str, model: str) -> None:
 
103
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
104
  err_val = error if error else "null"
105
  print(
106
+ f"[STEP] step={step} action={str(action).replace(chr(10), ' ')[:200]} "
107
  f"reward={reward:.2f} done={str(done).lower()} error={err_val}",
108
  flush=True,
109
  )
110
 
111
 
112
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the closing ``[END]`` record for an episode to stdout.

    Args:
        success: Whether the episode counted as solved (printed lower-case).
        steps: Number of steps taken.
        score: Final score, printed with three decimals.
        rewards: Per-step rewards, printed comma-separated with two decimals.
    """
    formatted_rewards = [f"{value:.2f}" for value in rewards]
    fields = (
        f"success={str(success).lower()}",
        f"steps={steps}",
        f"score={score:.3f}",
        f"rewards={','.join(formatted_rewards)}",
    )
    print("[END] " + " ".join(fields), flush=True)
119
 
120
  # ---------------------------------------------------------------------------
121
+ # Helpers
122
  # ---------------------------------------------------------------------------
123
 
124
def build_prompt(obs_data: Dict, history: List[str]) -> str:
    """Render the user-turn prompt from an observation dict and step history.

    Args:
        obs_data: Observation as a plain dict. Keys read here:
            ``task_description``, ``visible_files``, ``last_tool_result``,
            ``message``, ``metadata`` and ``step_count``; all are optional.
        history: Earlier step summaries; only the last three are included.

    Returns:
        The prompt text, with sections joined by newlines.
    """
    parts = [f"TASK: {obs_data.get('task_description', '')}"]
    parts.append(f"\nVisible files: {obs_data.get('visible_files', [])}")

    last = obs_data.get("last_tool_result")
    if last:
        # Cap the tool output so one large result cannot dominate the prompt.
        parts.append(f"\nLast tool result:\n{str(last)[:1500]}")

    if history:
        parts.append(f"\nHistory (last 3): {history[-3:]}")

    if obs_data.get("message"):
        parts.append(f"\nEnv message: {obs_data['message']}")

    # A ``metadata`` key that is present but None would bypass a
    # ``.get(key, {})`` default and crash on ``.get`` below, so normalise any
    # falsy value to an empty dict.
    meta = obs_data.get("metadata") or {}
    parts.append(
        f"\nStep {obs_data.get('step_count', 0)}, "
        f"steps remaining: {meta.get('steps_remaining', '?')}"
    )
    parts.append("\nRespond with a single JSON tool call:")
    return "\n".join(parts)
141
 
 
 
 
142
 
143
  def extract_tool_call(text: str) -> Optional[Dict]:
144
  text = text.strip()
 
164
  pass
165
  return None
166
 
167
+
168
def get_model_action(client: OpenAI, obs_data: Dict, history: List[str]) -> Optional[Dict]:
    """Request the next tool call from the chat model.

    Sends the system prompt plus the prompt built from ``obs_data`` and
    ``history``, then parses the reply with ``extract_tool_call``.

    Returns:
        Whatever ``extract_tool_call`` returns for the stripped reply text, or
        ``None`` if anything in the request/parse path raises (the error is
        printed as a ``[DEBUG]`` line).
    """
    try:
        create_completion = client.chat.completions.create
        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": build_prompt(obs_data, history)},
        ]
        response = create_completion(
            model=MODEL_NAME,
            messages=conversation,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        # ``content`` may be None; treat that as an empty reply.
        reply_text = response.choices[0].message.content or ""
        return extract_tool_call(reply_text.strip())
    except Exception as exc:
        print(f"[DEBUG] LLM error: {exc}", flush=True)
        return None
185
+
186
  # ---------------------------------------------------------------------------
187
+ # Single task runner — sync pattern matching sample inference.py
188
  # ---------------------------------------------------------------------------
189
 
190
+ def run_task(client: OpenAI, task_id: str) -> Dict:
191
+ """Run one episode synchronously. Returns result dict."""
192
 
193
+ # Build client — use docker image if set, else connect to running server
194
+ if IMAGE_NAME:
195
+ env_client = AgentOpsEnv.from_docker_image(IMAGE_NAME)
196
+ else:
197
+ env_client = AgentOpsEnv(base_url=ENV_BASE_URL)
198
+
199
+ history: List[str] = []
200
  rewards: List[float] = []
201
  steps_taken = 0
202
  score = 0.0
203
  success = False
204
+ last_error: Optional[str] = None
205
 
206
+ log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
 
 
 
 
 
 
 
207
 
208
+ try:
209
+ # Use .sync() context manager — same pattern as sample inference.py
210
+ with env_client.sync() as env:
211
+ if IMAGE_NAME:
212
+ result = env.reset()
213
+ else:
214
+ result = env.reset(task_id=task_id)
215
+
216
+ obs_data = (
217
+ result.observation.model_dump()
218
+ if hasattr(result.observation, "model_dump")
219
+ else result.observation.dict()
220
+ )
221
+
222
+ for step in range(1, MAX_STEPS + 1):
223
+ if result.done or obs_data.get("done", False):
224
+ break
225
+
226
+ tool_call = get_model_action(client, obs_data, history)
227
+ if tool_call is None:
228
+ tool_call = {
229
+ "tool": "Grep",
230
+ "parameters": {"pattern": "def "},
231
+ "reasoning": "fallback",
232
+ }
233
+
234
+ tool = tool_call.get("tool", "Grep")
235
+ params = tool_call.get("parameters", {})
236
+ reasoning = tool_call.get("reasoning", "")
237
+ action_str = f"{tool}({json.dumps(params)})"
238
+
239
+ try:
240
+ result = env.step(
241
+ ToolCall(tool=tool, parameters=params, reasoning=reasoning)
242
+ )
243
+ last_error = None
244
+ except Exception as e:
245
+ last_error = str(e)
246
+ log_step(step=step, action=action_str, reward=0.0, done=True, error=last_error)
247
+ break
248
+
249
+ obs_data = (
250
+ result.observation.model_dump()
251
+ if hasattr(result.observation, "model_dump")
252
+ else result.observation.dict()
253
  )
 
 
 
 
254
 
255
+ reward = float(result.reward or 0.0)
256
+ done = bool(result.done)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
+ rewards.append(reward)
259
+ steps_taken = step
260
+ history.append(f"Step {step}: {action_str} → reward {reward:.2f}")
261
 
262
+ log_step(step=step, action=action_str, reward=reward, done=done, error=None)
 
 
263
 
264
+ if done:
265
+ break
266
 
267
+ # Pull grader score from last observation metadata
268
+ meta = obs_data.get("metadata", {})
269
+ score = float(meta.get("grader_score") or 0.0)
270
+ if score == 0.0 and rewards:
271
+ score = float(meta.get("cumulative_reward") or 0.0)
272
+ score = max(0.0, min(score, 1.0))
273
+ success = score >= SUCCESS_SCORE_THRESHOLD
 
 
 
274
 
275
  except Exception as e:
276
+ print(f"[DEBUG] Task {task_id} error: {e}", flush=True)
277
+ last_error = str(e)
 
278
 
279
  finally:
280
+ log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
281
 
282
  return {
283
  "task_id": task_id,
 
296
  print("ERROR: Set HF_TOKEN, OPENAI_API_KEY, or API_KEY.", file=sys.stderr)
297
  sys.exit(1)
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
300
 
301
  print("=" * 60, flush=True)
302
  print("AgentOps Gym — Baseline Inference", flush=True)
303
+ print(f"Model: {MODEL_NAME} | Image: {IMAGE_NAME or ENV_BASE_URL}", flush=True)
304
  print("=" * 60, flush=True)
305
 
306
  results = []
307
+ for task_id in ALL_TASKS:
308
+ print("─" * 40, flush=True)
309
+ results.append(run_task(client, task_id))
 
 
 
 
 
310
 
 
311
  total = sum(r["score"] for r in results)
312
  solved = sum(1 for r in results if r["success"])
313
  avg = total / len(results) if results else 0.0
validate_submission.sh ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Pre-submission validator: pings the Space, builds the Docker image and runs
# `openenv validate`. Requires bash (arrays, `&>`, `pipefail` are used below).

# -u: unset variables are errors; pipefail: a pipeline fails if any stage fails.
# -e is not set, so failing commands are reported by the checks below instead
# of aborting the script.
set -uo pipefail

# Maximum time, in seconds, allowed for `docker build`.
DOCKER_BUILD_TIMEOUT=600

# Use ANSI colours only when stdout is a terminal.
if [ -t 1 ]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  BOLD='\033[1m'
  NC='\033[0m'
else
  RED='' GREEN='' YELLOW='' BOLD='' NC=''
fi
13
+
14
# run_with_timeout SECONDS CMD [ARGS...]
# Run CMD under a time limit. Uses `timeout` or `gtimeout` when one is on PATH;
# otherwise runs CMD in the background with a watcher subshell that kills it
# after SECONDS. Returns CMD's (or the timeout tool's) exit status.
run_with_timeout() {
  local limit="$1"
  shift

  local tool
  for tool in timeout gtimeout; do
    if command -v "$tool" >/dev/null 2>&1; then
      "$tool" "$limit" "$@"
      return $?
    fi
  done

  # Fallback: background the command and arm a watcher that kills it.
  "$@" &
  local child=$!
  ( sleep "$limit" && kill "$child" 2>/dev/null ) &
  local guard=$!
  wait "$child" 2>/dev/null
  local exit_code=$?
  # The command finished (or was killed); stop the watcher and reap it.
  kill "$guard" 2>/dev/null
  wait "$guard" 2>/dev/null
  return "$exit_code"
}
32
+
33
# portable_mktemp [PREFIX]
# Create a temp file named "<PREFIX>-XXXXXX" under $TMPDIR (default /tmp) and
# print its path; if that template form fails, fall back to plain `mktemp`.
portable_mktemp() {
  local stem="${1:-validate}"
  local template="${TMPDIR:-/tmp}/${stem}-XXXXXX"
  if ! mktemp "$template" 2>/dev/null; then
    mktemp
  fi
}
37
+
38
# Temp files registered here are removed when the script exits.
CLEANUP_FILES=()

# Remove every registered temp file. The `${arr[@]+...}` form keeps the
# expansion safe under `set -u` when the array is still empty.
cleanup() {
  rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"
}

trap cleanup EXIT
41
+
42
# Positional arguments: <ping_url> is required, [repo_dir] defaults to ".".
PING_URL="${1:-}"
REPO_DIR="${2:-.}"

# No ping URL given: print usage and exit.
[ -n "$PING_URL" ] || {
  printf "Usage: %s <ping_url> [repo_dir]\n\n ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n repo_dir Path to your repo (default: current directory)\n" "$0"
  exit 1
}

# Resolve repo_dir to an absolute path; bail out if it cannot be entered.
REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)" || {
  printf "Error: directory '%s' not found\n" "${2:-.}"
  exit 1
}

# Drop one trailing slash so "$PING_URL/reset" is well-formed.
export PING_URL="${PING_URL%/}"

# Number of checks passed so far.
PASS=0
60
+
61
# log MSG... — print MSG prefixed with the current UTC time; backslash escapes
# in MSG (such as the colour codes) are interpreted via %b.
log() {
  local stamp
  stamp="$(date -u +%H:%M:%S)"
  printf "[%s] %b\n" "$stamp" "$*"
}

# pass MSG — log a PASSED line and bump the PASS counter.
pass() {
  log "${GREEN}PASSED${NC} -- $1"
  PASS=$((PASS + 1))
}

# fail MSG — log a FAILED line.
fail() {
  log "${RED}FAILED${NC} -- $1"
}

# hint MSG — print a hint line; MSG is printed through %b, not as a format.
hint() {
  printf " ${YELLOW}Hint:${NC} %b\n" "$1"
}

# stop_at STEP — report where validation stopped and exit with status 1.
stop_at() {
  printf "\n"
  printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
  exit 1
}
70
+
71
# Banner and run parameters.
printf "\n"
printf "${BOLD}========================================${NC}\n"
printf "${BOLD} OpenEnv Submission Validator${NC}\n"
printf "${BOLD}========================================${NC}\n"
log "Repo: $REPO_DIR"
log "Ping URL: $PING_URL"
printf "\n"

# Step 1: POST an empty JSON body to <ping_url>/reset and expect HTTP 200.
log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."

CURL_OUTPUT=$(portable_mktemp "validate-curl")
CLEANUP_FILES+=("$CURL_OUTPUT")
# On a failed transfer curl's --write-out already prints "000" for http_code,
# so assign the sentinel instead of appending a second "000" to the captured
# text. stderr is discarded rather than sent to the same file as the body.
HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
  -H "Content-Type: application/json" -d '{}' \
  "$PING_URL/reset" --max-time 30 2>/dev/null) || HTTP_CODE="000"

if [ "$HTTP_CODE" = "200" ]; then
  pass "HF Space is live and responds to /reset"
elif [ "$HTTP_CODE" = "000" ]; then
  fail "HF Space not reachable (connection failed or timed out)"
  hint "Check your network connection and that the Space is running."
  # hint prints its argument via %b, so '%' needs no doubling here.
  hint "Try: curl -s -o /dev/null -w '%{http_code}' -X POST $PING_URL/reset"
  stop_at "Step 1"
else
  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
  hint "Make sure your Space is running and the URL is correct."
  hint "Try opening $PING_URL in your browser first."
  stop_at "Step 1"
fi
100
+
101
# Step 2: the image must build from the repo's Dockerfile within the timeout.
log "${BOLD}Step 2/3: Running docker build${NC} ..."

command -v docker >/dev/null 2>&1 || {
  fail "docker command not found"
  hint "Install Docker: https://docs.docker.com/get-docker/"
  stop_at "Step 2"
}

# Look for a Dockerfile in the repo root first, then in server/.
DOCKER_CONTEXT=""
for candidate in "$REPO_DIR" "$REPO_DIR/server"; do
  if [ -f "$candidate/Dockerfile" ]; then
    DOCKER_CONTEXT="$candidate"
    break
  fi
done

if [ -z "$DOCKER_CONTEXT" ]; then
  fail "No Dockerfile found in repo root or server/ directory"
  stop_at "Step 2"
fi

log " Found Dockerfile in $DOCKER_CONTEXT"

# Capture combined stdout/stderr so the tail can be shown on failure.
if BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1); then
  BUILD_OK=true
else
  BUILD_OK=false
fi

case "$BUILD_OK" in
  true)
    pass "Docker build succeeded"
    ;;
  *)
    fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
    printf "%s\n" "$BUILD_OUTPUT" | tail -20
    stop_at "Step 2"
    ;;
esac
130
+
131
# Step 3: `openenv validate` must succeed when run from the repo directory.
log "${BOLD}Step 3/3: Running openenv validate${NC} ..."

command -v openenv >/dev/null 2>&1 || {
  fail "openenv command not found"
  hint "Install it: pip install openenv-core"
  stop_at "Step 3"
}

# Run in a subshell so the script's own working directory is unchanged.
if VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1); then
  VALIDATE_OK=true
else
  VALIDATE_OK=false
fi

case "$VALIDATE_OK" in
  true)
    pass "openenv validate passed"
    if [ -n "$VALIDATE_OUTPUT" ]; then
      log " $VALIDATE_OUTPUT"
    fi
    ;;
  *)
    fail "openenv validate failed"
    printf "%s\n" "$VALIDATE_OUTPUT"
    stop_at "Step 3"
    ;;
esac

# Reaching this point means no step called stop_at.
printf "\n"
printf "${BOLD}========================================${NC}\n"
printf "${GREEN}${BOLD} All 3/3 checks passed!${NC}\n"
printf "${GREEN}${BOLD} Your submission is ready to submit.${NC}\n"
printf "${BOLD}========================================${NC}\n"
printf "\n"

exit 0