swappy committed on
Commit
c1d3003
·
1 Parent(s): ed78dc2

fix: update QED_MATH_URL secret reference

Browse files

- remove server health check steps
- fix error handling in inference script

.github/workflows/validate-inference.yml CHANGED
@@ -13,7 +13,7 @@ jobs:
13
  env:
14
 
15
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
16
- QED_MATH_URL: ${{ secrets.QED_MATH_URL }}
17
  MAX_STEPS: "8"
18
 
19
  steps:
@@ -31,25 +31,39 @@ jobs:
31
  - name: Install dependencies
32
  run: uv sync
33
 
34
- - name: Start server
35
- run: uv run uvicorn server.app:app --host 0.0.0.0 --port 8000 &
 
 
 
 
 
 
 
 
 
 
36
 
37
- - name: Wait for server to be healthy
 
38
  run: |
39
- for i in $(seq 1 30); do
40
- if curl -sf http://localhost:8000/healthz; then
41
- echo "Server is healthy"
 
 
 
 
 
42
  exit 0
43
  fi
44
- echo "Attempt $i/30 — waiting..."
45
- sleep 2
 
46
  done
47
- echo "::error::Server did not become healthy in time"
 
48
  exit 1
49
 
50
  - name: Run inference
51
- run: uv run python inference.py
52
-
53
- - name: Stop server
54
- if: always()
55
- run: pkill -f "uvicorn server.app:app" || true
 
13
  env:
14
 
15
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
16
+ QED_MATH_URL: ${{ vars.QED_MATH_URL }}
17
  MAX_STEPS: "8"
18
 
19
  steps:
 
31
  - name: Install dependencies
32
  run: uv sync
33
 
34
+ - name: Validate CI variables
35
+ shell: bash
36
+ run: |
37
+ if [ -z "${HF_TOKEN}" ]; then
38
+ echo "::error::HF_TOKEN is empty or undefined. Configure repository secret HF_TOKEN."
39
+ exit 1
40
+ fi
41
+ if [ -z "${QED_MATH_URL}" ]; then
42
+ echo "::error::QED_MATH_URL is empty or undefined. Configure repository variable QED_MATH_URL."
43
+ exit 1
44
+ fi
45
+ echo "QED_MATH_URL=${QED_MATH_URL}"
46
 
47
+ - name: Verify QED Math endpoint
48
+ shell: bash
49
  run: |
50
+ BASE_URL="${QED_MATH_URL%/}"
51
+ for attempt in 1 2 3 4 5; do
52
+ code=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
53
+ -H "Content-Type: application/json" -d '{}' \
54
+ "${BASE_URL}/reset" || echo "000")
55
+
56
+ if [ "${code}" = "200" ]; then
57
+ echo "Endpoint is reachable at ${BASE_URL}/reset"
58
  exit 0
59
  fi
60
+
61
+ echo "Attempt ${attempt}/5: ${BASE_URL}/reset returned HTTP ${code}; retrying..."
62
+ sleep 6
63
  done
64
+
65
+ echo "::error::Unable to reach ${BASE_URL}/reset after retries. Check URL and Space status."
66
  exit 1
67
 
68
  - name: Run inference
69
+ run: uv run python inference.py
 
 
 
 
inference.py CHANGED
@@ -9,6 +9,8 @@ import asyncio
9
  import json
10
  import os
11
  import re
 
 
12
  from pathlib import Path
13
  from typing import Any, Optional, cast
14
 
@@ -53,7 +55,10 @@ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
53
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:novita")
54
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
55
 
56
- QED_MATH_URL = os.getenv("QED_MATH_URL", "http://localhost:8000")
 
 
 
57
  TASK_NAME = os.getenv("TASK_NAME", "solve-qed-math")
58
  BENCHMARK = os.getenv("BENCHMARK", "qed-math")
59
 
@@ -250,6 +255,8 @@ async def run_episode(
250
  async def async_main() -> None:
251
  if not HF_TOKEN:
252
  raise SystemExit("HF_TOKEN must be set.\nOptional fallback: API_KEY.")
 
 
253
 
254
  client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
255
 
@@ -274,6 +281,17 @@ async def async_main() -> None:
274
  except Exception as exc:
275
  caught_error = exc
276
  success = False
 
 
 
 
 
 
 
 
 
 
 
277
  finally:
278
  log_end(success=success, steps=steps_taken, rewards=rewards)
279
 
 
9
  import json
10
  import os
11
  import re
12
+ import sys
13
+ import traceback
14
  from pathlib import Path
15
  from typing import Any, Optional, cast
16
 
 
55
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:novita")
56
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
57
 
58
+ _raw_qed_math_url = os.getenv("QED_MATH_URL")
59
+ QED_MATH_URL = (
60
+ _raw_qed_math_url.strip() if _raw_qed_math_url is not None else "http://localhost:8000"
61
+ )
62
  TASK_NAME = os.getenv("TASK_NAME", "solve-qed-math")
63
  BENCHMARK = os.getenv("BENCHMARK", "qed-math")
64
 
 
255
  async def async_main() -> None:
256
  if not HF_TOKEN:
257
  raise SystemExit("HF_TOKEN must be set.\nOptional fallback: API_KEY.")
258
+ if not QED_MATH_URL:
259
+ raise SystemExit("QED_MATH_URL must be set (for example: https://<space>.hf.space/).")
260
 
261
  client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
262
 
 
281
  except Exception as exc:
282
  caught_error = exc
283
  success = False
284
+ print(
285
+ f"[ERROR] type={type(exc).__name__} message={exc}",
286
+ file=sys.stderr,
287
+ flush=True,
288
+ )
289
+ print(
290
+ f"[ERROR] QED_MATH_URL={QED_MATH_URL}",
291
+ file=sys.stderr,
292
+ flush=True,
293
+ )
294
+ traceback.print_exc(file=sys.stderr)
295
  finally:
296
  log_end(success=success, steps=steps_taken, rewards=rewards)
297