Spaces:

rycerzes
/

qed-math-openenv

Sleeping

swappy committed on Apr 5

Commit

c1d3003

1 Parent(s): ed78dc2

fix: update QED_MATH_URL secret reference

- remove server health check steps
- fix error handling in inference script

Files changed (2) hide show

.github/workflows/validate-inference.yml CHANGED Viewed

@@ -13,7 +13,7 @@ jobs:
     env:
       HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      QED_MATH_URL: ${{ secrets.QED_MATH_URL }}
       MAX_STEPS: "8"
     steps:
@@ -31,25 +31,39 @@ jobs:
       - name: Install dependencies
         run: uv sync
-      - name: Start server
-        run: uv run uvicorn server.app:app --host 0.0.0.0 --port 8000 &
-      - name: Wait for server to be healthy
         run: |
-          for i in $(seq 1 30); do
-            if curl -sf http://localhost:8000/healthz; then
-              echo "Server is healthy"
               exit 0
             fi
-            echo "Attempt $i/30 — waiting..."
-            sleep 2
           done
-          echo "::error::Server did not become healthy in time"
           exit 1
       - name: Run inference
-        run: uv run python inference.py
-      - name: Stop server
-        if: always()
-        run: pkill -f "uvicorn server.app:app" || true

     env:
       HF_TOKEN: ${{ secrets.HF_TOKEN }}
+      QED_MATH_URL: ${{ vars.QED_MATH_URL }}
       MAX_STEPS: "8"
     steps:
       - name: Install dependencies
         run: uv sync
+      - name: Validate CI variables
+        shell: bash
+        run: |
+          if [ -z "${HF_TOKEN}" ]; then
+            echo "::error::HF_TOKEN is empty or undefined. Configure repository secret HF_TOKEN."
+            exit 1
+          fi
+          if [ -z "${QED_MATH_URL}" ]; then
+            echo "::error::QED_MATH_URL is empty or undefined. Configure repository variable QED_MATH_URL."
+            exit 1
+          fi
+          echo "QED_MATH_URL=${QED_MATH_URL}"
+      - name: Verify QED Math endpoint
+        shell: bash
         run: |
+          BASE_URL="${QED_MATH_URL%/}"
+          for attempt in 1 2 3 4 5; do
+            code=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
+              -H "Content-Type: application/json" -d '{}' \
+              "${BASE_URL}/reset" || echo "000")
+            if [ "${code}" = "200" ]; then
+              echo "Endpoint is reachable at ${BASE_URL}/reset"
               exit 0
             fi
+            echo "Attempt ${attempt}/5: ${BASE_URL}/reset returned HTTP ${code}; retrying..."
+            sleep 6
           done
+          echo "::error::Unable to reach ${BASE_URL}/reset after retries. Check URL and Space status."
           exit 1
       - name: Run inference
+        run: uv run python inference.py

inference.py CHANGED Viewed

@@ -9,6 +9,8 @@ import asyncio
 import json
 import os
 import re
 from pathlib import Path
 from typing import Any, Optional, cast
@@ -53,7 +55,10 @@ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:novita")
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
-QED_MATH_URL = os.getenv("QED_MATH_URL", "http://localhost:8000")
 TASK_NAME = os.getenv("TASK_NAME", "solve-qed-math")
 BENCHMARK = os.getenv("BENCHMARK", "qed-math")
@@ -250,6 +255,8 @@ async def run_episode(
 async def async_main() -> None:
     if not HF_TOKEN:
         raise SystemExit("HF_TOKEN must be set.\nOptional fallback: API_KEY.")
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
@@ -274,6 +281,17 @@ async def async_main() -> None:
     except Exception as exc:
         caught_error = exc
         success = False
     finally:
         log_end(success=success, steps=steps_taken, rewards=rewards)

 import json
 import os
 import re
+import sys
+import traceback
 from pathlib import Path
 from typing import Any, Optional, cast
 MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:novita")
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
+_raw_qed_math_url = os.getenv("QED_MATH_URL")
+QED_MATH_URL = (
+    _raw_qed_math_url.strip() if _raw_qed_math_url is not None else "http://localhost:8000"
+)
 TASK_NAME = os.getenv("TASK_NAME", "solve-qed-math")
 BENCHMARK = os.getenv("BENCHMARK", "qed-math")
 async def async_main() -> None:
     if not HF_TOKEN:
         raise SystemExit("HF_TOKEN must be set.\nOptional fallback: API_KEY.")
+    if not QED_MATH_URL:
+        raise SystemExit("QED_MATH_URL must be set (for example: https://<space>.hf.space/).")
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
     except Exception as exc:
         caught_error = exc
         success = False
+        print(
+            f"[ERROR] type={type(exc).__name__} message={exc}",
+            file=sys.stderr,
+            flush=True,
+        )
+        print(
+            f"[ERROR] QED_MATH_URL={QED_MATH_URL}",
+            file=sys.stderr,
+            flush=True,
+        )
+        traceback.print_exc(file=sys.stderr)
     finally:
         log_end(success=success, steps=steps_taken, rewards=rewards)