Spaces:
Running
Running
Commit ·
a2c523c
1
Parent(s): 9b2abc6
Fix dashboard logging URL to use proxy path, force Docker rebuild
Browse files
- log_to_dashboard now posts to /api/env/training/log (through proxy)
instead of /training/log (direct, which 404s on Next.js)
- ARG CACHEBUST in Dockerfile to invalidate build cache
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- Dockerfile +1 -1
- training/train_grpo.ipynb +1 -1
Dockerfile
CHANGED
|
@@ -19,7 +19,7 @@ COPY lib/ ./lib/
|
|
| 19 |
COPY public/ ./public/
|
| 20 |
COPY package.json package-lock.json tsconfig.json next.config.ts postcss.config.mjs eslint.config.mjs .eslintrc.json next-env.d.ts metadata.json ./
|
| 21 |
ENV NEXT_TELEMETRY_DISABLED=1
|
| 22 |
-
|
| 23 |
RUN npm run build
|
| 24 |
|
| 25 |
FROM python:3.11-slim AS runner
|
|
|
|
| 19 |
COPY public/ ./public/
|
| 20 |
COPY package.json package-lock.json tsconfig.json next.config.ts postcss.config.mjs eslint.config.mjs .eslintrc.json next-env.d.ts metadata.json ./
|
| 21 |
ENV NEXT_TELEMETRY_DISABLED=1
|
| 22 |
+
ARG CACHEBUST=3
|
| 23 |
RUN npm run build
|
| 24 |
|
| 25 |
FROM python:3.11-slim AS runner
|
training/train_grpo.ipynb
CHANGED
|
@@ -98,7 +98,7 @@
|
|
| 98 |
"execution_count": null,
|
| 99 |
"metadata": {},
|
| 100 |
"outputs": [],
|
| 101 |
-
"source": "PRINTER = 0\n\ndef extract_fold_json(response):\n \"\"\"Extract FOLD JSON from LLM response text.\"\"\"\n m = re.search(r\"```(?:json)?\\s*(\\{.*?\\})\\s*```\", response, re.DOTALL)\n if m:\n try: return json.loads(m.group(1))\n except: pass\n m = re.search(r'\\{[^{}]*\"vertices_coords\"[^{}]*\\}', response, re.DOTALL)\n if m:\n try: return json.loads(m.group(0))\n except: pass\n try:\n d = json.loads(response.strip())\n if isinstance(d, dict) and \"vertices_coords\" in d: return d\n except: pass\n return None\n\n\ndef log_to_dashboard(task_name, reward, shape_similarity, is_valid, error=None, fold_data=None, final_positions=None, target_positions=None):\n \"\"\"Send training step data to the frontend dashboard via
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"cell_type": "markdown",
|
|
|
|
| 98 |
"execution_count": null,
|
| 99 |
"metadata": {},
|
| 100 |
"outputs": [],
|
| 101 |
+
"source": "PRINTER = 0  # Completions seen so far by openenv_reward; drives the every-10th sample print.\n\ndef extract_fold_json(response):\n    \"\"\"Extract FOLD JSON from LLM response text.\n\n    Tries, in order: a fenced json code block, a flat brace-delimited object that\n    mentions 'vertices_coords', and finally the whole response parsed as JSON.\n\n    Returns the parsed object, or None when nothing could be parsed.\n    \"\"\"\n    m = re.search(r\"```(?:json)?\\s*(\\{.*?\\})\\s*```\", response, re.DOTALL)\n    if m:\n        try:\n            return json.loads(m.group(1))\n        except Exception:  # not a bare except: Ctrl-C must still interrupt training\n            pass\n    m = re.search(r'\\{[^{}]*\"vertices_coords\"[^{}]*\\}', response, re.DOTALL)\n    if m:\n        try:\n            return json.loads(m.group(0))\n        except Exception:\n            pass\n    try:\n        d = json.loads(response.strip())\n        if isinstance(d, dict) and \"vertices_coords\" in d:\n            return d\n    except Exception:\n        pass\n    return None\n\n\ndef log_to_dashboard(task_name, reward, shape_similarity, is_valid, error=None, fold_data=None, final_positions=None, target_positions=None):\n    \"\"\"Send training step data to the frontend dashboard via the proxy.\n\n    Best-effort: any failure is swallowed so dashboard logging never breaks training.\n    \"\"\"\n    try:\n        # Goes through Next.js proxy: /api/env/training/log → localhost:8000/training/log\n        # NOTE(review): presumably OPENENV_URL already ends in the /api/env proxy prefix - confirm.\n        requests.post(\n            f\"{OPENENV_URL}/training/log\",\n            json={\n                \"task_name\": task_name,\n                \"reward\": reward,\n                \"shape_similarity\": shape_similarity,\n                \"is_valid\": is_valid,\n                \"error\": error,\n                \"fold_data\": fold_data,\n                \"final_positions\": final_positions or [],\n                \"target_positions\": target_positions or [],\n            },\n            timeout=5,\n        )\n    except Exception:\n        pass  # Don't let dashboard logging break training\n\n\ndef valid_fold_reward(completions, **kwargs):\n    \"\"\"Reward 1 (local): +1.0 valid FOLD structure, -0.5 bad structure, -2.0 unparseable.\n\n    Returns a list with one score per completion, in order.\n    \"\"\"\n    REQUIRED = {\"vertices_coords\", \"edges_vertices\", \"edges_assignment\"}\n    scores = []\n    for c in completions:\n        fold = extract_fold_json(c[0][\"content\"])\n        if fold is None:\n            scores.append(-2.0)\n            continue\n        # Basic structural checks\n        if not isinstance(fold, dict) or not REQUIRED.issubset(fold.keys()):\n            scores.append(-0.5)\n            continue\n        verts = fold[\"vertices_coords\"]\n        edges = fold[\"edges_vertices\"]\n        asgn = fold[\"edges_assignment\"]\n        # Model output is untrusted: a non-list field is bad structure, not a reason to crash.\n        if not all(isinstance(field, list) for field in (verts, edges, asgn)):\n            scores.append(-0.5)\n            continue\n        if len(verts) < 3 or len(edges) < 3 or len(edges) != len(asgn):\n            scores.append(-0.5)\n            continue\n        if not any(a in (\"M\",\"V\") for a in asgn):\n            scores.append(-0.5)\n            continue\n        if not any(a == \"B\" for a in asgn):\n            scores.append(-0.5)\n            continue\n        scores.append(1.0)\n    return scores\n\n\ndef openenv_reward(completions, task_name, **kwargs):\n    \"\"\"Reward 2 (OpenEnv API): Submit fold to environment, get simulation reward.\n\n    Calls POST /reset and POST /step on the HF Space OpenEnv environment.\n    The environment runs the fold simulation and computes shape similarity.\n    Also logs each step to the frontend training dashboard.\n\n    Returns a list with exactly one score per completion: the environment reward\n    when present, otherwise shape_similarity * 20.0, -2.0 for unparseable output\n    or an environment-reported error, and -1.0 when the call itself fails.\n    \"\"\"\n    global PRINTER\n    # task_name comes as a list from the dataset. With one entry per completion,\n    # pair them up so a mixed-task batch is scored against the right task;\n    # otherwise keep the previous behaviour of using the first entry for all.\n    if isinstance(task_name, list) and len(task_name) == len(completions):\n        task_names = task_name\n    else:\n        first = task_name[0] if isinstance(task_name, list) else task_name\n        task_names = [first] * len(completions)\n\n    scores = []\n    for c, tn in zip(completions, task_names):\n        resp = c[0][\"content\"]\n\n        # Periodic logging\n        if PRINTER % 10 == 0:\n            print(f\"\\n--- [{tn}] Sample {PRINTER} ---\\n{resp[:300]}\")\n        PRINTER += 1\n\n        # Parse the FOLD JSON from the LLM response\n        fold = extract_fold_json(resp)\n        if fold is None:\n            scores.append(-2.0)\n            log_to_dashboard(tn, -2.0, 0.0, False, error=\"No JSON parsed\")\n            continue\n\n        try:\n            # Reset environment for this task\n            env.reset(task_name=tn)\n\n            # Submit the fold to OpenEnv — environment simulates and scores it\n            result = env.step(fold)\n\n            # Get reward from the environment\n            reward = result.get(\"reward\", None)\n            obs = result.get(\"observation\", {})\n            raw_sim = obs.get(\"shape_similarity\", 0.0)\n            sim = float(raw_sim) if raw_sim is not None else 0.0\n            error = obs.get(\"error\")\n\n            if reward is not None:\n                score = float(reward)\n            elif error:\n                score = -2.0\n            else:\n                score = sim * 20.0\n        except requests.exceptions.RequestException as e:\n            print(f\"OpenEnv API error: {e}\")\n            scores.append(-1.0)\n            log_to_dashboard(tn, -1.0, 0.0, False, error=str(e))\n            continue\n        except Exception as e:\n            print(f\"Reward error: {e}\")\n            scores.append(-1.0)\n            log_to_dashboard(tn, -1.0, 0.0, False, error=str(e))\n            continue\n\n        # Append only after every conversion has succeeded, so a failure can never\n        # leave two scores for one completion and misalign the returned list.\n        scores.append(score)\n\n        # Log to frontend dashboard\n        log_to_dashboard(\n            task_name=tn,\n            reward=score,\n            shape_similarity=sim,\n            is_valid=not bool(error),\n            error=error,\n            fold_data=fold,\n            final_positions=obs.get(\"final_positions\", []),\n            target_positions=obs.get(\"target_positions\", []),\n        )\n\n    return scores\n\n\nprint(\"Reward functions ready (valid_fold=local, openenv_reward=API + dashboard logging).\")"
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"cell_type": "markdown",
|