Spaces:

luuow
/

photon-route

Running

luuow Claude Opus 4.7 (1M context) committed on May 5

Commit

e0480d0

1 Parent(s): 6038606

http_server: /weights.npz download + train: temp 0.1->5.0, lr 3e-2->5e-3

The first successful build (commit 6038606) produced trained weights but
the InfoNCE loss oscillated wildly (174 -> 84 -> 140 -> 0.6 -> 68 -> 31
across 100 steps), worse than random init. Root cause: temperature=0.1
with the D-distance contrastive form — D values are 0-50, so logits
-D/0.1 range over [-500, 0], the softmax saturates, and gradients explode
through AdamW (lr=3e-2). New scale: temp=5.0 keeps logits in [-10, 0], and
lr=5e-3 takes smaller steps from the SHA-init starting point.

Also adds GET /weights.npz to FastAPI so the eval harness on the VM
can pull the baked weights from the live Space and run recall/nDCG
locally (the Space's /rank ranks the 5-doc fixture corpus, not the
20-doc eval corpus the model was trained on).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

space/train.py +5 -2
src/photon_route/http_server.py +15 -0

space/train.py CHANGED Viewed

@@ -381,8 +381,11 @@ def main() -> None:
     ap = argparse.ArgumentParser()
     ap.add_argument("--out", type=Path, default=ROOT / "weights.npz")
     ap.add_argument("--steps", type=int, default=100)
-    ap.add_argument("--lr", type=float, default=3e-2)
-    ap.add_argument("--temperature", type=float, default=0.1)
     ap.add_argument("--photon-lambda", type=float, default=1e-2)
     ap.add_argument("--negatives", type=int, default=8)
     ap.add_argument("--clip", type=float, default=1.0)

     ap = argparse.ArgumentParser()
     ap.add_argument("--out", type=Path, default=ROOT / "weights.npz")
     ap.add_argument("--steps", type=int, default=100)
+    ap.add_argument("--lr", type=float, default=5e-3)
+    # D-scale logits: with D in [0, 50], temp=0.1 made -D/temp logits up to
+    # -500, exploding cross_entropy + gradients. temp=5 keeps logit magnitudes
+    # in a sensible range (~0-10) so AdamW can converge instead of oscillating.
+    ap.add_argument("--temperature", type=float, default=5.0)
     ap.add_argument("--photon-lambda", type=float, default=1e-2)
     ap.add_argument("--negatives", type=int, default=8)
     ap.add_argument("--clip", type=float, default=1.0)

src/photon_route/http_server.py CHANGED Viewed

@@ -25,6 +25,7 @@ from typing import Any
 import numpy as np
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 from photon_route import __version__
 from photon_route.corpus import Document, load_fixture
@@ -177,6 +178,20 @@ def version() -> dict[str, str]:
     return {"version": __version__, "default_backend": DEFAULT_BACKEND}
 @app.get("/rank")
 def rank(
     q: str = Query(..., min_length=1, max_length=500, description="query text"),

 import numpy as np
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
 from photon_route import __version__
 from photon_route.corpus import Document, load_fixture
     return {"version": __version__, "default_backend": DEFAULT_BACKEND}
+@app.get("/weights.npz")
+def weights_download():
+    """Serve the trained weights.npz so the eval harness on the VM can
+    score `--backend v2 --weights weights.npz` without rebuilding the
+    whole training pipeline locally. Read-only, baked at build time."""
+    if not WEIGHTS_PATH.exists():
+        raise HTTPException(status_code=404, detail="no trained weights on this build")
+    return FileResponse(
+        WEIGHTS_PATH,
+        media_type="application/octet-stream",
+        filename="weights.npz",
+    )
 @app.get("/rank")
 def rank(
     q: str = Query(..., min_length=1, max_length=500, description="query text"),