APISAvant2

Running

App Files Files Community

antonypamo committed on Dec 13, 2025

Commit

e027e92

verified ·

1 Parent(s): 4b2d865

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -72

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ======================================================
-# Savant RRF Φ12.0 — app.py (AGIRRFCore-aligned)
 # Uses the same AGIRRFCore logic as RRFSavant_AGI_Core_Colab
 # ======================================================
@@ -22,6 +22,16 @@ from huggingface_hub import hf_hub_download
 import joblib
 # ======================================================
 # 1) MANIFEST
 # ======================================================
@@ -35,27 +45,27 @@ DEFAULT_MANIFEST = {
 MANIFEST_PATH = Path(__file__).parent / "savant_rrf_api_manifest_phi12.json"
-def load_manifest() -> Dict[str, Any]:
     if MANIFEST_PATH.exists():
         try:
             print(f"[Manifest] Loading from {MANIFEST_PATH}", flush=True)
             return json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
         except Exception as e:
             print(f"[Manifest] Invalid JSON: {e}", flush=True)
     print("[Manifest] Using DEFAULT_MANIFEST", flush=True)
     return DEFAULT_MANIFEST
-manifest = load_manifest()
-print("[Manifest] version:", manifest.get("version"), flush=True)
 # ======================================================
 # 2) Global config
 # ======================================================
-HF_TOKEN = os.environ.get("HF_TOKEN", "")
-os.environ["HF_TOKEN"] = HF_TOKEN
 ENCODER_MODEL_ID    = "antonypamo/RRFSAVANTMADE"
 META_LOGIT_REPO     = "antonypamo/RRFSavantMetaLogicV2"
@@ -63,29 +73,57 @@ META_LOGIT_FILENAME = "logreg_rrf_savant.joblib"
 RRF_DATASET_REPO = "antonypamo/savant_rrf1_curated"
-def hf_data_path(filename: str) -> Optional[str]:
     try:
         return hf_hub_download(
-            repo_id=RRF_DATASET_REPO,
             filename=filename,
-            repo_type="dataset",
-            token=HF_TOKEN or None,
         )
     except Exception as e:
-        print(f"[Dataset] Missing {filename}: {e}", flush=True)
         return None
 # ======================================================
-# 3) Optional artifacts
 # ======================================================
-SAVANT_CNN_PATH  = hf_data_path("savant_cnn.pt")
-RRF_NODES_PATH   = hf_data_path("rrf_nodes.pt")
-RRF_TUTOR_JSONL  = hf_data_path("rrf_tutor_curated.jsonl")
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ======================================================
@@ -109,6 +147,7 @@ class SavantCNN(nn.Module):
         x = x.view(x.size(0), -1)
         return self.fc(x)
 savant_cnn = None
 if SAVANT_CNN_PATH:
     try:
@@ -125,11 +164,11 @@ if RRF_NODES_PATH:
         rrf_nodes = torch.load(RRF_NODES_PATH, map_location=device)
         print("✅ RRF nodes loaded", flush=True)
     except Exception as e:
-        print(f"⚠️ RRF nodes failed: {e}", flush=True)
 # ======================================================
-# 5) Φ-node ontology (same spirit as notebook: 8 nodes -> one-hot 8)
 # ======================================================
 @dataclass
@@ -137,7 +176,7 @@ class PhiNode:
     name: str
     description: str
     tags: List[str] = field(default_factory=list)
-    embedding: Optional[np.ndarray] = None
 PHI_NODES: List[PhiNode] = [
     PhiNode("Φ0_seed",      "Genesis seed, core identity and origin.", ["genesis","identity","anchor"]),
@@ -152,8 +191,13 @@ PHI_NODES: List[PhiNode] = [
 PHI_NAME_TO_IDX = {n.name: i for i, n in enumerate(PHI_NODES)}
 # ======================================================
-# 6) CoherenceModel (fully implemented; notebook had '...' placeholders)
 # ======================================================
 class CoherenceModel:
@@ -186,7 +230,7 @@ coherence_model = CoherenceModel()
 # ======================================================
-# 7) AGIRRFCore (same logic as notebook; cleaned)
 # ======================================================
 class AGIRRFCore:
@@ -199,9 +243,7 @@ class AGIRRFCore:
         self.phi_nodes = phi_nodes
         self.coherence_model = coherence_model
-        print(f"🔄 Loading sentence-transformer: {st_model_name} on {device} ...", flush=True)
-        # SentenceTransformer expects device as string usually
-        st_device = "cuda" if torch.cuda.is_available() else "cpu"
         self.embedder = SentenceTransformer(st_model_name, device=st_device)
         print("✅ Embedder loaded", flush=True)
@@ -228,9 +270,8 @@ class AGIRRFCore:
         return float(freqs[idx])
     def _phi_omega(self, energy: float, dom_freq: float) -> Tuple[float, float]:
-        # same mapping as notebook
-        phi = 1.0 - math.exp(-float(energy))      # saturating [0,1)
-        omega = math.tanh(dom_freq * 10.0)        # [0,1)
         return float(phi), float(omega)
     def _closest_phi_node(self, vec: np.ndarray) -> Tuple[str, float]:
@@ -238,8 +279,7 @@ class AGIRRFCore:
             return "unknown", 0.0
         v = np.asarray(vec, dtype=float).ravel()
         v_norm = np.linalg.norm(v) + 1e-9
-        best_name = "unknown"
-        best_cos = -1.0
         for node in self.phi_nodes:
             e = node.embedding
             if e is None:
@@ -253,9 +293,7 @@ class AGIRRFCore:
     def analyze(self, text: str, context_label: str = "query") -> Dict[str, Any]:
         vec = self._embed_text(text)
-        # notebook energy = dot(vec, vec) (not normalized)
         energy = float(np.dot(vec, vec))
         dom_freq = self._dominant_frequency(vec)
         phi, omega = self._phi_omega(energy, dom_freq)
@@ -265,7 +303,6 @@ class AGIRRFCore:
             S_RRF, C_RRF = 0.0, 0.0
         coherence = 0.5 * float(S_RRF) + 0.5 * float(C_RRF)
         closest_name, closest_cos = self._closest_phi_node(vec)
         return {
@@ -291,80 +328,91 @@ agirrf_core = AGIRRFCore(
 # ======================================================
-# 8) Load Meta-Logit (15D pipeline)
 # ======================================================
 print("🔄 Loading meta-logit...", flush=True)
-meta_logit_path = hf_hub_download(
     repo_id=META_LOGIT_REPO,
     filename=META_LOGIT_FILENAME,
-    token=HF_TOKEN or None,
 )
 meta_logit = joblib.load(meta_logit_path)
-print("✅ Meta-logit ready", flush=True)
 # ======================================================
-# 9) Feature mapping (exact notebook structure: 7 + one-hot 8 = 15)
 # ======================================================
 def rrf_state_to_features(state: Dict[str, Any]) -> np.ndarray:
-    phi = float(state.get("phi", 0.0))
     omega = float(state.get("omega", 0.0))
-    coh = float(state.get("coherence", 0.0))
     S_RRF = float(state.get("S_RRF", 0.0))
     C_RRF = float(state.get("C_RRF", 0.0))
-    E_H = float(state.get("hamiltonian_energy", 0.0))
     dom_f = float(state.get("dominant_frequency", 0.0))
     phi_name = state.get("closest_phi_node", "unknown")
-    n_phi = len(PHI_NODES)
-    phi_onehot = np.zeros(n_phi, dtype=float)
     idx = PHI_NAME_TO_IDX.get(phi_name)
     if idx is not None:
         phi_onehot[idx] = 1.0
-    base_feats = np.array([phi, omega, coh, S_RRF, C_RRF, E_H, dom_f], dtype=float)
-    feats = np.concatenate([base_feats, phi_onehot], axis=0)  # 15D
-    return feats
 # ======================================================
-# 10) Core scoring for (prompt, answer)
-# We'll analyze combined QA text to stay consistent and stable.
 # ======================================================
-def get_embedding_normed(text: str) -> np.ndarray:
     return agirrf_core.embedder.encode([text], convert_to_numpy=True, normalize_embeddings=True)[0]
 def compute_scores(prompt: str, answer: str) -> Dict[str, Any]:
     if not prompt.strip() or not answer.strip():
         raise ValueError("Empty prompt/answer")
-    # Classic cosine (extra signal, not part of 15D)
-    e_p = get_embedding_normed(prompt)
-    e_a = get_embedding_normed(answer)
     cosine = float(np.dot(e_p, e_a))
-    # AGI-RRF state from combined text (stable single-state features)
     qa_text = f"Q: {prompt}\nA: {answer}"
     state = agirrf_core.analyze(qa_text, context_label="qa")
     feats = rrf_state_to_features(state).reshape(1, -1)
     p_good = float(meta_logit.predict_proba(feats)[0][1])
-    # Keep your public metrics, but now grounded
     SRRF = p_good
     CRRF = p_good * cosine
     E_phi = 0.5 * (p_good + abs(cosine))
     return {
-        "cosine": cosine,
         "p_good": p_good,
         "SRRF": SRRF,
         "CRRF": CRRF,
         "E_phi": E_phi,
-        # expose state so it's debuggable (very important for Savant)
         "phi": float(state["phi"]),
         "omega": float(state["omega"]),
         "coherence": float(state["coherence"]),
@@ -382,16 +430,16 @@ def compute_scores(prompt: str, answer: str) -> Dict[str, Any]:
 # ======================================================
 class EvaluateRequest(BaseModel):
     prompt: str
     answer: str
-    model_label: Optional[str] = None
 class EvaluateResponse(BaseModel):
     scores: Dict[str, Any]
     manifest_version: str
 class PredictRequest(BaseModel):
-    # direct 15D call (matches your MetaLogit /predict pattern)
     features: List[float] = Field(..., min_length=15, max_length=15)
 class PredictResponse(BaseModel):
@@ -400,7 +448,7 @@ class PredictResponse(BaseModel):
 class RerankRequest(BaseModel):
     query: str
     documents: List[str]
-    alpha: float = 0.2
 class RerankDocument(BaseModel):
     id: int
@@ -419,42 +467,75 @@ class RerankResponse(BaseModel):
 app = FastAPI(
     title="Savant RRF Φ12.0 API",
-    version="1.2.0",
     description="AGIRRFCore-aligned Meta-Logic, Reranking & Quality Evaluation",
 )
 @app.get("/manifest")
-def manifest():
     return {
         "model": "RRFSavantMetaLogicV2",
-        "version": "Φ12.0",
         "encoder": ENCODER_MODEL_ID,
         "features": 15,
-        "phi_nodes": PHI_NODES,
     }
 @app.get("/health")
 def health():
     return {
-        "encoder": True,
-        "meta_logit": True,
         "cnn_loaded": savant_cnn is not None,
         "rrf_nodes_loaded": rrf_nodes is not None,
-        "manifest": manifest.get("version"),
         "phi_nodes": len(PHI_NODES),
     }
 @app.post("/evaluate", response_model=EvaluateResponse)
 def evaluate(req: EvaluateRequest):
     try:
         scores = compute_scores(req.prompt, req.answer)
-        return EvaluateResponse(scores=scores, manifest_version=manifest.get("version"))
     except Exception as e:
         print(f"[Evaluate] Error: {e}", flush=True)
         raise HTTPException(status_code=500, detail="Evaluation failed")
 @app.post("/predict", response_model=PredictResponse)
 def predict(req: PredictRequest):
     try:
@@ -465,20 +546,35 @@ def predict(req: PredictRequest):
         print(f"[Predict] Error: {e}", flush=True)
         raise HTTPException(status_code=500, detail="Predict failed")
 @app.post("/v1/rerank", response_model=RerankResponse)
 def rerank(req: RerankRequest):
     try:
         texts = [req.query] + req.documents
         embs = agirrf_core.embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
         q_emb = embs[0]
         d_embs = embs[1:]
-        scores = (d_embs @ q_emb).tolist()
         results = [{"id": i, "score": float(s)} for i, s in enumerate(scores)]
         results.sort(key=lambda x: x["score"], reverse=True)
-        ranked = [RerankDocument(id=r["id"], score=r["score"], rank=i+1) for i, r in enumerate(results)]
         return RerankResponse(model_id=ENCODER_MODEL_ID, results=ranked)
     except Exception as e:
         print(f"[Rerank] Error: {e}", flush=True)
         raise HTTPException(status_code=500, detail="Rerank failed")

 # ======================================================
+# Savant RRF Φ12.0 — app.py (AGIRRFCore-aligned, HARDENED)
 # Uses the same AGIRRFCore logic as RRFSavant_AGI_Core_Colab
 # ======================================================
 import joblib
+# ======================================================
+# 0) Hardening limits
+# ======================================================
+MAX_PROMPT_CHARS = int(os.environ.get("MAX_PROMPT_CHARS", "8000"))
+MAX_ANSWER_CHARS = int(os.environ.get("MAX_ANSWER_CHARS", "12000"))
+MAX_DOCS        = int(os.environ.get("MAX_DOCS", "50"))
+MAX_DOC_CHARS   = int(os.environ.get("MAX_DOC_CHARS", "6000"))
 # ======================================================
 # 1) MANIFEST
 # ======================================================
 MANIFEST_PATH = Path(__file__).parent / "savant_rrf_api_manifest_phi12.json"
+def load_manifest_file() -> Dict[str, Any]:
     if MANIFEST_PATH.exists():
         try:
             print(f"[Manifest] Loading from {MANIFEST_PATH}", flush=True)
             return json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
         except Exception as e:
             print(f"[Manifest] Invalid JSON: {e}", flush=True)
     print("[Manifest] Using DEFAULT_MANIFEST", flush=True)
     return DEFAULT_MANIFEST
+manifest_data = load_manifest_file()
+print("[Manifest] version:", manifest_data.get("version"), flush=True)
 # ======================================================
 # 2) Global config
 # ======================================================
+HF_TOKEN = os.environ.get("HF_TOKEN", "")  # set in Spaces secrets
+if HF_TOKEN:
+    os.environ["HF_TOKEN"] = HF_TOKEN
 ENCODER_MODEL_ID    = "antonypamo/RRFSAVANTMADE"
 META_LOGIT_REPO     = "antonypamo/RRFSavantMetaLogicV2"
 RRF_DATASET_REPO = "antonypamo/savant_rrf1_curated"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+st_device = "cuda" if torch.cuda.is_available() else "cpu"
+def _hf_download_safe(
+    repo_id: str,
+    filename: str,
+    *,
+    repo_type: Optional[str] = None,
+    token: Optional[str] = None,
+) -> Optional[str]:
+    """
+    Best-effort wrapper around ``hf_hub_download``.
+
+    Args:
+        repo_id: Hub repository to download from.
+        filename: File inside the repository.
+        repo_type: Forwarded to ``hf_hub_download`` (``None`` or e.g. ``"dataset"``).
+        token: Access token; an empty string is forwarded as ``None``.
+
+    Returns:
+        The path returned by ``hf_hub_download``, or ``None`` if the call raised.
+        Failures are printed with an actionable hint (401 / 403-gated / 404)
+        and never propagate to the caller.
+    """
     try:
         return hf_hub_download(
+            repo_id=repo_id,
             filename=filename,
+            repo_type=repo_type,
+            token=token or None,
         )
     except Exception as e:
+        msg = str(e)
+        # The exception class name is not guaranteed to appear in str(e), so
+        # classify on the type name as well as on the message text.
+        exc_name = type(e).__name__
+        if "401" in msg or "Unauthorized" in msg:
+            print(f"❌ [HF] 401 Unauthorized downloading {repo_id}/{filename}. "
+                  f"Repo may be private/gated or HF_TOKEN missing/invalid.", flush=True)
+        elif exc_name == "GatedRepoError" or "403" in msg:
+            print(f"❌ [HF] 403 Forbidden downloading {repo_id}/{filename}. "
+                  f"Repo may be gated or HF_TOKEN lacks access.", flush=True)
+        elif (
+            exc_name in ("RepositoryNotFoundError", "EntryNotFoundError")
+            or "RepositoryNotFoundError" in msg
+            or "404" in msg
+        ):
+            print(f"❌ [HF] Repo or file not found: {repo_id}/{filename}", flush=True)
+        else:
+            print(f"⚠️ [HF] Download failed: {repo_id}/{filename} | {e}", flush=True)
         return None
+def hf_dataset_path(filename: str) -> Optional[str]:
+    return _hf_download_safe(
+        repo_id=RRF_DATASET_REPO,
+        filename=filename,
+        repo_type="dataset",
+        token=HF_TOKEN if HF_TOKEN else None,
+    )
 # ======================================================
+# 3) Optional artifacts (dataset assets)
 # ======================================================
+SAVANT_CNN_PATH  = hf_dataset_path("savant_cnn.pt")
+RRF_NODES_PATH   = hf_dataset_path("rrf_nodes.pt")
+RRF_TUTOR_JSONL  = hf_dataset_path("rrf_tutor_curated.jsonl")
 # ======================================================
         x = x.view(x.size(0), -1)
         return self.fc(x)
 savant_cnn = None
 if SAVANT_CNN_PATH:
     try:
         rrf_nodes = torch.load(RRF_NODES_PATH, map_location=device)
         print("✅ RRF nodes loaded", flush=True)
     except Exception as e:
+        print(f"⚠️ RRF nodes load failed: {e}", flush=True)
 # ======================================================
+# 5) Φ-node ontology (8 nodes -> one-hot 8)
 # ======================================================
 @dataclass
     name: str
     description: str
     tags: List[str] = field(default_factory=list)
+    embedding: Optional[np.ndarray] = None  # runtime only
 PHI_NODES: List[PhiNode] = [
     PhiNode("Φ0_seed",      "Genesis seed, core identity and origin.", ["genesis","identity","anchor"]),
 PHI_NAME_TO_IDX = {n.name: i for i, n in enumerate(PHI_NODES)}
+def phi_nodes_public() -> List[Dict[str, Any]]:
+    # JSON-safe version (no embeddings)
+    return [{"name": n.name, "description": n.description, "tags": n.tags} for n in PHI_NODES]
 # ======================================================
+# 6) CoherenceModel (stable S_RRF + C_RRF)
 # ======================================================
 class CoherenceModel:
 # ======================================================
+# 7) AGIRRFCore (aligned)
 # ======================================================
 class AGIRRFCore:
         self.phi_nodes = phi_nodes
         self.coherence_model = coherence_model
+        print(f"🔄 Loading sentence-transformer: {st_model_name} on {st_device} ...", flush=True)
         self.embedder = SentenceTransformer(st_model_name, device=st_device)
         print("✅ Embedder loaded", flush=True)
         return float(freqs[idx])
     def _phi_omega(self, energy: float, dom_freq: float) -> Tuple[float, float]:
+        phi = 1.0 - math.exp(-float(energy))      # saturating
+        omega = math.tanh(dom_freq * 10.0)        # saturating
         return float(phi), float(omega)
     def _closest_phi_node(self, vec: np.ndarray) -> Tuple[str, float]:
             return "unknown", 0.0
         v = np.asarray(vec, dtype=float).ravel()
         v_norm = np.linalg.norm(v) + 1e-9
+        best_name, best_cos = "unknown", -1.0
         for node in self.phi_nodes:
             e = node.embedding
             if e is None:
     def analyze(self, text: str, context_label: str = "query") -> Dict[str, Any]:
         vec = self._embed_text(text)
         energy = float(np.dot(vec, vec))
         dom_freq = self._dominant_frequency(vec)
         phi, omega = self._phi_omega(energy, dom_freq)
             S_RRF, C_RRF = 0.0, 0.0
         coherence = 0.5 * float(S_RRF) + 0.5 * float(C_RRF)
         closest_name, closest_cos = self._closest_phi_node(vec)
         return {
 # ======================================================
+# 8) Load Meta-Logit (15D)
 # ======================================================
 print("🔄 Loading meta-logit...", flush=True)
+meta_logit_path = _hf_download_safe(
     repo_id=META_LOGIT_REPO,
     filename=META_LOGIT_FILENAME,
+    token=HF_TOKEN if HF_TOKEN else None,
 )
+if not meta_logit_path:
+    raise RuntimeError(
+        f"Meta-logit not available. Check repo_id={META_LOGIT_REPO}, "
+        f"filename={META_LOGIT_FILENAME}, and HF_TOKEN if private."
+    )
 meta_logit = joblib.load(meta_logit_path)
+EXPECTED_FEATURES = getattr(meta_logit, "n_features_in_", 15)
+if EXPECTED_FEATURES != 15:
+    raise RuntimeError(f"Meta-logit expects {EXPECTED_FEATURES} features, expected 15.")
+print("✅ Meta-logit ready (15D)", flush=True)
 # ======================================================
+# 9) Feature mapping (7 + one-hot 8 = 15)
 # ======================================================
 def rrf_state_to_features(state: Dict[str, Any]) -> np.ndarray:
+    phi   = float(state.get("phi", 0.0))
     omega = float(state.get("omega", 0.0))
+    coh   = float(state.get("coherence", 0.0))
     S_RRF = float(state.get("S_RRF", 0.0))
     C_RRF = float(state.get("C_RRF", 0.0))
+    E_H   = float(state.get("hamiltonian_energy", 0.0))
     dom_f = float(state.get("dominant_frequency", 0.0))
     phi_name = state.get("closest_phi_node", "unknown")
+    phi_onehot = np.zeros(len(PHI_NODES), dtype=float)
     idx = PHI_NAME_TO_IDX.get(phi_name)
     if idx is not None:
         phi_onehot[idx] = 1.0
+    base = np.array([phi, omega, coh, S_RRF, C_RRF, E_H, dom_f], dtype=float)
+    return np.concatenate([base, phi_onehot], axis=0)
 # ======================================================
+# 10) Core scoring (prompt, answer)
 # ======================================================
+def _embed_norm(text: str) -> np.ndarray:
     return agirrf_core.embedder.encode([text], convert_to_numpy=True, normalize_embeddings=True)[0]
 def compute_scores(prompt: str, answer: str) -> Dict[str, Any]:
+    prompt = prompt or ""
+    answer = answer or ""
     if not prompt.strip() or not answer.strip():
         raise ValueError("Empty prompt/answer")
+    if len(prompt) > MAX_PROMPT_CHARS or len(answer) > MAX_ANSWER_CHARS:
+        raise HTTPException(status_code=413, detail="Payload too large")
+    # extra signal: cosine(prompt, answer)
+    e_p = _embed_norm(prompt)
+    e_a = _embed_norm(answer)
     cosine = float(np.dot(e_p, e_a))
+    # stable single-state features on combined QA text
     qa_text = f"Q: {prompt}\nA: {answer}"
     state = agirrf_core.analyze(qa_text, context_label="qa")
     feats = rrf_state_to_features(state).reshape(1, -1)
     p_good = float(meta_logit.predict_proba(feats)[0][1])
     SRRF = p_good
     CRRF = p_good * cosine
     E_phi = 0.5 * (p_good + abs(cosine))
     return {
         "p_good": p_good,
         "SRRF": SRRF,
         "CRRF": CRRF,
         "E_phi": E_phi,
+        "cosine": cosine,
+        # debug/state exposure (key for Savant)
         "phi": float(state["phi"]),
         "omega": float(state["omega"]),
         "coherence": float(state["coherence"]),
 # ======================================================
 class EvaluateRequest(BaseModel):
+    model_config = ConfigDict(protected_namespaces=())
     prompt: str
     answer: str
+    model_label: Optional[str] = None  # reserved for future routing
 class EvaluateResponse(BaseModel):
     scores: Dict[str, Any]
     manifest_version: str
 class PredictRequest(BaseModel):
     features: List[float] = Field(..., min_length=15, max_length=15)
 class PredictResponse(BaseModel):
 class RerankRequest(BaseModel):
     query: str
     documents: List[str]
+    alpha: float = 0.2  # kept for compatibility (not used in cosine rerank)
 class RerankDocument(BaseModel):
     id: int
 app = FastAPI(
     title="Savant RRF Φ12.0 API",
+    version="1.2.1",
     description="AGIRRFCore-aligned Meta-Logic, Reranking & Quality Evaluation",
 )
+# --------------------------
+# Root (avoid 404 in Spaces)
+# --------------------------
+@app.get("/")
+def root():
+    return {
+        "status": "ok",
+        "project": manifest_data.get("project"),
+        "version": manifest_data.get("version"),
+        "model": "RRFSavantMetaLogicV2",
+        "docs": "/docs",
+        "endpoints": ["/manifest", "/health", "/evaluate", "/predict", "/v1/rerank"],
+    }
+# --------------------------
+# Manifest (no naming clash)
+# --------------------------
 @app.get("/manifest")
+def get_manifest():
     return {
         "model": "RRFSavantMetaLogicV2",
+        "version": manifest_data.get("version"),
         "encoder": ENCODER_MODEL_ID,
+        "meta_logit": f"{META_LOGIT_REPO}/{META_LOGIT_FILENAME}",
         "features": 15,
+        "phi_nodes": phi_nodes_public(),
+        "limits": {
+            "MAX_PROMPT_CHARS": MAX_PROMPT_CHARS,
+            "MAX_ANSWER_CHARS": MAX_ANSWER_CHARS,
+            "MAX_DOCS": MAX_DOCS,
+            "MAX_DOC_CHARS": MAX_DOC_CHARS,
+        }
     }
 @app.get("/health")
 def health():
     return {
+        "status": "ok",
+        "encoder_loaded": True,
+        "meta_logit_loaded": True,
         "cnn_loaded": savant_cnn is not None,
         "rrf_nodes_loaded": rrf_nodes is not None,
+        "manifest_version": manifest_data.get("version"),
         "phi_nodes": len(PHI_NODES),
+        "device": str(device),
     }
 @app.post("/evaluate", response_model=EvaluateResponse)
 def evaluate(req: EvaluateRequest):
+    """Score a (prompt, answer) pair via ``compute_scores``.
+
+    Empty or whitespace-only input is rejected with HTTP 400, HTTPExceptions
+    raised while scoring pass through unchanged, and any other failure is
+    logged and reported as HTTP 500.
+    """
     try:
+        # Reject blank input up front: compute_scores signals it with a plain
+        # ValueError, which the generic handler below would report as a 500.
+        if not req.prompt.strip() or not req.answer.strip():
+            raise HTTPException(status_code=400, detail="prompt/answer is empty")
         scores = compute_scores(req.prompt, req.answer)
+        return EvaluateResponse(scores=scores, manifest_version=str(manifest_data.get("version")))
+    except HTTPException:
+        raise
     except Exception as e:
         print(f"[Evaluate] Error: {e}", flush=True)
         raise HTTPException(status_code=500, detail="Evaluation failed")
 @app.post("/predict", response_model=PredictResponse)
 def predict(req: PredictRequest):
     try:
         print(f"[Predict] Error: {e}", flush=True)
         raise HTTPException(status_code=500, detail="Predict failed")
 @app.post("/v1/rerank", response_model=RerankResponse)
 def rerank(req: RerankRequest):
     try:
+        if not req.query or not req.query.strip():
+            raise HTTPException(status_code=400, detail="query is empty")
+        if len(req.documents) > MAX_DOCS:
+            raise HTTPException(status_code=413, detail="Too many documents")
+        for d in req.documents:
+            if len(d) > MAX_DOC_CHARS:
+                raise HTTPException(status_code=413, detail="Document too large")
         texts = [req.query] + req.documents
         embs = agirrf_core.embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
         q_emb = embs[0]
         d_embs = embs[1:]
+        scores = (d_embs @ q_emb).astype(float).tolist()
         results = [{"id": i, "score": float(s)} for i, s in enumerate(scores)]
         results.sort(key=lambda x: x["score"], reverse=True)
+        ranked = [RerankDocument(id=r["id"], score=r["score"], rank=i + 1) for i, r in enumerate(results)]
         return RerankResponse(model_id=ENCODER_MODEL_ID, results=ranked)
+    except HTTPException:
+        raise
     except Exception as e:
         print(f"[Rerank] Error: {e}", flush=True)
         raise HTTPException(status_code=500, detail="Rerank failed")