Spaces:
Running
Running
File size: 19,647 Bytes
06dfe12 e027e92 646f858 06dfe12 e0fd074 646f858 06dfe12 646f858 11c4398 360b154 06dfe12 11c4398 06dfe12 360b154 11c4398 360b154 646f858 e027e92 06dfe12 646f858 06dfe12 360b154 06dfe12 646f858 06dfe12 dee2f43 9d33eb1 e027e92 06dfe12 9d33eb1 e027e92 9d33eb1 646f858 06dfe12 646f858 06dfe12 360b154 e027e92 360b154 06dfe12 d8bc33f b5bb4b5 11c4398 06dfe12 11c4398 e027e92 9d33eb1 06dfe12 e027e92 06dfe12 e027e92 06dfe12 9d33eb1 e027e92 9d33eb1 06dfe12 e027e92 06dfe12 e027e92 06dfe12 9d33eb1 e027e92 06dfe12 646f858 06dfe12 646f858 06dfe12 9d33eb1 06dfe12 9d33eb1 06dfe12 193cdd8 06dfe12 9d33eb1 06dfe12 9d33eb1 06dfe12 9d33eb1 e027e92 06dfe12 9d33eb1 06dfe12 9d33eb1 06dfe12 9d33eb1 06dfe12 9d33eb1 06dfe12 9d33eb1 e027e92 9d33eb1 646f858 e027e92 646f858 e027e92 646f858 e027e92 06dfe12 e027e92 06dfe12 360b154 646f858 11c4398 06dfe12 e027e92 06dfe12 360b154 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 360b154 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 646f858 e027e92 06dfe12 360b154 e027e92 646f858 e027e92 646f858 360b154 11c4398 06dfe12 646f858 e027e92 646f858 11c4398 360b154 646f858 06dfe12 646f858 06dfe12 11c4398 e027e92 06dfe12 e027e92 360b154 646f858 06dfe12 11c4398 646f858 11c4398 06dfe12 e027e92 11c4398 06dfe12 11c4398 9d33eb1 11c4398 06dfe12 11c4398 646f858 06dfe12 646f858 06dfe12 9d33eb1 11c4398 e027e92 646f858 11c4398 1e63ba7 e027e92 3e4e903 e027e92 06dfe12 1e63ba7 e027e92 1e63ba7 e027e92 1e63ba7 e027e92 06dfe12 11c4398 1e63ba7 11c4398 e027e92 9d33eb1 e027e92 646f858 e027e92 360b154 e027e92 11c4398 06dfe12 11c4398 646f858 e027e92 11c4398 06dfe12 11c4398 e027e92 646f858 e027e92 06dfe12 646f858 e027e92 646f858 e027e92 646f858 e027e92 11c4398 646f858 11c4398 e027e92 646f858 e027e92 646f858 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 |
# ======================================================
# Savant RRF Φ12.0 — app.py (AGIRRFCore-aligned, HARDENED)
# Uses the same AGIRRFCore logic as RRFSavant_AGI_Core_Colab
# ======================================================
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
import os, json, math, time
from typing import Optional, Dict, Any, List, Tuple
import numpy as np
import torch
import torch.nn as nn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field, ConfigDict
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_download
import joblib
# ======================================================
# 0) Hardening limits
# ======================================================
def _env_int(name: str, default: int) -> int:
    """Read a positive integer limit from the environment.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or unusable.

    Returns:
        The parsed value, or *default* (after printing a warning) when the
        variable is not a valid integer or is smaller than 1. This keeps a
        typo in a deployment setting from crashing the app at import time or
        from producing a limit that rejects every request.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"[Limits] {name}={raw!r} is not an integer; using {default}", flush=True)
        return default
    if value < 1:
        print(f"[Limits] {name}={value} must be >= 1; using {default}", flush=True)
        return default
    return value


# Request-size limits, overridable through environment variables.
MAX_PROMPT_CHARS = _env_int("MAX_PROMPT_CHARS", 8000)
MAX_ANSWER_CHARS = _env_int("MAX_ANSWER_CHARS", 12000)
MAX_DOCS = _env_int("MAX_DOCS", 50)
MAX_DOC_CHARS = _env_int("MAX_DOC_CHARS", 6000)
# ======================================================
# 1) MANIFEST
# ======================================================
# Fallback manifest, used when the JSON manifest file cannot be loaded.
DEFAULT_MANIFEST = dict(
    version="Φ12.0",
    project="Savant RRF API & Meta-Logic Suite",
    owner="Antony Padilla Morales",
    status="fallback_default",
)
# The manifest file is looked up in the same directory as this module.
MANIFEST_PATH = Path(__file__).with_name("savant_rrf_api_manifest_phi12.json")
def load_manifest_file() -> Dict[str, Any]:
    """Load the API manifest from ``MANIFEST_PATH``, falling back to defaults.

    Returns:
        The parsed manifest when the file exists, can be read, and contains a
        JSON object. Otherwise a fresh copy of ``DEFAULT_MANIFEST``, so
        callers can never mutate the shared default.
    """
    if MANIFEST_PATH.exists():
        try:
            print(f"[Manifest] Loading from {MANIFEST_PATH}", flush=True)
            loaded = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"[Manifest] Invalid JSON: {e}", flush=True)
        else:
            if isinstance(loaded, dict):
                return loaded
            # Valid JSON that is not an object (list, string, number) would
            # break every later ``manifest_data.get(...)`` call.
            print(
                f"[Manifest] Expected a JSON object, got {type(loaded).__name__}",
                flush=True,
            )
    print("[Manifest] Using DEFAULT_MANIFEST", flush=True)
    return dict(DEFAULT_MANIFEST)
# Resolve the manifest once at import time; the API endpoints read it from here.
manifest_data = load_manifest_file()
print("[Manifest] version:", manifest_data.get("version"), flush=True)
# ======================================================
# 2) Global config
# ======================================================
# Hugging Face access token (set in Spaces secrets). Surrounding whitespace is
# stripped so a value pasted with a trailing newline is not sent verbatim.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
if HF_TOKEN:
    # Re-export the normalized value so code that reads HF_TOKEN from the
    # environment sees the same token that this module passes explicitly.
    os.environ["HF_TOKEN"] = HF_TOKEN

# Hub identifiers for the encoder, the meta-logit classifier and the dataset.
ENCODER_MODEL_ID = "antonypamo/RRFSAVANTMADE"
META_LOGIT_REPO = "antonypamo/RRFSavantMetaLogicV2"
META_LOGIT_FILENAME = "logreg_rrf_savant.joblib"
RRF_DATASET_REPO = "antonypamo/savant_rrf1_curated"

# Probe CUDA once: ``st_device`` is the string form given to
# sentence-transformers, ``device`` the torch.device built from it.
st_device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(st_device)
def _hf_download_safe(
    repo_id: str,
    filename: str,
    *,
    repo_type: Optional[str] = None,
    token: Optional[str] = None,
) -> Optional[str]:
    """Download one file from the Hugging Face Hub without ever raising.

    Args:
        repo_id: Hub repository to download from.
        filename: File inside the repository.
        repo_type: Passed through to ``hf_hub_download``.
        token: Access token; an empty string is treated as "no token".

    Returns:
        The local path returned by ``hf_hub_download``, or ``None`` when the
        download fails. A message describing the failure is printed.
    """
    try:
        local_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type=repo_type,
            token=token or None,
        )
    except Exception as e:
        # Classify the failure from the exception text to print a useful hint.
        details = str(e)
        if "401" in details or "Unauthorized" in details:
            print(f"❌ [HF] 401 Unauthorized downloading {repo_id}/{filename}. "
                  f"Repo may be private/gated or HF_TOKEN missing/invalid.", flush=True)
        elif "RepositoryNotFoundError" in details or "404" in details:
            print(f"❌ [HF] Repo or file not found: {repo_id}/{filename}", flush=True)
        else:
            print(f"⚠️ [HF] Download failed: {repo_id}/{filename} | {e}", flush=True)
        return None
    return local_path
def hf_dataset_path(filename: str) -> Optional[str]:
    """Fetch *filename* from the RRF dataset repo; return its local path or None."""
    dataset_token = HF_TOKEN or None
    return _hf_download_safe(
        RRF_DATASET_REPO,
        filename,
        repo_type="dataset",
        token=dataset_token,
    )
# ======================================================
# 3) Optional artifacts (dataset assets)
# ======================================================
# Optional dataset assets, downloaded at import time. Each value is a local
# file path, or None when the download failed.
SAVANT_CNN_PATH = hf_dataset_path("savant_cnn.pt")
RRF_NODES_PATH = hf_dataset_path("rrf_nodes.pt")
RRF_TUTOR_JSONL = hf_dataset_path("rrf_tutor_curated.jsonl")
# ======================================================
# 4) Savant CNN (optional)
# ======================================================
class SavantCNN(nn.Module):
    """1-D CNN: three conv+ReLU stages, adaptive average pooling, linear head.

    Takes input with one channel, shaped (batch, 1, length), and produces a
    (batch, 64) tensor.
    """

    def __init__(self):
        super().__init__()
        # Attribute names determine the state-dict keys, so they are kept.
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveAvgPool1d(output_size=4)
        # 128 channels x 4 pooled positions = 512 input features.
        self.fc = nn.Linear(in_features=512, out_features=64)

    def forward(self, x):
        for conv in (self.conv1, self.conv2, self.conv3):
            x = torch.relu(conv(x))
        pooled = self.pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
def _load_savant_cnn(path: Optional[str]) -> Optional[nn.Module]:
    """Build a SavantCNN and load its weights from *path*.

    Returns:
        The model, moved to ``device`` and set to eval mode, or ``None`` when
        *path* is empty or any step fails. A half-initialised model is never
        returned, so ``savant_cnn is not None`` reliably means "weights loaded".
    """
    if not path:
        return None
    try:
        model = SavantCNN()
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted repo (consider weights_only=True where the installed
        # torch version supports it).
        model.load_state_dict(torch.load(path, map_location=device))
        model.to(device).eval()
    except Exception as e:
        print(f"⚠️ CNN load failed: {e}", flush=True)
        return None
    print("✅ Savant CNN loaded", flush=True)
    return model


# Optional CNN: None unless the checkpoint was downloaded and loaded.
savant_cnn = _load_savant_cnn(SAVANT_CNN_PATH)
# Optional RRF node data: stays None when the file is missing or unreadable.
rrf_nodes = None
if RRF_NODES_PATH:
    try:
        # NOTE(security): torch.load unpickles the file; the source repo must
        # be trusted.
        rrf_nodes = torch.load(RRF_NODES_PATH, map_location=device)
    except Exception as e:
        print(f"⚠️ RRF nodes load failed: {e}", flush=True)
    else:
        print("✅ RRF nodes loaded", flush=True)
# ======================================================
# 5) Φ-node ontology (8 nodes -> one-hot 8)
# ======================================================
@dataclass
class PhiNode:
    """One node of the Φ ontology: a name, a description and tags.

    ``embedding`` is a runtime-only cache. It is left out of ``==`` and
    ``repr()``: comparing two nodes that carry NumPy arrays would otherwise
    raise ``ValueError`` (ambiguous array truth value), and printing a node
    would dump the whole vector.
    """

    name: str
    description: str
    tags: List[str] = field(default_factory=list)
    embedding: Optional[np.ndarray] = field(default=None, compare=False, repr=False)  # runtime only
# The eight Φ-nodes. List order defines each node's one-hot index.
PHI_NODES: List[PhiNode] = [
    PhiNode(
        name="Φ0_seed",
        description="Genesis seed, core identity and origin.",
        tags=["genesis", "identity", "anchor"],
    ),
    PhiNode(
        name="Φ1_relation",
        description="Relational bonding, dialogue, social meaning.",
        tags=["relation", "dialogue"],
    ),
    PhiNode(
        name="Φ2_resonance",
        description="Signal resonance, harmonic alignment, coherence lift.",
        tags=["resonance", "harmonics"],
    ),
    PhiNode(
        name="Φ3_memory",
        description="Memory consolidation, retrieval, indexing.",
        tags=["memory", "retrieval"],
    ),
    PhiNode(
        name="Φ4_logic",
        description="Logical rigor, constraints, verification.",
        tags=["logic", "verification"],
    ),
    PhiNode(
        name="Φ5_creative",
        description="Creative synthesis, metaphor, generative jumps.",
        tags=["creative", "synthesis"],
    ),
    PhiNode(
        name="Φ6_alignment",
        description="Ethical alignment and safety constraints.",
        tags=["alignment", "ethics"],
    ),
    PhiNode(
        name="Φ7_meta_agi",
        description="Meta-orchestrator that evaluates and routes flows.",
        tags=["meta", "orchestration"],
    ),
]
# Node name -> position in PHI_NODES.
PHI_NAME_TO_IDX = {node.name: position for position, node in enumerate(PHI_NODES)}
def phi_nodes_public() -> List[Dict[str, Any]]:
    """Return the Φ-nodes as JSON-safe dicts (name, description, tags; no embeddings)."""
    public: List[Dict[str, Any]] = []
    for node in PHI_NODES:
        public.append(
            {"name": node.name, "description": node.description, "tags": node.tags}
        )
    return public
# ======================================================
# 6) CoherenceModel (stable S_RRF + C_RRF)
# ======================================================
class CoherenceModel:
    """Spectral scores (S_RRF, C_RRF) derived from a vector's real-FFT power."""

    def __init__(self, eps: float = 1e-9):
        # Added to denominators so they are never zero.
        self.eps = eps

    def compute(self, vec: np.ndarray) -> Tuple[float, float]:
        """Return ``(S_RRF, C_RRF)`` for *vec*.

        C_RRF is the share of total spectral power held by the strongest bin.
        S_RRF is ``1 - min(1, mean_frequency / max_frequency)``, so it is
        higher when power sits at low frequencies. Inputs with fewer than
        four elements yield ``(0.0, 0.0)``.
        """
        signal = np.asarray(vec, dtype=float).ravel()
        length = len(signal)
        if length < 4:
            return 0.0, 0.0

        power = (np.abs(np.fft.rfft(signal)) ** 2).astype(float)
        freqs = np.fft.rfftfreq(length, d=1.0).astype(float)
        denom = float(power.sum()) + self.eps

        # C_RRF: concentration in the dominant frequency bin.
        concentration = float(power.max() / denom)

        # S_RRF: power-weighted mean frequency relative to the highest bin.
        mean_freq = float((freqs * power).sum() / denom)
        top_freq = float(freqs.max()) + self.eps
        smoothness = float(1.0 - min(1.0, mean_freq / top_freq))

        return smoothness, concentration


# Shared instance used by the AGIRRFCore below.
coherence_model = CoherenceModel()
# ======================================================
# 7) AGIRRFCore (aligned)
# ======================================================
class AGIRRFCore:
    """Embeds text and derives the RRF state used as meta-logit features.

    On construction it loads a sentence-transformer and stores an embedding on
    every Φ-node passed in (the node objects are modified in place).
    """

    def __init__(
        self,
        phi_nodes: List[PhiNode],
        coherence_model: Optional[CoherenceModel] = None,
        st_model_name: str = ENCODER_MODEL_ID,
    ):
        self.phi_nodes = phi_nodes
        self.coherence_model = coherence_model

        print(f"🔄 Loading sentence-transformer: {st_model_name} on {st_device} ...", flush=True)
        self.embedder = SentenceTransformer(st_model_name, device=st_device)
        print("✅ Embedder loaded", flush=True)

        self._embed_phi_nodes()

    def _embed_text(self, text: str) -> np.ndarray:
        """Encode one text and return its (un-normalized) embedding vector."""
        batch = self.embedder.encode([text], convert_to_numpy=True)
        return batch[0]

    def _embed_phi_nodes(self):
        """Encode each Φ-node's name, description and tags; store the result on the node."""
        texts = [f"{n.name}: {n.description} | tags: {', '.join(n.tags)}" for n in self.phi_nodes]
        vectors = self.embedder.encode(texts, convert_to_numpy=True)
        for node, vector in zip(self.phi_nodes, vectors):
            node.embedding = vector
        print(f"✅ Embedded {len(self.phi_nodes)} Φ-nodes.", flush=True)

    def _dominant_frequency(self, vec: np.ndarray) -> float:
        """Return the rfft frequency with the most power (0.0 for fewer than 4 values)."""
        signal = np.asarray(vec, dtype=float).ravel()
        if len(signal) < 4:
            return 0.0
        power = np.abs(np.fft.rfft(signal)) ** 2
        freqs = np.fft.rfftfreq(len(signal), d=1.0)
        strongest = int(np.argmax(power))
        return float(freqs[strongest])

    def _phi_omega(self, energy: float, dom_freq: float) -> Tuple[float, float]:
        """Map energy and dominant frequency through saturating functions."""
        phi = 1.0 - math.exp(-float(energy))  # saturating
        omega = math.tanh(dom_freq * 10.0)  # saturating
        return float(phi), float(omega)

    def _closest_phi_node(self, vec: np.ndarray) -> Tuple[str, float]:
        """Return ``(name, cosine)`` of the Φ-node most similar to *vec*.

        Gives ``("unknown", 0.0)`` when there are no nodes or the first node
        has no embedding yet.
        """
        if not self.phi_nodes or self.phi_nodes[0].embedding is None:
            return "unknown", 0.0

        query = np.asarray(vec, dtype=float).ravel()
        query_norm = np.linalg.norm(query) + 1e-9

        winner, winner_cos = "unknown", -1.0
        for node in self.phi_nodes:
            candidate = node.embedding
            if candidate is None:
                continue
            cos = float(np.dot(query, candidate) / (query_norm * (np.linalg.norm(candidate) + 1e-9)))
            # Strict comparison: the first node wins ties.
            if cos > winner_cos:
                winner, winner_cos = node.name, cos
        return winner, winner_cos

    def analyze(self, text: str, context_label: str = "query") -> Dict[str, Any]:
        """Embed *text* and return its RRF state as a dict of plain values.

        The state holds phi/omega, the coherence scores, the embedding's
        squared norm ("hamiltonian_energy"), the dominant frequency, the
        closest Φ-node with its cosine, and a timestamp.
        """
        vec = self._embed_text(text)

        energy = float(np.dot(vec, vec))
        dom_freq = self._dominant_frequency(vec)
        phi, omega = self._phi_omega(energy, dom_freq)

        if self.coherence_model is None:
            S_RRF, C_RRF = 0.0, 0.0
        else:
            S_RRF, C_RRF = self.coherence_model.compute(vec)
        # Coherence is the plain average of the two spectral scores.
        coherence = 0.5 * float(S_RRF) + 0.5 * float(C_RRF)

        closest_name, closest_cos = self._closest_phi_node(vec)

        state: Dict[str, Any] = {
            "context": context_label,
            "phi": phi,
            "omega": omega,
            "coherence": float(coherence),
            "S_RRF": float(S_RRF),
            "C_RRF": float(C_RRF),
            "hamiltonian_energy": float(energy),
            "dominant_frequency": float(dom_freq),
            "closest_phi_node": closest_name,
            "closest_phi_cos": float(closest_cos),
            "timestamp": float(time.time()),
        }
        return state
# Shared core instance, built at import time. Construction loads the encoder
# and embeds the Φ-nodes.
agirrf_core = AGIRRFCore(
phi_nodes=PHI_NODES,
coherence_model=coherence_model,
st_model_name=ENCODER_MODEL_ID,
)
# ======================================================
# 8) Load Meta-Logit (15D)
# ======================================================
# The meta-logit classifier is mandatory: the module refuses to import
# without it, unlike the optional dataset assets loaded earlier.
print("🔄 Loading meta-logit...", flush=True)
meta_logit_path = _hf_download_safe(
    repo_id=META_LOGIT_REPO,
    filename=META_LOGIT_FILENAME,
    token=HF_TOKEN if HF_TOKEN else None,
)
if not meta_logit_path:
    raise RuntimeError(
        f"Meta-logit not available. Check repo_id={META_LOGIT_REPO}, "
        f"filename={META_LOGIT_FILENAME}, and HF_TOKEN if private."
    )
# NOTE(review): joblib.load unpickles the downloaded file, which can execute
# arbitrary code; the source repo must be trusted.
meta_logit = joblib.load(meta_logit_path)
# Models without n_features_in_ are assumed to take 15 features.
EXPECTED_FEATURES = getattr(meta_logit, "n_features_in_", 15)
# The feature vector built by this app is 7 scalars + 8 one-hot slots = 15.
if EXPECTED_FEATURES != 15:
    raise RuntimeError(f"Meta-logit expects {EXPECTED_FEATURES} features, expected 15.")
print("✅ Meta-logit ready (15D)", flush=True)
# ======================================================
# 9) Feature mapping (7 + one-hot 8 = 15)
# ======================================================
def rrf_state_to_features(state: Dict[str, Any]) -> np.ndarray:
    """Flatten an RRF state dict into the meta-logit feature vector.

    Layout: seven scalars (phi, omega, coherence, S_RRF, C_RRF,
    hamiltonian_energy, dominant_frequency) followed by a one-hot block with
    one slot per Φ-node. Missing scalars default to 0.0. An unrecognised
    ``closest_phi_node`` leaves the one-hot block all zeros.
    """
    scalar_keys = (
        "phi",
        "omega",
        "coherence",
        "S_RRF",
        "C_RRF",
        "hamiltonian_energy",
        "dominant_frequency",
    )
    n_scalars = len(scalar_keys)
    features = np.zeros(n_scalars + len(PHI_NODES), dtype=float)

    for position, key in enumerate(scalar_keys):
        features[position] = float(state.get(key, 0.0))

    slot = PHI_NAME_TO_IDX.get(state.get("closest_phi_node", "unknown"))
    if slot is not None:
        features[n_scalars + slot] = 1.0
    return features
# ======================================================
# 10) Core scoring (prompt, answer)
# ======================================================
def _embed_norm(text: str) -> np.ndarray:
    """Encode *text* with the shared embedder, requesting normalized embeddings."""
    encoder = agirrf_core.embedder
    vectors = encoder.encode([text], convert_to_numpy=True, normalize_embeddings=True)
    return vectors[0]
def compute_scores(prompt: str, answer: str) -> Dict[str, Any]:
    """Score a (prompt, answer) pair.

    Combines the meta-logit probability computed on the joined Q/A text with
    the cosine similarity between the prompt and answer embeddings.

    Args:
        prompt: The question or instruction text.
        answer: The candidate answer text.

    Returns:
        A dict with ``p_good`` (meta-logit probability of class 1),
        ``SRRF`` (= p_good), ``CRRF`` (= p_good * cosine),
        ``E_phi`` (= 0.5 * (p_good + |cosine|)), ``cosine``, and the RRF
        state values of the joined text.

    Raises:
        HTTPException: 400 when either text is empty or whitespace-only;
            413 when either text exceeds its configured character limit.
    """
    prompt = prompt or ""
    answer = answer or ""
    if not prompt.strip() or not answer.strip():
        # A client error, reported like the size check below. Raising a plain
        # exception here would reach the API as a generic 500.
        raise HTTPException(status_code=400, detail="Empty prompt/answer")
    if len(prompt) > MAX_PROMPT_CHARS or len(answer) > MAX_ANSWER_CHARS:
        raise HTTPException(status_code=413, detail="Payload too large")

    # Extra signal: cosine(prompt, answer). _embed_norm requests normalized
    # embeddings, so the dot product is used directly as the cosine.
    e_p = _embed_norm(prompt)
    e_a = _embed_norm(answer)
    cosine = float(np.dot(e_p, e_a))

    # Stable single-state features on the combined QA text.
    qa_text = f"Q: {prompt}\nA: {answer}"
    state = agirrf_core.analyze(qa_text, context_label="qa")
    feats = rrf_state_to_features(state).reshape(1, -1)
    p_good = float(meta_logit.predict_proba(feats)[0][1])

    scores: Dict[str, Any] = {
        "p_good": p_good,
        "SRRF": p_good,
        "CRRF": p_good * cosine,
        "E_phi": 0.5 * (p_good + abs(cosine)),
        "cosine": cosine,
    }
    # Debug/state exposure (key for Savant).
    for key in (
        "phi",
        "omega",
        "coherence",
        "S_RRF",
        "C_RRF",
        "hamiltonian_energy",
        "dominant_frequency",
    ):
        scores[key] = float(state[key])
    scores["closest_phi_node"] = state["closest_phi_node"]
    scores["closest_phi_cos"] = float(state["closest_phi_cos"])
    return scores
# ======================================================
# 11) FastAPI models
# ======================================================
# Request body for POST /evaluate.
class EvaluateRequest(BaseModel):
    # Empty protected namespaces so the "model_"-prefixed field below does not
    # clash with pydantic's reserved prefix.
    model_config = ConfigDict(protected_namespaces=())
    prompt: str
    answer: str
    model_label: Optional[str] = None # reserved for future routing
# Response body for POST /evaluate.
class EvaluateResponse(BaseModel):
    scores: Dict[str, Any]  # output of compute_scores
    manifest_version: str  # "version" entry of the loaded manifest, as a string
# Request body for POST /predict: a ready-made meta-logit feature vector.
class PredictRequest(BaseModel):
    # Exactly 15 values, matching the feature count the meta-logit is checked against.
    features: List[float] = Field(..., min_length=15, max_length=15)
# Response body for POST /predict.
class PredictResponse(BaseModel):
    p_good: float  # meta-logit probability of class index 1
# Request body for POST /v1/rerank.
class RerankRequest(BaseModel):
    query: str
    documents: List[str]
    alpha: float = 0.2 # kept for compatibility (not used in cosine rerank)
# One ranked entry in a rerank response.
class RerankDocument(BaseModel):
    id: int  # index of the document in the request's "documents" list
    score: float  # dot product of the normalized query and document embeddings
    rank: int  # 1-based position after sorting by score, highest first
# Response body for POST /v1/rerank.
class RerankResponse(BaseModel):
    # Empty protected namespaces so "model_id" does not clash with pydantic's
    # reserved "model_" prefix.
    model_config = ConfigDict(protected_namespaces=())
    model_id: str
    results: List[RerankDocument]
# ======================================================
# 12) FastAPI app
# ======================================================
# The ASGI application; every endpoint below is registered on it.
app = FastAPI(
title="Savant RRF Φ12.0 API",
version="1.2.1",
description="AGIRRFCore-aligned Meta-Logic, Reranking & Quality Evaluation",
)
# --------------------------
# Root (avoid 404 in Spaces)
# --------------------------
@app.get("/")
def root():
    # Landing endpoint: service status, manifest identity and endpoint list.
    endpoints = ["/manifest", "/health", "/evaluate", "/predict", "/v1/rerank"]
    payload = {
        "status": "ok",
        "project": manifest_data.get("project"),
        "version": manifest_data.get("version"),
        "model": "RRFSavantMetaLogicV2",
        "docs": "/docs",
    }
    payload["endpoints"] = endpoints
    return payload
# --------------------------
# Manifest (no naming clash)
# --------------------------
@app.get("/manifest")
def get_manifest():
    # Describes the deployed models, the Φ-node ontology and the request limits.
    limits = {
        "MAX_PROMPT_CHARS": MAX_PROMPT_CHARS,
        "MAX_ANSWER_CHARS": MAX_ANSWER_CHARS,
        "MAX_DOCS": MAX_DOCS,
        "MAX_DOC_CHARS": MAX_DOC_CHARS,
    }
    meta_logit_ref = f"{META_LOGIT_REPO}/{META_LOGIT_FILENAME}"
    return {
        "model": "RRFSavantMetaLogicV2",
        "version": manifest_data.get("version"),
        "encoder": ENCODER_MODEL_ID,
        "meta_logit": meta_logit_ref,
        "features": 15,
        "phi_nodes": phi_nodes_public(),
        "limits": limits,
    }
@app.get("/health")
def health():
    # Liveness report. The encoder and meta-logit flags are constant True;
    # only the optional assets are actually checked.
    cnn_ready = savant_cnn is not None
    nodes_ready = rrf_nodes is not None
    return {
        "status": "ok",
        "encoder_loaded": True,
        "meta_logit_loaded": True,
        "cnn_loaded": cnn_ready,
        "rrf_nodes_loaded": nodes_ready,
        "manifest_version": manifest_data.get("version"),
        "phi_nodes": len(PHI_NODES),
        "device": str(device),
    }
@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(req: EvaluateRequest):
    # Scores one prompt/answer pair. HTTP errors raised by the scorer pass
    # through unchanged; anything else is logged and reported as a 500.
    try:
        result = compute_scores(req.prompt, req.answer)
        version = str(manifest_data.get("version"))
        return EvaluateResponse(scores=result, manifest_version=version)
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Evaluate] Error: {e}", flush=True)
        raise HTTPException(status_code=500, detail="Evaluation failed")
@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    # Runs the meta-logit on a caller-supplied 15-value feature vector and
    # returns the probability of class index 1.
    # Non-finite values (NaN/inf) are rejected as a 400 client error before
    # they reach the classifier. Unexpected failures are logged and reported
    # as a 500.
    try:
        x = np.array([req.features], dtype=float)
        if not np.isfinite(x).all():
            raise HTTPException(status_code=400, detail="features must be finite numbers")
        p_good = float(meta_logit.predict_proba(x)[0][1])
        return PredictResponse(p_good=p_good)
    except HTTPException:
        # Let deliberate HTTP errors through, as the other endpoints do.
        raise
    except Exception as e:
        print(f"[Predict] Error: {e}", flush=True)
        raise HTTPException(status_code=500, detail="Predict failed")
@app.post("/v1/rerank", response_model=RerankResponse)
def rerank(req: RerankRequest):
    # Ranks req.documents by the dot product of their normalized embeddings
    # with the normalized query embedding, highest first. Ties keep request
    # order; "id" is the document's index in the request.
    # Errors: 400 for an empty query; 413 for an oversized query, too many
    # documents, or an oversized document; 500 for anything unexpected.
    try:
        if not req.query or not req.query.strip():
            raise HTTPException(status_code=400, detail="query is empty")
        # The query is bounded like every other text input; without this an
        # arbitrarily large query would go straight to the encoder.
        if len(req.query) > MAX_PROMPT_CHARS:
            raise HTTPException(status_code=413, detail="Query too large")
        if len(req.documents) > MAX_DOCS:
            raise HTTPException(status_code=413, detail="Too many documents")
        if any(len(d) > MAX_DOC_CHARS for d in req.documents):
            raise HTTPException(status_code=413, detail="Document too large")
        if not req.documents:
            # Nothing to rank: skip the encoder call.
            return RerankResponse(model_id=ENCODER_MODEL_ID, results=[])

        texts = [req.query, *req.documents]
        embs = agirrf_core.embedder.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
        scores = (embs[1:] @ embs[0]).astype(float).tolist()

        # sorted() is stable, also with reverse=True, so ties keep request order.
        ordered = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
        ranked = [
            RerankDocument(id=doc_id, score=float(score), rank=position)
            for position, (doc_id, score) in enumerate(ordered, start=1)
        ]
        return RerankResponse(model_id=ENCODER_MODEL_ID, results=ranked)
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Rerank] Error: {e}", flush=True)
        raise HTTPException(status_code=500, detail="Rerank failed")
|