Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,6 +18,7 @@ import joblib
|
|
| 18 |
|
| 19 |
import torch
|
| 20 |
import torch.nn as nn
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
# ============================
|
|
@@ -32,7 +33,8 @@ META_LOGIT_REPO = "antonypamo/RRFSavantMetaLogit"
|
|
| 32 |
META_LOGIT_FILENAME = "logreg_rrf_savant_15.joblib" # versión 15 features
|
| 33 |
|
| 34 |
# Dataset central con TODOS los artefactos RRF/Savant
|
| 35 |
-
RRF_DATASET_REPO
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
def hf_data_path(filename: str) -> str:
|
|
@@ -113,7 +115,7 @@ PHYS_ADJ_13 = safe_hf("adjacency_13.csv")
|
|
| 113 |
|
| 114 |
|
| 115 |
# ============================
|
| 116 |
-
# Savant CNN + nodos RRF (demo
|
| 117 |
# ============================
|
| 118 |
|
| 119 |
class SavantCNN(nn.Module):
|
|
@@ -534,8 +536,6 @@ def apply_role_profile(scores: Dict[str, float], role_name: Optional[str]) -> Di
|
|
| 534 |
# RRF Tutor: carga de dataset savant_rrf1_curated
|
| 535 |
# ============================
|
| 536 |
|
| 537 |
-
from datasets import load_dataset
|
| 538 |
-
|
| 539 |
print(f"🔄 [Startup] Cargando dataset para RRF Tutor: {RRF_TUTOR_DATASET_ID}...", flush=True)
|
| 540 |
ds_rrf = None
|
| 541 |
rrf_corpus_texts: List[str] = []
|
|
@@ -590,6 +590,56 @@ except Exception as e:
|
|
| 590 |
print("⚠️ [RRF Tutor] Endpoint /v1/rrf_tutor devolverá error 503 si se usa.", flush=True)
|
| 591 |
|
| 592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
# ============================
|
| 594 |
# FastAPI models
|
| 595 |
# ============================
|
|
@@ -847,10 +897,13 @@ def rrf_tutor_endpoint(body: RRFTutorRequest):
|
|
| 847 |
if not body.query or not body.query.strip():
|
| 848 |
raise HTTPException(status_code=400, detail="El campo 'query' no puede estar vacío.")
|
| 849 |
|
| 850 |
-
if
|
| 851 |
raise HTTPException(
|
| 852 |
-
status_code=
|
| 853 |
-
detail=
|
|
|
|
|
|
|
|
|
|
| 854 |
)
|
| 855 |
|
| 856 |
try:
|
|
|
|
| 18 |
|
| 19 |
import torch
|
| 20 |
import torch.nn as nn
|
| 21 |
+
from datasets import load_dataset
|
| 22 |
|
| 23 |
|
| 24 |
# ============================
|
|
|
|
| 33 |
META_LOGIT_FILENAME = "logreg_rrf_savant_15.joblib" # versión 15 features
|
| 34 |
|
| 35 |
# Dataset central con TODOS los artefactos RRF/Savant
|
| 36 |
+
RRF_DATASET_REPO = "antonypamo/savant_rrf1_curated"
|
| 37 |
+
RRF_TUTOR_DATASET_ID = RRF_DATASET_REPO # mismo repo para Tutor
|
| 38 |
|
| 39 |
|
| 40 |
def hf_data_path(filename: str) -> str:
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
# ============================
|
| 118 |
+
# Savant CNN + nodos RRF (demo interna)
|
| 119 |
# ============================
|
| 120 |
|
| 121 |
class SavantCNN(nn.Module):
|
|
|
|
| 536 |
# RRF Tutor: carga de dataset savant_rrf1_curated
|
| 537 |
# ============================
|
| 538 |
|
|
|
|
|
|
|
| 539 |
print(f"🔄 [Startup] Cargando dataset para RRF Tutor: {RRF_TUTOR_DATASET_ID}...", flush=True)
|
| 540 |
ds_rrf = None
|
| 541 |
rrf_corpus_texts: List[str] = []
|
|
|
|
| 590 |
print("⚠️ [RRF Tutor] Endpoint /v1/rrf_tutor devolverá error 503 si se usa.", flush=True)
|
| 591 |
|
| 592 |
|
| 593 |
+
def rrf_tutor_retrieve_examples(query: str, top_k: int = 3):
    """Return the corpus examples most similar to *query*.

    The query is embedded with the module-level ``encoder`` and scored
    against every row of ``rrf_corpus_embeds`` with a dot product. The
    query embedding is normalized; the score is a cosine similarity only
    if the corpus embeddings were normalized as well (NOTE(review): not
    visible from this block -- confirm where ``rrf_corpus_embeds`` is built).

    Args:
        query: Text to search for.
        top_k: Maximum number of examples to return. Values larger than
            the corpus are capped to the corpus size; zero or negative
            values yield an empty list.

    Returns:
        A list of dicts, best match first, each with the keys ``idx``
        (row index as ``int``), ``score`` (``float``), ``prompt`` and
        ``completion`` (taken from ``rrf_corpus_prompts`` and
        ``rrf_corpus_completions`` at that index).

    Raises:
        RuntimeError: If the tutor is not ready or no corpus embeddings
            are loaded.
    """
    if (not rrf_tutor_ready) or rrf_corpus_embeds is None or len(rrf_corpus_embeds) == 0:
        raise RuntimeError("Embeddings de RRF Tutor no están disponibles.")

    q_emb = encoder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
    sims = np.dot(rrf_corpus_embeds, q_emb)

    # Clamp to [0, corpus size]. Without the lower bound a negative top_k
    # would make the slice below count from the end and return almost the
    # whole corpus instead of nothing.
    top_k = max(0, min(top_k, len(rrf_corpus_embeds)))
    # Negating the scores makes the ascending argsort yield best-first order.
    top_idx = np.argsort(-sims)[:top_k]

    return [
        {
            "idx": int(idx),
            "score": float(sims[idx]),
            "prompt": rrf_corpus_prompts[idx],
            "completion": rrf_corpus_completions[idx],
        }
        for idx in top_idx
    ]
|
| 618 |
+
|
| 619 |
+
|
| 620 |
+
def rrf_tutor_build_answer(query: str, retrieved_examples):
    """Build a plain-text answer from the best retrieved corpus example.

    Args:
        query: The user's question. It is accepted for interface symmetry
            but is not used when composing the text.
        retrieved_examples: Sequence of example dicts ordered best-first;
            only the ``"completion"`` value of the first entry is read.

    Returns:
        A fixed "no relevant examples" message when ``retrieved_examples``
        is empty or falsy; otherwise the first example's completion wrapped
        between a fixed header line and an explanatory footnote.
    """
    if retrieved_examples:
        closest_completion = retrieved_examples[0]["completion"]
        header = "🔎 Respuesta basada en el ejemplo más cercano del corpus RRF:\n\n"
        footnote = (
            "💡 Nota: Esta es una versión mínima que reutiliza directamente la 'completion' "
            "del ejemplo más similar en savant_rrf1_curated. En una versión extendida, aquí "
            "se conectaría un LLM pequeño que combine varios ejemplos como contexto."
        )
        return f"{header}{closest_completion}\n\n{footnote}"

    # Nothing was retrieved: return the fixed hint about the dataset columns.
    return (
        "No encontré ejemplos relevantes en el dataset RRF Tutor para tu consulta. "
        "Verifica que antonypamo/savant_rrf1_curated contenga 'prompt' y 'completion'."
    )
|
| 641 |
+
|
| 642 |
+
|
| 643 |
# ============================
|
| 644 |
# FastAPI models
|
| 645 |
# ============================
|
|
|
|
| 897 |
if not body.query or not body.query.strip():
|
| 898 |
raise HTTPException(status_code=400, detail="El campo 'query' no puede estar vacío.")
|
| 899 |
|
| 900 |
+
if not rrf_tutor_ready:
|
| 901 |
raise HTTPException(
|
| 902 |
+
status_code=503,
|
| 903 |
+
detail=(
|
| 904 |
+
"RRF Tutor no está listo: embeddings no cargados. "
|
| 905 |
+
"Verifica el dataset antonypamo/savant_rrf1_curated y reinicia el Space."
|
| 906 |
+
),
|
| 907 |
)
|
| 908 |
|
| 909 |
try:
|