antonypamo committed on
Commit
9d33eb1
·
verified ·
1 Parent(s): 64f3eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +275 -127
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import sys
3
  import math
 
4
  from typing import Optional, Dict, Any, List
5
 
6
  import numpy as np
@@ -9,16 +10,18 @@ from scipy.linalg import expm
9
 
10
  from fastapi import FastAPI, HTTPException
11
  from pydantic import BaseModel, Field
 
12
 
13
  from sentence_transformers import SentenceTransformer
14
  from huggingface_hub import hf_hub_download
15
  import joblib
16
 
17
- from datasets import load_dataset # para /v1/rrf_tutor
 
18
 
19
 
20
  # ============================
21
- # Configuración de modelos
22
  # ============================
23
 
24
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
@@ -26,7 +29,29 @@ HF_TOKEN = os.environ.get("HF_TOKEN", "")
26
  ENCODER_MODEL_ID = "antonypamo/RRFSAVANTMADE"
27
  META_LOGIT_REPO = "antonypamo/RRFSavantMetaLogit"
28
  META_LOGIT_FILENAME = "logreg_rrf_savant_15.joblib"
29
- RRF_TUTOR_DATASET_ID = "antonypamo/savant_rrf1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  print("🔄 [Startup] Cargando encoder RRFSAVANTMADE...", flush=True)
32
  try:
@@ -41,7 +66,7 @@ try:
41
  meta_logit_path = hf_hub_download(
42
  repo_id=META_LOGIT_REPO,
43
  filename=META_LOGIT_FILENAME,
44
- token=HF_TOKEN if HF_TOKEN else None, # si es público, puede ser None
45
  )
46
  print(f"🔄 [Startup] Cargando modelo meta-logit '{META_LOGIT_FILENAME}'...", flush=True)
47
  meta_logit = joblib.load(meta_logit_path)
@@ -55,6 +80,138 @@ except Exception as e:
55
  raise
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # ============================
59
  # Geometría icosaédrica Φ12.0
60
  # ============================
@@ -92,6 +249,7 @@ def geodesic_kernel(nodes, sigma=0.618, alpha_log=0.10):
92
 
93
  if alpha_log > 0.0:
94
  corr = 1.0 + alpha_log * np.log1p(dist ** 2)
 
95
  corr[range(N), range(N)] = 1.0
96
  W = W / corr
97
 
@@ -197,23 +355,6 @@ def evolve_dirac_shell(psi0, H, dt=0.05, steps=100, record_every=25):
197
  }
198
 
199
 
200
- # ============================
201
- # Hamiltoniano base (startup)
202
- # ============================
203
-
204
- print("🔄 [Startup] Construyendo Hamiltoniano base Φ12.0...", flush=True)
205
- H_BASE = build_dirac_hamiltonian(
206
- m=0.25,
207
- v=1.0,
208
- sigma=0.618,
209
- alpha_log=0.10,
210
- q=1.0,
211
- flux_vector=(0.0, 0.0, 0.0),
212
- gauge_scale=0.0,
213
- )
214
- print("✅ Hamiltoniano base construido.", flush=True)
215
-
216
-
217
  # ============================
218
  # Core RRF: embeddings + features + scores
219
  # ============================
@@ -224,21 +365,25 @@ def get_embedding(text: str) -> np.ndarray:
224
 
225
 
226
  def compute_rrf_features(prompt: str, answer: str) -> Dict[str, float]:
227
- # Embeddings
228
  e_p = get_embedding(prompt)
229
  e_a = get_embedding(answer)
230
 
231
  cosine_pa = float(np.dot(e_p, e_a))
232
  len_ratio = len(answer) / (len(prompt) + 1.0)
233
 
234
- # Simulación Dirac shell determinista (semilla por prompt+answer)
235
  rng = np.random.default_rng(abs(hash(prompt + answer)) % (2 ** 32))
236
  vec = rng.normal(0, 1, (2 * N,)) + 1j * rng.normal(0, 1, (2 * N,))
237
  vec /= np.sqrt(np.vdot(vec, vec))
238
  psi0 = vec
239
 
240
- # Usamos el Hamiltoniano base global
241
- out = evolve_dirac_shell(psi0, H_BASE, dt=0.05, steps=100, record_every=25)
 
 
 
 
 
 
242
 
243
  entropy = out["entropy"]
244
  energy = out["energy"]
@@ -251,7 +396,6 @@ def compute_rrf_features(prompt: str, answer: str) -> Dict[str, float]:
251
  E_mean = float(np.mean(energy))
252
  E_std = float(np.std(energy))
253
 
254
- # Núcleo de 7 features
255
  feats: Dict[str, float] = {
256
  "cosine_pa": cosine_pa,
257
  "len_ratio": len_ratio,
@@ -262,7 +406,6 @@ def compute_rrf_features(prompt: str, answer: str) -> Dict[str, float]:
262
  "dirac_energy_std": E_std,
263
  }
264
 
265
- # Derivadas para llegar a 15 (igual que en el CSV)
266
  S_max = math.log(N)
267
  feats["entropy_norm"] = feats["dirac_entropy_final"] / S_max
268
  feats["entropy_abs_delta"] = abs(feats["dirac_entropy_delta"])
@@ -343,10 +486,7 @@ ROLE_PROFILES: Dict[str, Dict[str, float]] = {
343
  }
344
 
345
 
346
- def apply_role_profile(
347
- scores: Dict[str, float],
348
- role_name: Optional[str],
349
- ) -> Dict[str, Any]:
350
  if not role_name:
351
  role_name = "default"
352
 
@@ -370,52 +510,93 @@ def apply_role_profile(
370
 
371
 
372
  # ============================
373
- # RRF Tutor: carga de dataset savant_rrf1
374
  # ============================
375
 
376
- print(f"🔄 [Startup] Cargando dataset para RRF Tutor: {RRF_TUTOR_DATASET_ID}...", flush=True)
377
- try:
378
- ds_rrf = load_dataset(RRF_TUTOR_DATASET_ID, split="train")
379
- ds_rrf = ds_rrf.filter(
380
- lambda ex: ex.get("prompt") is not None and ex.get("completion") is not None
381
- )
382
- print(f"✅ Dataset RRF Tutor cargado. Ejemplos útiles: {len(ds_rrf)}", flush=True)
383
- except Exception as e:
384
- print(f"❌ Error cargando dataset RRF Tutor: {e}", file=sys.stderr, flush=True)
385
- ds_rrf = None
386
-
387
- if ds_rrf is not None:
388
- print("🔄 [Startup] Construyendo textos y embeddings para RRF Tutor...", flush=True)
389
- rrf_corpus_texts: List[str] = []
390
- rrf_corpus_prompts: List[str] = []
391
- rrf_corpus_completions: List[str] = []
392
-
393
- for ex in ds_rrf:
394
- p = ex["prompt"]
395
- c = ex["completion"]
396
- rrf_corpus_prompts.append(p)
397
- rrf_corpus_completions.append(c)
398
- rrf_corpus_texts.append(p + "\n\n" + c)
399
-
400
- rrf_corpus_embeds = encoder.encode(
401
- rrf_corpus_texts,
402
- convert_to_numpy=True,
403
- show_progress_bar=True,
404
- normalize_embeddings=True,
405
- )
406
- print("✅ [RRF Tutor] Embeddings construidos.", flush=True)
 
 
 
407
  else:
408
- # Dimensión consistente con el encoder
409
- dim = encoder.get_sentence_embedding_dimension()
410
- rrf_corpus_texts = []
411
- rrf_corpus_prompts = []
412
- rrf_corpus_completions = []
413
- rrf_corpus_embeds = np.zeros((0, dim), dtype=np.float32)
414
- print("⚠️ [RRF Tutor] Dataset no disponible, el endpoint devolverá error si se usa.", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
 
417
  # ============================
418
- # FastAPI app & modelos Pydantic
419
  # ============================
420
 
421
  class EvaluateRequest(BaseModel):
@@ -466,6 +647,9 @@ class RerankDocumentResult(BaseModel):
466
 
467
 
468
  class RerankResponse(BaseModel):
 
 
 
469
  model_id: str
470
  alpha: float
471
  query_embedding_norm: bool
@@ -476,11 +660,11 @@ class RRFTutorRequest(BaseModel):
476
  query: str = Field(..., description="Pregunta o fragmento de ecuación/idea RRF.")
477
  max_examples: int = Field(
478
  3, ge=1, le=8,
479
- description="Número de ejemplos de savant_rrf1 a recuperar (1-8)."
480
  )
481
  include_raw_context: bool = Field(
482
  False,
483
- description="Si es true, devuelve los ejemplos recuperados."
484
  )
485
 
486
 
@@ -495,10 +679,14 @@ class RRFTutorResponse(BaseModel):
495
  retrieved: Optional[List[RetrievedExample]] = None
496
 
497
 
 
 
 
 
498
  app = FastAPI(
499
  title="Savant RRF Φ12.0 API",
500
- description="Dirac-Resonant conceptual quality layer + reranking + RRF Tutor.",
501
- version="1.1.0",
502
  )
503
 
504
 
@@ -543,54 +731,6 @@ def _compute_rerank_scores(query: str, docs: List[str], alpha: float, norm_query
543
  return reranked
544
 
545
 
546
- # ============================
547
- # Utilidades /v1/rrf_tutor
548
- # ============================
549
-
550
- def rrf_tutor_retrieve_examples(query: str, top_k: int = 3):
551
- if rrf_corpus_embeds is None or len(rrf_corpus_embeds) == 0:
552
- raise RuntimeError("Embeddings de RRF Tutor no están disponibles.")
553
-
554
- q_emb = encoder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
555
- sims = np.dot(rrf_corpus_embeds, q_emb)
556
-
557
- top_k = min(top_k, len(rrf_corpus_embeds))
558
- top_idx = np.argsort(-sims)[:top_k]
559
-
560
- results = []
561
- for idx in top_idx:
562
- results.append(
563
- {
564
- "idx": int(idx),
565
- "score": float(sims[idx]),
566
- "prompt": rrf_corpus_prompts[idx],
567
- "completion": rrf_corpus_completions[idx],
568
- }
569
- )
570
- return results
571
-
572
-
573
- def rrf_tutor_build_answer(query: str, retrieved_examples):
574
- if not retrieved_examples:
575
- return (
576
- "No encontré ejemplos relevantes en el dataset RRF Tutor para tu consulta. "
577
- "Intenta reformular la pregunta o revisar la configuración del dataset."
578
- )
579
-
580
- best = retrieved_examples[0]
581
- base_completion = best["completion"]
582
-
583
- answer = (
584
- "🔎 Respuesta basada en el ejemplo más cercano del corpus RRF:\n\n"
585
- f"{base_completion}\n\n"
586
- "💡 Nota: Esta es una versión mínima que reutiliza directamente la 'completion' "
587
- "del ejemplo más similar en savant_rrf1. En una versión extendida, aquí se "
588
- "conectaría un LLM pequeño (TinyLlama, etc.) que use varios ejemplos como "
589
- "contexto para generar una explicación personalizada a tu `query`."
590
- )
591
- return answer
592
-
593
-
594
  # ============================
595
  # Endpoints
596
  # ============================
@@ -607,6 +747,9 @@ def health():
607
  "encoder_model_id": ENCODER_MODEL_ID,
608
  "meta_logit_filename": META_LOGIT_FILENAME,
609
  "N_sites": N,
 
 
 
610
  }
611
 
612
 
@@ -626,14 +769,19 @@ def evaluate(req: EvaluateRequest):
626
 
627
  role_profile = apply_role_profile(scores, req.model_label)
628
 
629
- # Segunda simulación solo para resumen visual
 
 
 
 
 
630
  rng = np.random.default_rng(
631
  abs(hash(req.prompt + req.answer + "sim")) % (2 ** 32)
632
  )
633
  vec = rng.normal(0, 1, (2 * N,)) + 1j * rng.normal(0, 1, (2 * N,))
634
  vec /= np.sqrt(np.vdot(vec, vec))
635
  psi0 = vec
636
- sim = evolve_dirac_shell(psi0, H_BASE, dt=0.05, steps=60, record_every=20)
637
 
638
  sim_summary = {
639
  "entropy_initial": float(sim["entropy"][0]),
@@ -691,10 +839,10 @@ def rrf_tutor_endpoint(body: RRFTutorRequest):
691
  if not body.query or not body.query.strip():
692
  raise HTTPException(status_code=400, detail="El campo 'query' no puede estar vacío.")
693
 
694
- if ds_rrf is None or rrf_corpus_embeds is None or len(rrf_corpus_embeds) == 0:
695
  raise HTTPException(
696
  status_code=500,
697
- detail="El dataset/embeddings de RRF Tutor no están disponibles en este momento."
698
  )
699
 
700
  try:
 
1
  import os
2
  import sys
3
  import math
4
+ import json
5
  from typing import Optional, Dict, Any, List
6
 
7
  import numpy as np
 
10
 
11
  from fastapi import FastAPI, HTTPException
12
  from pydantic import BaseModel, Field
13
+ from pydantic import ConfigDict # para evitar warning de protected_namespaces
14
 
15
  from sentence_transformers import SentenceTransformer
16
  from huggingface_hub import hf_hub_download
17
  import joblib
18
 
19
+ import torch
20
+ import torch.nn as nn
21
 
22
 
23
  # ============================
24
+ # Configuración general
25
  # ============================
26
 
27
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
29
  ENCODER_MODEL_ID = "antonypamo/RRFSAVANTMADE"
30
  META_LOGIT_REPO = "antonypamo/RRFSavantMetaLogit"
31
  META_LOGIT_FILENAME = "logreg_rrf_savant_15.joblib"
32
+
33
+ # Dataset central con TODOS los artefactos
34
+ RRF_DATASET_REPO = "antonypamo/savant_rrf1_curated"
35
+
36
+
37
def hf_data_path(filename: str) -> str:
    """Download one file from the central RRF dataset repo and return its path.

    The file is requested from the dataset repository named by
    ``RRF_DATASET_REPO`` through ``hf_hub_download``; the value returned is
    the local (cached) path reported by that call.

    Args:
        filename: Name of the file inside the dataset repository.

    Returns:
        Local filesystem path of the downloaded file.
    """
    # An empty HF_TOKEN is forwarded as None instead of an empty string.
    auth_token = HF_TOKEN or None
    local_path = hf_hub_download(
        repo_id=RRF_DATASET_REPO,
        filename=filename,
        repo_type="dataset",
        token=auth_token,
    )
    return local_path
48
+
49
+
50
+ print("===== Application Startup =====", flush=True)
51
+
52
+ # ============================
53
+ # Cargar encoder y meta-logit
54
+ # ============================
55
 
56
  print("🔄 [Startup] Cargando encoder RRFSAVANTMADE...", flush=True)
57
  try:
 
66
  meta_logit_path = hf_hub_download(
67
  repo_id=META_LOGIT_REPO,
68
  filename=META_LOGIT_FILENAME,
69
+ token=HF_TOKEN or None,
70
  )
71
  print(f"🔄 [Startup] Cargando modelo meta-logit '{META_LOGIT_FILENAME}'...", flush=True)
72
  meta_logit = joblib.load(meta_logit_path)
 
80
  raise
81
 
82
 
83
+ # ============================
84
+ # Rutas a artefactos desde el dataset central
85
+ # ============================
86
+
87
def safe_hf(path_name: str) -> Optional[str]:
    """Best-effort variant of ``hf_data_path``.

    Args:
        path_name: File name to fetch from the dataset repository.

    Returns:
        The local path on success, or ``None`` when anything in the download
        step raised. Failures are reported on stderr instead of propagating,
        so callers must check the result for ``None``.
    """
    resolved: Optional[str] = None
    try:
        resolved = hf_data_path(path_name)
        print(f"✅ [Dataset] Descargado {path_name}", flush=True)
    except Exception as e:  # deliberate catch-all: optional artifacts may be absent
        print(f"⚠️ [Dataset] No se pudo descargar {path_name}: {e}", file=sys.stderr, flush=True)
        resolved = None
    return resolved
95
+
96
+
97
# Local paths of the artifacts fetched from the central dataset repo.
# Each constant is either a path string or None when its download failed
# (safe_hf reports the failure on stderr and returns None).

# Torch snapshots loaded further down in this module.
SAVANT_CNN_PATH = safe_hf("savant_cnn.pt")
RRF_NODES_PATH = safe_hf("rrf_nodes.pt")

# JSONL corpora and the corpus index file.
RRF_TUTOR_JSONL_PATH = safe_hf("rrf_tutor_curated.jsonl")
RRF_SEMANTIC_CORPUS_PATH = safe_hf("RRF_SAVANT_SEMANTIC_CORPUS.jsonl")
RRF_CORPUS_INDEX_PATH = safe_hf("RRF_SAVANT_CORPUS.index")

# CSV exports (resonance matrix, energy profile, eigen spectrum).
PHYS_RRF_RESONANCE_MATRIX = safe_hf("rrf_resonance_matrix.csv")
PHYS_RRF_ENERGY_PROFILE = safe_hf("rrf_energy_profile.csv")
PHYS_RRF_EIGEN_SPECTRUM = safe_hf("rrf_eigen_spectrum.csv")

# CSV exports carrying the "_13" suffix.
PHYS_RES_MATRIX_13 = safe_hf("resonance_matrix_13.csv")
PHYS_NODES_13 = safe_hf("nodes_13.csv")
PHYS_ENERGY_LOGPHI_13 = safe_hf("energy_logphi_13.csv")
PHYS_DEGREE_13 = safe_hf("degree_13.csv")
PHYS_ADJ_13 = safe_hf("adjacency_13.csv")
112
+
113
+
114
+ # ============================
115
+ # Savant CNN + nodos RRF (para futura integración)
116
+ # ============================
117
+
118
class SavantCNN(nn.Module):
    """1-D convolutional encoder whose parameter shapes match the checkpoint.

    Layers:
        - conv1: in_channels -> 32
        - conv2: 32 -> 64
        - conv3: 64 -> 128
        - pool:  adaptive average pooling down to ``_POOL_BINS`` positions
        - fc:    128 * ``_POOL_BINS`` (= 512) -> out_dim

    The ``fc`` layer keeps its 512 input features so the saved ``state_dict``
    still loads. Pooling to a single position would yield only 128 features
    and make the ``fc`` matmul fail, so the pooled map is kept at 4 positions
    and flattened (128 channels x 4 bins = 512).

    NOTE(review): pooling has no parameters, so the checkpoint cannot tell us
    how the 512 features were produced during training; 4-bin average pooling
    is the shape-compatible choice -- confirm against the training code.

    Args:
        in_channels: Number of channels of the input signal.
        out_dim: Size of the embedding produced by ``fc``.
    """

    _CONV_OUT_CHANNELS = 128
    _POOL_BINS = 4  # 128 channels * 4 bins = 512 inputs expected by fc

    def __init__(self, in_channels: int = 1, out_dim: int = 64):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(64, self._CONV_OUT_CHANNELS, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveAvgPool1d(self._POOL_BINS)
        # 512 input features: must stay in sync with the checkpoint's fc.weight.
        self.fc = nn.Linear(self._CONV_OUT_CHANNELS * self._POOL_BINS, out_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode a batch of signals.

        Args:
            x: Tensor shaped ``[batch, channels, length]``.

        Returns:
            Tensor shaped ``[batch, out_dim]``.
        """
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        # [batch, 128, 4] -> [batch, 512] so the feature width matches fc.
        x = self.pool(x).flatten(start_dim=1)
        return self.fc(x)
142
+
143
+
144
# Prefer the GPU when one is visible to torch; otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Both stay None when the corresponding artifact is missing or fails to load;
# the rest of the module checks for None before using them.
savant_cnn: Optional[SavantCNN] = None
rrf_nodes: Optional[Any] = None

# --- Savant CNN checkpoint ---
# NOTE(review): torch.load unpickles the downloaded file, which can execute
# arbitrary code; this is only safe while the dataset repo is trusted.
# Consider weights_only=True if the installed torch version supports it.
if SAVANT_CNN_PATH is None:
    print("⚠️ SAVANT_CNN_PATH is None, no se cargó CNN.", file=sys.stderr)
else:
    try:
        state_dict = torch.load(SAVANT_CNN_PATH, map_location=device)
        # Diagnostics: show what the checkpoint contains before loading it.
        print("✅ Checkpoint keys:", list(state_dict.keys()))
        print("ℹ️ conv3.weight shape en checkpoint:", state_dict["conv3.weight"].shape)
        print("ℹ️ fc.weight shape en checkpoint:", state_dict["fc.weight"].shape)

        savant_cnn = SavantCNN()
        savant_cnn.load_state_dict(state_dict)
        savant_cnn.to(device)
        savant_cnn.eval()
        print("✅ Loaded Savant CNN from", SAVANT_CNN_PATH)
    except Exception as e:
        # Any failure (bad file, missing keys, shape mismatch) disables the CNN.
        print("⚠️ Error loading Savant CNN:", e, file=sys.stderr)
        savant_cnn = None

# --- RRF node snapshot ---
# NOTE(review): same unpickling caveat as above applies to this torch.load.
if RRF_NODES_PATH is None:
    print("⚠️ RRF_NODES_PATH is None, no se cargaron nodos.", file=sys.stderr)
else:
    try:
        rrf_nodes = torch.load(RRF_NODES_PATH, map_location=device)
        print("✅ Loaded RRF nodes from", RRF_NODES_PATH)
        print("Type of rrf_nodes:", type(rrf_nodes))
        if isinstance(rrf_nodes, dict):
            # Only the first ten keys are printed to keep the log short.
            print("🔑 rrf_nodes keys:", list(rrf_nodes.keys())[:10])
    except Exception as e:
        print("⚠️ Error loading RRF nodes:", e, file=sys.stderr)
        rrf_nodes = None
180
+
181
+
182
def fuse_cnn_with_node(example_length: int = 64):
    """Demo helper: concatenate a CNN embedding with one RRF node vector.

    A random signal of ``example_length`` samples is pushed through
    ``savant_cnn`` and the result is concatenated with the ``"linguistic"``
    vector of the first entry in ``rrf_nodes``. When that vector cannot be
    read, a random vector as wide as the CNN embedding is used instead and
    the reason is printed. Not exposed as an endpoint.

    Args:
        example_length: Length of the random input signal fed to the CNN.

    Returns:
        A ``[1, cnn_dim + node_dim]`` tensor, or ``None`` when the CNN or the
        node snapshot is not loaded.
    """
    if savant_cnn is None or rrf_nodes is None:
        print("Fusion not available – missing CNN or RRF nodes snapshot.")
        return None

    # Pure inference on random input: no autograd graph is needed.
    with torch.no_grad():
        x = torch.randn(1, 1, example_length, device=device)
        cnn_emb = savant_cnn(x)  # [1, out_dim]

    linguistic_vec = None
    try:
        # Assumes rrf_nodes is a mapping whose first entry looks like
        # {"linguistic": <vector>, ...}; anything else falls back below.
        node0_key = list(rrf_nodes.keys())[0]
        node0 = rrf_nodes[node0_key]
        if isinstance(node0, dict) and "linguistic" in node0:
            raw_vec = node0["linguistic"]
            if isinstance(raw_vec, torch.Tensor):
                # Copy so the stored snapshot is never modified, and align
                # device/dtype with the CNN output before concatenating.
                linguistic_vec = raw_vec.detach().clone().to(device=device, dtype=cnn_emb.dtype)
            else:
                linguistic_vec = torch.tensor(raw_vec, dtype=cnn_emb.dtype, device=device)
    except Exception as e:  # best-effort demo: report and fall back
        print("Could not read linguistic vector from RRF nodes, using random fallback:", e)
        linguistic_vec = None

    if linguistic_vec is None:
        linguistic_vec = torch.randn(cnn_emb.shape[-1], device=device)

    # reshape (rather than unsqueeze) so a stored [1, D] or [D] vector both
    # become one [1, D] row and the concatenation below cannot rank-mismatch.
    linguistic_vec = linguistic_vec.reshape(1, -1)
    fused = torch.cat([cnn_emb, linguistic_vec], dim=-1)
    print("Fused embedding shape (CNN + linguistic node):", fused.shape)
    return fused
213
+
214
+
215
  # ============================
216
  # Geometría icosaédrica Φ12.0
217
  # ============================
 
249
 
250
  if alpha_log > 0.0:
251
  corr = 1.0 + alpha_log * np.log1p(dist ** 2)
252
+ # mantener diagonal en 1 para evitar log(0)
253
  corr[range(N), range(N)] = 1.0
254
  W = W / corr
255
 
 
355
  }
356
 
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  # ============================
359
  # Core RRF: embeddings + features + scores
360
  # ============================
 
365
 
366
 
367
  def compute_rrf_features(prompt: str, answer: str) -> Dict[str, float]:
 
368
  e_p = get_embedding(prompt)
369
  e_a = get_embedding(answer)
370
 
371
  cosine_pa = float(np.dot(e_p, e_a))
372
  len_ratio = len(answer) / (len(prompt) + 1.0)
373
 
 
374
  rng = np.random.default_rng(abs(hash(prompt + answer)) % (2 ** 32))
375
  vec = rng.normal(0, 1, (2 * N,)) + 1j * rng.normal(0, 1, (2 * N,))
376
  vec /= np.sqrt(np.vdot(vec, vec))
377
  psi0 = vec
378
 
379
+ H = build_dirac_hamiltonian(
380
+ m=0.25, v=1.0, sigma=0.618,
381
+ alpha_log=0.10, q=1.0,
382
+ flux_vector=(0.0, 0.0, 0.0),
383
+ gauge_scale=0.0,
384
+ )
385
+
386
+ out = evolve_dirac_shell(psi0, H, dt=0.05, steps=100, record_every=25)
387
 
388
  entropy = out["entropy"]
389
  energy = out["energy"]
 
396
  E_mean = float(np.mean(energy))
397
  E_std = float(np.std(energy))
398
 
 
399
  feats: Dict[str, float] = {
400
  "cosine_pa": cosine_pa,
401
  "len_ratio": len_ratio,
 
406
  "dirac_energy_std": E_std,
407
  }
408
 
 
409
  S_max = math.log(N)
410
  feats["entropy_norm"] = feats["dirac_entropy_final"] / S_max
411
  feats["entropy_abs_delta"] = abs(feats["dirac_entropy_delta"])
 
486
  }
487
 
488
 
489
+ def apply_role_profile(scores: Dict[str, float], role_name: Optional[str]) -> Dict[str, Any]:
 
 
 
490
  if not role_name:
491
  role_name = "default"
492
 
 
510
 
511
 
512
  # ============================
513
+ # RRF Tutor (curated JSONL)
514
  # ============================
515
 
516
# Load the curated RRF Tutor examples and embed them once at startup.
#
# Module-level results (read by the tutor helpers and the health endpoint):
#   rrf_corpus_prompts / rrf_corpus_completions -- parallel lists of strings
#   rrf_corpus_texts   -- "prompt\n\ncompletion" strings that get embedded
#   rrf_corpus_embeds  -- one embedding row per text; zero rows when unavailable
#
# The three lists and the embedding matrix are kept consistent: on any
# failure all of them end up empty, so the reported example count can never
# disagree with the availability of the embeddings.
print("🔄 [Startup] Cargando dataset RRF Tutor (curated JSONL)...", flush=True)

# Column count of the empty placeholder matrix. Only its row count (0) is
# ever inspected here. NOTE(review): 384 is presumably the encoder's
# embedding width -- confirm against the encoder model.
_RRF_EMPTY_EMBED_DIM = 384

rrf_corpus_texts: List[str] = []
rrf_corpus_prompts: List[str] = []
rrf_corpus_completions: List[str] = []
rrf_corpus_embeds = np.zeros((0, _RRF_EMPTY_EMBED_DIM), dtype=np.float32)

if RRF_TUTOR_JSONL_PATH is None:
    print("⚠️ No se encontró RRF_TUTOR_JSONL_PATH.", file=sys.stderr, flush=True)
else:
    try:
        with open(RRF_TUTOR_JSONL_PATH, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                try:
                    ex = json.loads(line)
                except json.JSONDecodeError as e:
                    # A single corrupt record must not discard the whole corpus.
                    print(
                        f"⚠️ RRF Tutor JSONL: línea {line_no} inválida, se omite: {e}",
                        file=sys.stderr,
                        flush=True,
                    )
                    continue
                if not isinstance(ex, dict):
                    continue
                p = ex.get("prompt")
                c = ex.get("completion")
                # Only non-empty string pairs are usable: they are concatenated below.
                if isinstance(p, str) and isinstance(c, str) and p and c:
                    rrf_corpus_prompts.append(p)
                    rrf_corpus_completions.append(c)
                    rrf_corpus_texts.append(p + "\n\n" + c)

        if rrf_corpus_texts:
            print(f"✅ RRF Tutor: {len(rrf_corpus_texts)} ejemplos cargados.", flush=True)
            rrf_corpus_embeds = encoder.encode(
                rrf_corpus_texts,
                convert_to_numpy=True,
                show_progress_bar=True,
                normalize_embeddings=True,
            )
            print("✅ [RRF Tutor] Embeddings construidos.", flush=True)
        else:
            print("⚠️ RRF Tutor JSONL no tiene ejemplos válidos.", file=sys.stderr, flush=True)
    except Exception as e:
        print(f"❌ Error cargando/parsing RRF Tutor JSONL: {e}", file=sys.stderr, flush=True)
        # Drop any partially loaded examples so the lists and the embedding
        # matrix stay in agreement (both empty).
        rrf_corpus_texts.clear()
        rrf_corpus_prompts.clear()
        rrf_corpus_completions.clear()
        rrf_corpus_embeds = np.zeros((0, _RRF_EMPTY_EMBED_DIM), dtype=np.float32)
553
+
554
+
555
def rrf_tutor_retrieve_examples(query: str, top_k: int = 3):
    """Return the corpus examples most similar to ``query``.

    The query is embedded with the module-level ``encoder`` (normalized) and
    scored against every row of ``rrf_corpus_embeds`` with a dot product;
    the highest-scoring rows are returned in descending score order.

    Args:
        query: Text to search for.
        top_k: Maximum number of examples to return; capped at the corpus size.

    Returns:
        A list of dicts with keys ``idx``, ``score``, ``prompt`` and
        ``completion``.

    Raises:
        RuntimeError: If the corpus embeddings are missing or empty.
    """
    corpus_size = 0 if rrf_corpus_embeds is None else len(rrf_corpus_embeds)
    if corpus_size == 0:
        raise RuntimeError("Embeddings de RRF Tutor no están disponibles.")

    query_vec = encoder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
    scores = np.dot(rrf_corpus_embeds, query_vec)

    # Negating the scores makes argsort yield best-first indices.
    limit = min(top_k, corpus_size)
    best_first = np.argsort(-scores)[:limit]

    return [
        {
            "idx": int(i),
            "score": float(scores[i]),
            "prompt": rrf_corpus_prompts[i],
            "completion": rrf_corpus_completions[i],
        }
        for i in best_first
    ]
576
+
577
+
578
def rrf_tutor_build_answer(query: str, retrieved_examples):
    """Compose the tutor reply from the retrieved examples.

    Only the first (closest) example is used: its ``completion`` is wrapped
    between a fixed header and a fixed explanatory note. ``query`` is accepted
    for interface symmetry but is not used when building the text.

    Args:
        query: The user's question (currently unused).
        retrieved_examples: Examples ordered best-first, each a dict with at
            least a ``"completion"`` key; may be empty or ``None``.

    Returns:
        The reply text, or a fixed "nothing found" message when there are no
        examples.
    """
    if not retrieved_examples:
        return (
            "No encontré ejemplos relevantes en el dataset RRF Tutor para tu consulta. "
            "Intenta reformular la pregunta o revisar la configuración del dataset."
        )

    closest_completion = retrieved_examples[0]["completion"]

    header = "🔎 Respuesta basada en el ejemplo más cercano del corpus RRF:\n\n"
    note = (
        "💡 Nota: Esta es una versión mínima que reutiliza directamente la 'completion' "
        "del ejemplo más similar en savant_rrf1_curated. En una versión extendida, aquí se "
        "conectaría un LLM pequeño que use varios ejemplos como contexto."
    )
    return f"{header}{closest_completion}\n\n{note}"
596
 
597
 
598
  # ============================
599
+ # FastAPI models
600
  # ============================
601
 
602
  class EvaluateRequest(BaseModel):
 
647
 
648
 
649
  class RerankResponse(BaseModel):
650
+ # evitar warning con 'model_id'
651
+ model_config = ConfigDict(protected_namespaces=())
652
+
653
  model_id: str
654
  alpha: float
655
  query_embedding_norm: bool
 
660
  query: str = Field(..., description="Pregunta o fragmento de ecuación/idea RRF.")
661
  max_examples: int = Field(
662
  3, ge=1, le=8,
663
+ description="Número de ejemplos de savant_rrf1_curated a recuperar (1-8).",
664
  )
665
  include_raw_context: bool = Field(
666
  False,
667
+ description="Si es true, devuelve los ejemplos recuperados.",
668
  )
669
 
670
 
 
679
  retrieved: Optional[List[RetrievedExample]] = None
680
 
681
 
682
+ # ============================
683
+ # FastAPI app
684
+ # ============================
685
+
686
  app = FastAPI(
687
  title="Savant RRF Φ12.0 API",
688
+ description="Dirac-Resonant conceptual quality layer + reranking + RRF Tutor (+ CNN/nodes listos).",
689
+ version="1.2.0",
690
  )
691
 
692
 
 
731
  return reranked
732
 
733
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  # ============================
735
  # Endpoints
736
  # ============================
 
747
  "encoder_model_id": ENCODER_MODEL_ID,
748
  "meta_logit_filename": META_LOGIT_FILENAME,
749
  "N_sites": N,
750
+ "rrf_tutor_examples": len(rrf_corpus_texts),
751
+ "cnn_loaded": savant_cnn is not None,
752
+ "rrf_nodes_loaded": rrf_nodes is not None,
753
  }
754
 
755
 
 
769
 
770
  role_profile = apply_role_profile(scores, req.model_label)
771
 
772
+ H = build_dirac_hamiltonian(
773
+ m=0.25, v=1.0, sigma=0.618,
774
+ alpha_log=0.10, q=1.0,
775
+ flux_vector=(0.0, 0.0, 0.0),
776
+ gauge_scale=0.0,
777
+ )
778
  rng = np.random.default_rng(
779
  abs(hash(req.prompt + req.answer + "sim")) % (2 ** 32)
780
  )
781
  vec = rng.normal(0, 1, (2 * N,)) + 1j * rng.normal(0, 1, (2 * N,))
782
  vec /= np.sqrt(np.vdot(vec, vec))
783
  psi0 = vec
784
+ sim = evolve_dirac_shell(psi0, H, dt=0.05, steps=60, record_every=20)
785
 
786
  sim_summary = {
787
  "entropy_initial": float(sim["entropy"][0]),
 
839
  if not body.query or not body.query.strip():
840
  raise HTTPException(status_code=400, detail="El campo 'query' no puede estar vacío.")
841
 
842
+ if rrf_corpus_embeds is None or len(rrf_corpus_embeds) == 0:
843
  raise HTTPException(
844
  status_code=500,
845
+ detail="El dataset/embeddings de RRF Tutor no están disponibles en este momento.",
846
  )
847
 
848
  try: