antonypamo committed on
Commit
11c4398
·
verified ·
1 Parent(s): a4b0b5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +490 -99
app.py CHANGED
@@ -1,44 +1,63 @@
1
  import os
 
 
 
 
2
  import numpy as np
3
  from numpy.linalg import norm
4
  from scipy.linalg import expm
5
- from sentence_transformers import SentenceTransformer
6
- from huggingface_hub import hf_hub_download
7
- import joblib
8
 
9
- from fastapi import FastAPI
10
  from pydantic import BaseModel, Field
11
- from typing import Optional, Dict, Any, List
12
 
13
- # NOTE: HF_TOKEN is expected to be set as an environment variable in a real deployment
14
- # For local testing, you might set it here or pass it directly
15
- HF_TOKEN = os.environ.get("HF_TOKEN", "") # Use environment variable, default to empty
16
- os.environ["HF_TOKEN"] = HF_TOKEN
17
 
18
- ENCODER_MODEL_ID = "antonypamo/RRFSAVANTMADE" # encoder RRF
19
- META_LOGIT_REPO = "antonypamo/RRFSavantMetaLogit" # repo del meta-logit
20
- META_LOGIT_FILENAME = "logreg_rrf_savant.joblib" # NUEVO archivo del meta-logit en HF
21
 
22
- print("🔄 Cargando encoder RRFSAVANTMADE...")
23
- encoder = SentenceTransformer(ENCODER_MODEL_ID)
24
 
25
- print("🔄 Descargando meta-logit v2 desde HF Hub...")
26
- meta_logit_path = hf_hub_download(
27
- repo_id=META_LOGIT_REPO,
28
- filename=META_LOGIT_FILENAME,
29
- token=os.environ.get("HF_TOKEN")
30
- )
31
 
32
- print("🔄 Cargando modelo meta-logit v2...")
33
- meta_logit = joblib.load(meta_logit_path)
34
 
35
- print("✅ Encoder y meta-logit v2 cargados correctamente.")
 
 
 
36
 
 
 
 
 
 
 
 
37
 
38
- # =========================
39
- # Geometría icosaédrica
40
- # (Copied from cell lyVrwdhgIOlq)
41
- # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  phi = (1 + np.sqrt(5)) / 2
44
  nodes = np.array([
@@ -49,28 +68,30 @@ nodes = np.array([
49
  nodes /= norm(nodes, axis=1, keepdims=True)
50
  N = nodes.shape[0] # 12 nodos
51
 
52
- # Pauli
53
  sigma_x = np.array([[0, 1], [1, 0]], dtype=complex)
54
  sigma_y = np.array([[0, -1j], [1j, 0]], dtype=complex)
55
  sigma_z = np.array([[1, 0], [0, -1]], dtype=complex)
56
 
 
57
  def kron_IN(M, N_sites):
58
  return np.kron(M, np.eye(N_sites, dtype=complex))
59
 
 
60
  def site_op(block_2x2, i, j, N_sites):
61
  K = np.zeros((N_sites, N_sites), dtype=complex)
62
  K[i, j] = 1.0
63
  return np.kron(K, block_2x2)
64
 
 
65
  def geodesic_kernel(nodes, sigma=0.618, alpha_log=0.10):
66
  diff = nodes[:, None, :] - nodes[None, :, :]
67
  dist = norm(diff, axis=-1)
68
 
69
- W = np.exp(-(dist**2) / (sigma**2))
70
  np.fill_diagonal(W, 0.0)
71
 
72
  if alpha_log > 0.0:
73
- corr = 1.0 + alpha_log * np.log1p(dist**2)
74
  corr[range(N), range(N)] = 1.0
75
  W = W / corr
76
 
@@ -78,6 +99,7 @@ def geodesic_kernel(nodes, sigma=0.618, alpha_log=0.10):
78
  row_sums[row_sums == 0] = 1.0
79
  return W / row_sums
80
 
 
81
  def u1_edge_phases(nodes, flux_vector=(0.0, 0.0, 0.0), q=1.0, gauge_scale=1.0):
82
  A = gauge_scale * np.asarray(flux_vector, dtype=float)
83
  midpoints = (nodes[:, None, :] + nodes[None, :, :]) / 2.0
@@ -85,6 +107,7 @@ def u1_edge_phases(nodes, flux_vector=(0.0, 0.0, 0.0), q=1.0, gauge_scale=1.0):
85
  theta = 0.5 * (theta - theta.T)
86
  return theta * q
87
 
 
88
  def build_dirac_hamiltonian(
89
  m=0.25,
90
  v=1.0,
@@ -92,7 +115,7 @@ def build_dirac_hamiltonian(
92
  alpha_log=0.10,
93
  q=1.0,
94
  flux_vector=(0.0, 0.0, 0.0),
95
- gauge_scale=0.0
96
  ):
97
  W = geodesic_kernel(nodes, sigma=sigma, alpha_log=alpha_log)
98
 
@@ -103,10 +126,8 @@ def build_dirac_hamiltonian(
103
  else:
104
  U = np.ones((N, N), dtype=complex)
105
 
106
- # Término de masa
107
  H = np.kron(np.eye(N, dtype=complex), m * sigma_z)
108
 
109
- # Término cinético acoplado
110
  diff = nodes[:, None, :] - nodes[None, :, :]
111
  dist = norm(diff, axis=-1) + 1e-12
112
  d_hat = diff / dist[..., None]
@@ -121,28 +142,32 @@ def build_dirac_hamiltonian(
121
  nvec[2] * sigma_z)
122
  H += v * W[i, j] * U[i, j] * site_op(S, i, j, N)
123
 
124
- # Hermitizar por seguridad numérica
125
  H = 0.5 * (H + H.conj().T)
126
  return H
127
 
 
128
  def site_probs(psi):
129
  N2 = psi.shape[0]
130
  n = N2 // 2
131
  psi_mat = psi.reshape(n, 2)
132
- return np.sum(np.abs(psi_mat)**2, axis=1).real
 
133
 
134
  def chirality(psi):
135
  S = kron_IN(sigma_z, N)
136
  return float(np.vdot(psi, S @ psi).real)
137
 
 
138
  def energy_expectation(psi, H):
139
  return float(np.vdot(psi, H @ psi).real)
140
 
 
141
  def spatial_entropy(p):
142
  p = np.clip(p, 1e-12, 1.0)
143
  return float(-np.sum(p * np.log(p)).real)
144
 
145
- def evolve_dirac_shell(psi0, H, dt=0.05, steps=200, record_every=20):
 
146
  U = expm(-1j * dt * H)
147
  psi = psi0.copy()
148
 
@@ -172,52 +197,51 @@ def evolve_dirac_shell(psi0, H, dt=0.05, steps=200, record_every=20):
172
  }
173
 
174
 
175
- # =========================
176
- # Feature extraction and scoring
177
- # (Copied from cell DiknqWJZIZ5q)
178
- # =========================
179
 
180
  def get_embedding(text: str) -> np.ndarray:
181
  emb = encoder.encode([text], convert_to_numpy=True, normalize_embeddings=True)
182
  return emb[0]
183
 
184
- def compute_rrf_features(prompt: str, answer: str) -> dict:
185
- # Embeddings RRF
 
186
  e_p = get_embedding(prompt)
187
  e_a = get_embedding(answer)
188
 
189
  cosine_pa = float(np.dot(e_p, e_a))
190
  len_ratio = len(answer) / (len(prompt) + 1.0)
191
 
192
- # Estado inicial ligado al texto (seed reproducible)
193
- rng = np.random.default_rng(abs(hash(prompt + answer)) % (2**32))
194
- vec = rng.normal(0, 1, (2*N,)) + 1j * rng.normal(0, 1, (2*N,))
195
  vec /= np.sqrt(np.vdot(vec, vec))
196
  psi0 = vec
197
 
198
- # Hamiltoniano Dirac Φ12.0
199
  H = build_dirac_hamiltonian(
200
  m=0.25, v=1.0, sigma=0.618,
201
  alpha_log=0.10, q=1.0,
202
  flux_vector=(0.0, 0.0, 0.0),
203
- gauge_scale=0.0
204
  )
205
 
206
- out = evolve_dirac_shell(psi0, H, dt=0.05, steps=200, record_every=20)
207
 
208
- probs = out["probs"]
209
  energy = out["energy"]
210
  chir = out["chirality"]
211
- entropy = out["entropy"]
212
 
213
- S_initial = float(entropy[0])
214
  S_final = float(entropy[-1])
 
215
  S_delta = S_final - S_initial
216
  C_final = float(chir[-1])
217
  E_mean = float(np.mean(energy))
218
  E_std = float(np.std(energy))
219
 
220
- return {
 
221
  "cosine_pa": cosine_pa,
222
  "len_ratio": len_ratio,
223
  "dirac_entropy_final": S_final,
@@ -227,7 +251,21 @@ def compute_rrf_features(prompt: str, answer: str) -> dict:
227
  "dirac_energy_std": E_std,
228
  }
229
 
230
- def features_to_vector(feats: dict) -> np.ndarray:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  keys = [
232
  "cosine_pa",
233
  "len_ratio",
@@ -236,24 +274,30 @@ def features_to_vector(feats: dict) -> np.ndarray:
236
  "dirac_chirality_final",
237
  "dirac_energy_mean",
238
  "dirac_energy_std",
 
 
 
 
 
 
 
 
239
  ]
240
  return np.array([feats[k] for k in keys], dtype=float)
241
 
242
- def compute_scores_srff_crrf_ephi(prompt: str, answer: str):
 
243
  feats = compute_rrf_features(prompt, answer)
244
  x = features_to_vector(feats).reshape(1, -1)
245
 
246
- # meta-logit v2: pipeline (scaler + logistic regression)
247
  proba = meta_logit.predict_proba(x)[0]
248
  p_good = float(proba[1])
249
 
250
  SRRF = p_good
251
  CRRF = p_good * feats["cosine_pa"]
252
 
253
- S_final = feats["dirac_entropy_final"]
254
- S_max = np.log(N)
255
- norm_entropy = float(S_final / S_max)
256
-
257
  E_phi = 0.5 * (SRRF + norm_entropy)
258
 
259
  scores = {
@@ -265,59 +309,406 @@ def compute_scores_srff_crrf_ephi(prompt: str, answer: str):
265
  return scores, feats
266
 
267
 
268
- # =========================
269
- # FastAPI App
270
- # (Copied from cell LwlyX4-LIgKK)
271
- # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- app = FastAPI(
274
- title="Savant RRF Φ12.0 API",
275
- description="Evaluación conceptual resonante para texto generado por LLMs (SRRF / CRRF / E_phi).",
276
- version="1.0.0",
277
- )
278
 
279
- class EvaluateRequest(BaseModel):
280
- prompt: str = Field(..., description="Pregunta / instrucción original.")
281
- answer: str = Field(..., description="Respuesta generada por un LLM.")
282
- model_label: Optional[str] = Field(
283
- None, description="Etiqueta opcional del modelo que generó la respuesta."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
 
286
  class EvaluateResponse(BaseModel):
287
  scores: Dict[str, float]
288
  features: Dict[str, float]
289
  sim_summary: Dict[str, Any]
 
290
 
291
- @app.post("/evaluate", response_model=EvaluateResponse)
292
- def evaluate_endpoint(req: EvaluateRequest):
293
- scores, feats = compute_scores_srff_crrf_ephi(req.prompt, req.answer)
294
 
295
- # mini-sim extra para resumen diagnóstico simple
296
- H = build_dirac_hamiltonian(
297
- m=0.25, v=1.0, sigma=0.618,
298
- alpha_log=0.10, q=1.0,
299
- flux_vector=(0.0, 0.0, 0.0),
300
- gauge_scale=0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  )
302
- rng = np.random.default_rng(abs(hash(req.prompt + req.answer)) % (2**32))
303
- vec = rng.normal(0, 1, (2*N,)) + 1j * rng.normal(0, 1, (2*N,))
304
- vec /= np.sqrt(np.vdot(vec, vec))
305
- psi0 = vec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- sim = evolve_dirac_shell(psi0, H, dt=0.05, steps=100, record_every=25)
308
 
309
- sim_summary = {
310
- "entropy_initial": float(sim["entropy"][0]),
311
- "entropy_final": float(sim["entropy"][-1]),
312
- "chirality_initial": float(sim["chirality"][0]),
313
- "chirality_final": float(sim["chirality"][-1]),
314
- "energy_mean": float(np.mean(sim["energy"])),
315
- "energy_std": float(np.std(sim["energy"])),
316
- "N_sites": int(N),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  }
318
 
319
- return EvaluateResponse(
320
- scores=scores,
321
- features=feats,
322
- sim_summary=sim_summary,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import sys
3
+ import math
4
+ from typing import Optional, Dict, Any, List
5
+
6
  import numpy as np
7
  from numpy.linalg import norm
8
  from scipy.linalg import expm
 
 
 
9
 
10
+ from fastapi import FastAPI, HTTPException
11
  from pydantic import BaseModel, Field
 
12
 
13
+ from sentence_transformers import SentenceTransformer
14
+ from huggingface_hub import hf_hub_download
15
+ import joblib
 
16
 
17
+ from datasets import load_dataset # 🔹 para /v1/rrf_tutor
 
 
18
 
 
 
19
 
20
+ # ============================
21
+ # Configuración de modelos
22
+ # ============================
 
 
 
23
 
24
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
25
 
26
+ ENCODER_MODEL_ID = "antonypamo/RRFSAVANTMADE"
27
+ META_LOGIT_REPO = "antonypamo/RRFSavantMetaLogit"
28
+ META_LOGIT_FILENAME = "logreg_rrf_savant_15.joblib"
29
+ RRF_TUTOR_DATASET_ID = "antonypamo/savant_rrf1"
30
 
31
+ print("🔄 [Startup] Cargando encoder RRFSAVANTMADE...", flush=True)
32
+ try:
33
+ encoder = SentenceTransformer(ENCODER_MODEL_ID)
34
+ print("✅ [Startup] Encoder cargado.", flush=True)
35
+ except Exception as e:
36
+ print(f"❌ [Startup] Error al cargar encoder: {e}", file=sys.stderr, flush=True)
37
+ raise
38
 
39
+ print("🔄 [Startup] Descargando meta-logit desde HF Hub...", flush=True)
40
+ try:
41
+ meta_logit_path = hf_hub_download(
42
+ repo_id=META_LOGIT_REPO,
43
+ filename=META_LOGIT_FILENAME,
44
+ token=HF_TOKEN if HF_TOKEN else None, # si es público, puede ser None
45
+ )
46
+ print(f"🔄 [Startup] Cargando modelo meta-logit '{META_LOGIT_FILENAME}'...", flush=True)
47
+ meta_logit = joblib.load(meta_logit_path)
48
+ try:
49
+ print(f"🔎 [Startup] Meta-logit espera {meta_logit.n_features_in_} features.", flush=True)
50
+ except Exception:
51
+ print("⚠️ [Startup] No se pudo leer n_features_in_.", flush=True)
52
+ print("✅ [Startup] Meta-logit cargado.", flush=True)
53
+ except Exception as e:
54
+ print(f"❌ [Startup] Error al cargar meta-logit: {e}", file=sys.stderr, flush=True)
55
+ raise
56
+
57
+
58
+ # ============================
59
+ # Geometría icosaédrica Φ12.0
60
+ # ============================
61
 
62
  phi = (1 + np.sqrt(5)) / 2
63
  nodes = np.array([
 
68
  nodes /= norm(nodes, axis=1, keepdims=True)
69
  N = nodes.shape[0] # 12 nodos
70
 
 
71
  sigma_x = np.array([[0, 1], [1, 0]], dtype=complex)
72
  sigma_y = np.array([[0, -1j], [1j, 0]], dtype=complex)
73
  sigma_z = np.array([[1, 0], [0, -1]], dtype=complex)
74
 
75
+
def kron_IN(M, N_sites):
    """Return the Kronecker product of ``M`` with a complex identity matrix.

    Args:
        M: Left factor of the Kronecker product.
        N_sites: Size of the complex identity matrix used as the right factor.

    Returns:
        ``np.kron(M, I)`` where ``I`` is the ``N_sites x N_sites`` identity.
    """
    identity = np.identity(N_sites, dtype=complex)
    return np.kron(M, identity)
78
 
79
+
def site_op(block_2x2, i, j, N_sites):
    """Place ``block_2x2`` at site pair ``(i, j)`` of a larger block matrix.

    Args:
        block_2x2: Block to embed (right factor of the Kronecker product).
        i: Row site index of the single non-zero selector entry.
        j: Column site index of the single non-zero selector entry.
        N_sites: Number of sites; the selector is ``N_sites x N_sites``.

    Returns:
        ``np.kron(K, block_2x2)`` where ``K`` is zero everywhere except
        ``K[i, j] == 1``.
    """
    row_unit = np.zeros(N_sites, dtype=complex)
    col_unit = np.zeros(N_sites, dtype=complex)
    row_unit[i] = 1.0
    col_unit[j] = 1.0
    # The outer product of two unit vectors is the single-entry selector matrix.
    selector = np.outer(row_unit, col_unit)
    return np.kron(selector, block_2x2)
84
 
85
+
86
  def geodesic_kernel(nodes, sigma=0.618, alpha_log=0.10):
87
  diff = nodes[:, None, :] - nodes[None, :, :]
88
  dist = norm(diff, axis=-1)
89
 
90
+ W = np.exp(-(dist ** 2) / (sigma ** 2))
91
  np.fill_diagonal(W, 0.0)
92
 
93
  if alpha_log > 0.0:
94
+ corr = 1.0 + alpha_log * np.log1p(dist ** 2)
95
  corr[range(N), range(N)] = 1.0
96
  W = W / corr
97
 
 
99
  row_sums[row_sums == 0] = 1.0
100
  return W / row_sums
101
 
102
+
103
  def u1_edge_phases(nodes, flux_vector=(0.0, 0.0, 0.0), q=1.0, gauge_scale=1.0):
104
  A = gauge_scale * np.asarray(flux_vector, dtype=float)
105
  midpoints = (nodes[:, None, :] + nodes[None, :, :]) / 2.0
 
107
  theta = 0.5 * (theta - theta.T)
108
  return theta * q
109
 
110
+
111
  def build_dirac_hamiltonian(
112
  m=0.25,
113
  v=1.0,
 
115
  alpha_log=0.10,
116
  q=1.0,
117
  flux_vector=(0.0, 0.0, 0.0),
118
+ gauge_scale=0.0,
119
  ):
120
  W = geodesic_kernel(nodes, sigma=sigma, alpha_log=alpha_log)
121
 
 
126
  else:
127
  U = np.ones((N, N), dtype=complex)
128
 
 
129
  H = np.kron(np.eye(N, dtype=complex), m * sigma_z)
130
 
 
131
  diff = nodes[:, None, :] - nodes[None, :, :]
132
  dist = norm(diff, axis=-1) + 1e-12
133
  d_hat = diff / dist[..., None]
 
142
  nvec[2] * sigma_z)
143
  H += v * W[i, j] * U[i, j] * site_op(S, i, j, N)
144
 
 
145
  H = 0.5 * (H + H.conj().T)
146
  return H
147
 
148
+
def site_probs(psi):
    """Return the per-site probability of a two-component-per-site state.

    ``psi`` is reshaped to ``(len(psi) // 2, 2)`` so that consecutive pairs of
    amplitudes belong to the same site; the squared magnitudes of each pair
    are summed.

    Args:
        psi: 1-D array of amplitudes with an even number of entries.

    Returns:
        Real 1-D array with one probability per site.
    """
    site_count = psi.shape[0] // 2
    per_site_amplitudes = psi.reshape(site_count, 2)
    squared_magnitudes = np.abs(per_site_amplitudes) ** 2
    return np.sum(squared_magnitudes, axis=1).real
154
+
155
 
def chirality(psi):
    """Return the real part of ``<psi| (sigma_z kron I_N) |psi>``.

    Args:
        psi: State vector of length ``2 * N``.

    Returns:
        The expectation value as a Python float.
    """
    # NOTE(review): this operator is sigma_z (x) I_N, whereas site_probs and the
    # mass term of the Hamiltonian in this file use the I_N (x) sigma_z ordering.
    # Behaviour is kept as-is because the value feeds the trained meta-logit;
    # confirm whether the ordering is intentional before changing it.
    chirality_operator = np.kron(sigma_z, np.eye(N, dtype=complex))
    expectation = np.vdot(psi, chirality_operator @ psi)
    return float(expectation.real)
159
 
160
+
def energy_expectation(psi, H):
    """Return the real part of the expectation value ``<psi|H|psi>``.

    Args:
        psi: State vector.
        H: Operator matrix compatible with ``psi``.

    Returns:
        The expectation value as a Python float.
    """
    h_applied = H @ psi
    # np.vdot conjugates its first argument, giving <psi|H|psi>.
    expectation = np.vdot(psi, h_applied)
    return float(expectation.real)
163
 
164
+
def spatial_entropy(p):
    """Return the Shannon entropy (natural log) of a probability vector.

    Entries are clipped to ``[1e-12, 1.0]`` before taking the logarithm so
    that zero probabilities do not produce ``log(0)``.

    Args:
        p: Array-like of probabilities.

    Returns:
        ``-sum(p * log(p))`` over the clipped values, as a Python float.
    """
    clipped = np.clip(p, 1e-12, 1.0)
    weighted_logs = clipped * np.log(clipped)
    total = np.sum(weighted_logs)
    return float(-total.real)
168
 
169
+
170
+ def evolve_dirac_shell(psi0, H, dt=0.05, steps=100, record_every=25):
171
  U = expm(-1j * dt * H)
172
  psi = psi0.copy()
173
 
 
197
  }
198
 
199
 
200
+ # ============================
201
+ # Core RRF: embeddings + features + scores
202
+ # ============================
 
203
 
def get_embedding(text: str) -> np.ndarray:
    """Encode a single text with the module-level encoder.

    The text is wrapped in a one-element batch, encoded with
    ``convert_to_numpy=True`` and ``normalize_embeddings=True``, and the first
    (only) row of the result is returned.
    """
    return encoder.encode(
        [text],
        convert_to_numpy=True,
        normalize_embeddings=True,
    )[0]
207
 
208
+
209
+ def compute_rrf_features(prompt: str, answer: str) -> Dict[str, float]:
210
+ # Embeddings
211
  e_p = get_embedding(prompt)
212
  e_a = get_embedding(answer)
213
 
214
  cosine_pa = float(np.dot(e_p, e_a))
215
  len_ratio = len(answer) / (len(prompt) + 1.0)
216
 
217
+ # Simulación Dirac shell determinista (semilla por prompt+answer)
218
+ rng = np.random.default_rng(abs(hash(prompt + answer)) % (2 ** 32))
219
+ vec = rng.normal(0, 1, (2 * N,)) + 1j * rng.normal(0, 1, (2 * N,))
220
  vec /= np.sqrt(np.vdot(vec, vec))
221
  psi0 = vec
222
 
 
223
  H = build_dirac_hamiltonian(
224
  m=0.25, v=1.0, sigma=0.618,
225
  alpha_log=0.10, q=1.0,
226
  flux_vector=(0.0, 0.0, 0.0),
227
+ gauge_scale=0.0,
228
  )
229
 
230
+ out = evolve_dirac_shell(psi0, H, dt=0.05, steps=100, record_every=25)
231
 
232
+ entropy = out["entropy"]
233
  energy = out["energy"]
234
  chir = out["chirality"]
 
235
 
 
236
  S_final = float(entropy[-1])
237
+ S_initial = float(entropy[0])
238
  S_delta = S_final - S_initial
239
  C_final = float(chir[-1])
240
  E_mean = float(np.mean(energy))
241
  E_std = float(np.std(energy))
242
 
243
+ # Núcleo de 7 features
244
+ feats: Dict[str, float] = {
245
  "cosine_pa": cosine_pa,
246
  "len_ratio": len_ratio,
247
  "dirac_entropy_final": S_final,
 
251
  "dirac_energy_std": E_std,
252
  }
253
 
254
+ # Derivadas para llegar a 15 (igual que en el CSV)
255
+ S_max = math.log(N)
256
+ feats["entropy_norm"] = feats["dirac_entropy_final"] / S_max
257
+ feats["entropy_abs_delta"] = abs(feats["dirac_entropy_delta"])
258
+ feats["chirality_abs"] = abs(feats["dirac_chirility_final"]) if "dirac_chirility_final" in feats else abs(feats["dirac_chirality_final"])
259
+ feats["energy_abs_mean"] = abs(feats["dirac_energy_mean"])
260
+ feats["energy_std_sq"] = feats["dirac_energy_std"] ** 2
261
+ feats["cosine_sq"] = feats["cosine_pa"] ** 2
262
+ feats["len_log"] = math.log1p(feats["len_ratio"])
263
+ feats["len_inv"] = 1.0 / (1.0 + feats["len_ratio"])
264
+
265
+ return feats
266
+
267
+
268
+ def features_to_vector(feats: Dict[str, float]) -> np.ndarray:
269
  keys = [
270
  "cosine_pa",
271
  "len_ratio",
 
274
  "dirac_chirality_final",
275
  "dirac_energy_mean",
276
  "dirac_energy_std",
277
+ "entropy_norm",
278
+ "entropy_abs_delta",
279
+ "chirality_abs",
280
+ "energy_abs_mean",
281
+ "energy_std_sq",
282
+ "cosine_sq",
283
+ "len_log",
284
+ "len_inv",
285
  ]
286
  return np.array([feats[k] for k in keys], dtype=float)
287
 
288
+
289
+ def compute_scores_srff_crff_ephi(prompt: str, answer: str):
290
  feats = compute_rrf_features(prompt, answer)
291
  x = features_to_vector(feats).reshape(1, -1)
292
 
 
293
  proba = meta_logit.predict_proba(x)[0]
294
  p_good = float(proba[1])
295
 
296
  SRRF = p_good
297
  CRRF = p_good * feats["cosine_pa"]
298
 
299
+ S_max = math.log(N)
300
+ norm_entropy = float(feats["dirac_entropy_final"] / S_max)
 
 
301
  E_phi = 0.5 * (SRRF + norm_entropy)
302
 
303
  scores = {
 
309
  return scores, feats
310
 
311
 
312
+ # ============================
313
+ # Role profiles
314
+ # ============================
315
+
# Per-role weights applied to the SRRF / CRRF / E_phi scores when building the
# composite score. "default" is also the fallback for unknown role names.
ROLE_PROFILES: Dict[str, Dict[str, float]] = {
    "default": {
        "SRRF": 1.0,
        "CRRF": 1.0,
        "E_phi": 1.0,
    },
    "creative": {
        "SRRF": 0.5,
        "CRRF": 0.5,
        "E_phi": 1.5,
    },
    "precise": {
        "SRRF": 1.0,
        "CRRF": 1.8,
        "E_phi": 0.4,
    },
}


def apply_role_profile(
    scores: Dict[str, float],
    role_name: Optional[str],
) -> Dict[str, Any]:
    """Combine ``scores`` into one composite value using a role's weights.

    Args:
        scores: Mapping of score name to value. Only keys that also appear in
            the selected profile contribute to the composite.
        role_name: Key into ``ROLE_PROFILES``. A falsy value selects
            ``"default"``; an unknown name also uses the default weights, but
            the name is echoed back unchanged in the result.

    Returns:
        A dict with ``"role"`` (the effective or echoed role name),
        ``"weights"`` (a copy of the weights that were applied) and
        ``"composite_score"`` (the weighted sum divided by the sum of absolute
        weights, or the raw sum, ``0.0``, when no weight was applied).
    """
    if not role_name:
        role_name = "default"

    profile = ROLE_PROFILES.get(role_name, ROLE_PROFILES["default"])

    composite = 0.0
    weight_sum = 0.0
    for key, weight in profile.items():
        if key not in scores:
            continue
        composite += weight * scores[key]
        weight_sum += abs(weight)

    if weight_sum > 0.0:
        composite /= weight_sum

    return {
        "role": role_name,
        # Return a copy so callers cannot mutate the shared module-level table.
        "weights": dict(profile),
        "composite_score": composite,
    }
359
 
360
+
361
+ # ============================
362
+ # RRF Tutor: carga de dataset savant_rrf1
363
+ # ============================
364
+
365
+ print(f"🔄 [Startup] Cargando dataset para RRF Tutor: {RRF_TUTOR_DATASET_ID}...", flush=True)
366
+ try:
367
+ ds_rrf = load_dataset(RRF_TUTOR_DATASET_ID, split="train")
368
+ ds_rrf = ds_rrf.filter(
369
+ lambda ex: ex.get("prompt") is not None and ex.get("completion") is not None
370
+ )
371
+ print(f"✅ Dataset RRF Tutor cargado. Ejemplos útiles: {len(ds_rrf)}", flush=True)
372
+ except Exception as e:
373
+ print(f"❌ Error cargando dataset RRF Tutor: {e}", file=sys.stderr, flush=True)
374
+ ds_rrf = None
375
+
376
+ if ds_rrf is not None:
377
+ print("🔄 [Startup] Construyendo textos y embeddings para RRF Tutor...", flush=True)
378
+ rrf_corpus_texts: List[str] = []
379
+ rrf_corpus_prompts: List[str] = []
380
+ rrf_corpus_completions: List[str] = []
381
+
382
+ for ex in ds_rrf:
383
+ p = ex["prompt"]
384
+ c = ex["completion"]
385
+ rrf_corpus_prompts.append(p)
386
+ rrf_corpus_completions.append(c)
387
+ rrf_corpus_texts.append(p + "\n\n" + c)
388
+
389
+ rrf_corpus_embeds = encoder.encode(
390
+ rrf_corpus_texts,
391
+ convert_to_numpy=True,
392
+ show_progress_bar=True,
393
+ normalize_embeddings=True,
394
  )
395
+ print("✅ [RRF Tutor] Embeddings construidos.", flush=True)
396
+ else:
397
+ rrf_corpus_texts = []
398
+ rrf_corpus_prompts = []
399
+ rrf_corpus_completions = []
400
+ rrf_corpus_embeds = np.zeros((0, 384), dtype=np.float32)
401
+ print("⚠️ [RRF Tutor] Dataset no disponible, el endpoint devolverá error si se usa.", flush=True)
402
+
403
+
404
+ # ============================
405
+ # FastAPI app & modelos
406
+ # ============================
407
+
408
+ class EvaluateRequest(BaseModel):
409
+ prompt: str
410
+ answer: str
411
+ model_label: Optional[str] = None
412
+
413
 
414
  class EvaluateResponse(BaseModel):
415
  scores: Dict[str, float]
416
  features: Dict[str, float]
417
  sim_summary: Dict[str, Any]
418
+ role_profile: Optional[Dict[str, Any]] = None
419
 
 
 
 
420
 
421
+ class QualityRemoteRequest(EvaluateRequest):
422
+ pass
423
+
424
+
425
+ class RoleProfileInfo(BaseModel):
426
+ name: str
427
+ weights: Dict[str, float]
428
+
429
+
430
+ class RoleProfilesResponse(BaseModel):
431
+ roles: List[RoleProfileInfo]
432
+
433
+
434
+ class RerankRequest(BaseModel):
435
+ query: str = Field(..., description="Query de búsqueda o pregunta del usuario.")
436
+ documents: List[str] = Field(..., description="Lista de documentos candidatos a rerankear.")
437
+ alpha: float = Field(
438
+ 0.2,
439
+ description="Peso de la corrección log_rdf en el score_final. 0 = solo cosine, 1 = solo log_rdf.",
440
  )
441
+ query_embedding_norm: bool = Field(
442
+ True,
443
+ description="Si True, normaliza el embedding de query (útil para cosine).",
444
+ )
445
+
446
+
447
+ class RerankDocumentResult(BaseModel):
448
+ id: int = Field(..., description="Índice del documento en la lista de entrada.")
449
+ score_cosine: float
450
+ score_log_rdf: float
451
+ score_final: float
452
+ rank: int
453
+
454
+
455
+ class RerankResponse(BaseModel):
456
+ model_id: str
457
+ alpha: float
458
+ query_embedding_norm: bool
459
+ results: List[RerankDocumentResult]
460
+
461
+
462
+ class RRFTutorRequest(BaseModel):
463
+ query: str = Field(..., description="Pregunta o fragmento de ecuación/idea RRF.")
464
+ max_examples: int = Field(
465
+ 3, ge=1, le=8,
466
+ description="Número de ejemplos de savant_rrf1 a recuperar (1-8)."
467
+ )
468
+ include_raw_context: bool = Field(
469
+ False,
470
+ description="Si es true, devuelve los ejemplos recuperados."
471
+ )
472
+
473
+
474
+ class RetrievedExample(BaseModel):
475
+ prompt: str
476
+ completion: str
477
+ score: float
478
 
 
479
 
480
+ class RRFTutorResponse(BaseModel):
481
+ answer: str
482
+ retrieved: Optional[List[RetrievedExample]] = None
483
+
484
+
485
+ app = FastAPI(
486
+ title="Savant RRF Φ12.0 API",
487
+ description="Dirac-Resonant conceptual quality layer + reranking + RRF Tutor.",
488
+ version="1.1.0",
489
+ )
490
+
491
+
492
+ # ============================
493
+ # Utilidades /v1/rerank
494
+ # ============================
495
+
def _compute_rerank_scores(query: str, docs: List[str], alpha: float, norm_query: bool) -> List[RerankDocumentResult]:
    """Score and rank ``docs`` against ``query`` using the module-level encoder.

    For each document the dot product between the query embedding and the
    normalized document embedding is computed (``score_cosine``), together with
    ``score_log_rdf = log1p(max(score_cosine, 0) + 1e-6)``. The final score is
    ``(1 - alpha) * score_cosine + alpha * score_log_rdf``.

    Args:
        query: Query text.
        docs: Candidate documents; ``id`` in the results is the index into
            this list.
        alpha: Weight of ``score_log_rdf`` in the final score.
        norm_query: Whether the query embedding is normalized by the encoder.

    Returns:
        One ``RerankDocumentResult`` per document, ordered by descending
        ``score_final`` with ``rank`` starting at 1. An empty ``docs`` list
        yields an empty result.
    """
    if not docs:
        # Nothing to rank; avoid calling the encoder at all.
        return []

    q_emb = encoder.encode([query], convert_to_numpy=True, normalize_embeddings=norm_query)[0]

    # Encode all documents in one batched call instead of one call per document.
    doc_embs = np.asarray(
        encoder.encode(list(docs), convert_to_numpy=True, normalize_embeddings=True)
    )
    cosines = doc_embs @ q_emb

    scored = []
    for idx, cosine in enumerate(cosines):
        score_cosine = float(cosine)
        # Negative similarities are floored at 0 before the log correction.
        score_log_rdf = float(np.log1p(max(score_cosine, 0.0) + 1e-6))
        score_final = (1.0 - alpha) * score_cosine + alpha * score_log_rdf
        scored.append((idx, score_cosine, score_log_rdf, score_final))

    # Stable sort: documents with equal final score keep their input order.
    scored.sort(key=lambda item: item[3], reverse=True)

    return [
        RerankDocumentResult(
            id=idx,
            score_cosine=score_cosine,
            score_log_rdf=score_log_rdf,
            score_final=score_final,
            rank=rank,
        )
        for rank, (idx, score_cosine, score_log_rdf, score_final) in enumerate(scored, start=1)
    ]
531
+
532
+
533
+ # ============================
534
+ # Utilidades /v1/rrf_tutor
535
+ # ============================
536
+
def rrf_tutor_retrieve_examples(query: str, top_k: int = 3):
    """Return the corpus examples most similar to ``query``.

    The query is encoded with normalized embeddings and compared by dot
    product against ``rrf_corpus_embeds``.

    Args:
        query: Text to search for.
        top_k: Maximum number of examples to return. Values are clamped to
            the range ``[0, len(rrf_corpus_embeds)]``.

    Returns:
        A list of dicts with keys ``"idx"``, ``"score"``, ``"prompt"`` and
        ``"completion"``, ordered by descending similarity.

    Raises:
        RuntimeError: If the corpus embeddings are missing or empty.
    """
    if rrf_corpus_embeds is None or len(rrf_corpus_embeds) == 0:
        raise RuntimeError("Embeddings de RRF Tutor no están disponibles.")

    q_emb = encoder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
    sims = np.dot(rrf_corpus_embeds, q_emb)

    # Clamp on both sides: a negative top_k used as a slice bound would
    # silently return "all but the last |top_k|" entries.
    top_k = max(0, min(top_k, len(rrf_corpus_embeds)))
    top_idx = np.argsort(-sims)[:top_k]

    results = []
    for raw_idx in top_idx:
        idx = int(raw_idx)
        results.append(
            {
                "idx": idx,
                "score": float(sims[idx]),
                "prompt": rrf_corpus_prompts[idx],
                "completion": rrf_corpus_completions[idx],
            }
        )
    return results
558
+
559
+
def rrf_tutor_build_answer(query: str, retrieved_examples):
    """Build the tutor's answer text from the retrieved examples.

    Args:
        query: The user's query (not used when composing the text).
        retrieved_examples: Sequence of dicts with a ``"completion"`` key,
            best match first.

    Returns:
        A fixed "no examples found" message when ``retrieved_examples`` is
        empty; otherwise the completion of the first example wrapped in a
        header and an explanatory note.
    """
    if retrieved_examples:
        top_completion = retrieved_examples[0]["completion"]
        header = "🔎 Respuesta basada en el ejemplo más cercano del corpus RRF:\n\n"
        note = (
            "💡 Nota: Esta es una versión mínima que reutiliza directamente la 'completion' "
            "del ejemplo más similar en savant_rrf1. En una versión extendida, aquí se "
            "conectaría un LLM pequeño (TinyLlama, etc.) que use varios ejemplos como "
            "contexto para generar una explicación personalizada a tu `query`."
        )
        return header + f"{top_completion}\n\n" + note

    return (
        "No encontré ejemplos relevantes en el dataset RRF Tutor para tu consulta. "
        "Intenta reformular la pregunta o revisar la configuración del dataset."
    )
579
+
580
+
581
+ # ============================
582
+ # Endpoints
583
+ # ============================
584
+
@app.get("/")
def root():
    # Landing endpoint: a fixed status message plus the path of the docs page.
    payload = {
        "message": "Savant RRF Φ12.0 API running",
        "docs": "/docs",
    }
    return payload
588
+
589
+
@app.get("/health")
def health():
    # Health probe: reports a fixed "ok" status together with the configured
    # encoder id, meta-logit filename and the number of lattice sites.
    info = dict(
        status="ok",
        encoder_model_id=ENCODER_MODEL_ID,
        meta_logit_filename=META_LOGIT_FILENAME,
        N_sites=N,
    )
    return info
598
 
599
+
@app.get("/roles", response_model=RoleProfilesResponse)
def list_roles():
    # Expose every entry of ROLE_PROFILES as a (name, weights) record.
    roles = []
    for profile_name in ROLE_PROFILES:
        roles.append(
            RoleProfileInfo(name=profile_name, weights=ROLE_PROFILES[profile_name])
        )
    return RoleProfilesResponse(roles=roles)
607
+
608
+
@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(req: EvaluateRequest):
    # Scores a (prompt, answer) pair and attaches a short diagnostic simulation.
    #
    # Steps:
    #   1. Compute the SRRF / CRRF / E_phi scores and the feature dict.
    #   2. Build a role-weighted composite, using `model_label` as the role name.
    #   3. Run a short Dirac-shell evolution from a seeded random state and
    #      summarise its entropy, chirality and energy in `sim_summary`.
    #
    # Any failure is logged to stderr and reported to the client as a generic
    # HTTP 500 so that internal details are not leaked.
    import hashlib  # local import keeps this block self-contained

    try:
        scores, feats = compute_scores_srff_crff_ephi(req.prompt, req.answer)

        role_profile = apply_role_profile(scores, req.model_label)

        H = build_dirac_hamiltonian(
            m=0.25, v=1.0, sigma=0.618,
            alpha_log=0.10, q=1.0,
            flux_vector=(0.0, 0.0, 0.0),
            gauge_scale=0.0,
        )

        # The built-in hash() of a str is salted per process, so it cannot
        # provide a seed that is reproducible across restarts or workers.
        # Derive the 32-bit seed from a SHA-256 digest of the text instead.
        seed_text = req.prompt + req.answer + "sim"
        digest = hashlib.sha256(
            seed_text.encode("utf-8", errors="surrogatepass")
        ).digest()
        seed = int.from_bytes(digest[:4], "big")
        rng = np.random.default_rng(seed)

        vec = rng.normal(0, 1, (2 * N,)) + 1j * rng.normal(0, 1, (2 * N,))
        vec /= np.sqrt(np.vdot(vec, vec))
        psi0 = vec
        sim = evolve_dirac_shell(psi0, H, dt=0.05, steps=60, record_every=20)

        sim_summary = {
            "entropy_initial": float(sim["entropy"][0]),
            "entropy_final": float(sim["entropy"][-1]),
            "chirality_initial": float(sim["chirality"][0]),
            "chirality_final": float(sim["chirality"][-1]),
            "energy_mean": float(np.mean(sim["energy"])),
            "energy_std": float(np.std(sim["energy"])),
            "N_sites": int(N),
        }

        return EvaluateResponse(
            scores=scores,
            features=feats,
            sim_summary=sim_summary,
            role_profile=role_profile,
        )
    except Exception as e:
        print(f"❌ [Runtime] Error en /evaluate: {e}", file=sys.stderr, flush=True)
        # Chain the cause so the original traceback is preserved in server logs.
        raise HTTPException(status_code=500, detail="Internal server error") from e
649
+
650
+
@app.post("/quality_remote", response_model=EvaluateResponse)
def quality_remote(req: QualityRemoteRequest):
    # Same behaviour as /evaluate: delegate directly to its handler.
    response = evaluate(req)
    return response
654
+
655
+
@app.post("/quality", response_model=EvaluateResponse)
def quality_alias(req: QualityRemoteRequest):
    """
    Direct alias of /evaluate, kept for compatibility with earlier clients.
    """
    response = evaluate(req)
    return response
662
+
663
+
@app.post("/v1/rerank", response_model=RerankResponse)
def rerank_endpoint(req: RerankRequest):
    # Rerank the request's documents against its query and echo back the
    # parameters that were used alongside the encoder model id.
    alpha = req.alpha
    normalize_query = req.query_embedding_norm

    ranked = _compute_rerank_scores(req.query, req.documents, alpha, normalize_query)

    response = RerankResponse(
        model_id=ENCODER_MODEL_ID,
        alpha=alpha,
        query_embedding_norm=normalize_query,
        results=ranked,
    )
    return response
679
+
680
+
@app.post("/v1/rrf_tutor", response_model=RRFTutorResponse)
def rrf_tutor_endpoint(body: RRFTutorRequest):
    # Answers a query from the closest examples of the RRF Tutor corpus.
    #
    # Responds with HTTP 400 when the query is blank, and HTTP 500 when the
    # dataset/embeddings are unavailable or retrieval fails. The retrieved
    # examples are only included in the response when `include_raw_context`
    # is set.
    if not body.query or not body.query.strip():
        raise HTTPException(status_code=400, detail="El campo 'query' no puede estar vacío.")

    if ds_rrf is None or rrf_corpus_embeds is None or len(rrf_corpus_embeds) == 0:
        raise HTTPException(
            status_code=500,
            detail="El dataset/embeddings de RRF Tutor no están disponibles en este momento."
        )

    try:
        retrieved = rrf_tutor_retrieve_examples(body.query, top_k=body.max_examples)
    except Exception as e:
        # Log the real cause server-side and return a generic message, matching
        # /evaluate, instead of echoing internal exception text to the client.
        print(f"❌ [Runtime] Error en /v1/rrf_tutor: {e}", file=sys.stderr, flush=True)
        raise HTTPException(
            status_code=500,
            detail="Error interno recuperando ejemplos RRF Tutor.",
        ) from e

    answer = rrf_tutor_build_answer(body.query, retrieved)

    resp = RRFTutorResponse(answer=answer)

    if body.include_raw_context:
        resp.retrieved = [
            RetrievedExample(
                prompt=ex["prompt"],
                completion=ex["completion"],
                score=ex["score"],
            )
            for ex in retrieved
        ]

    return resp