Spaces:
Running
Running
NoeMartinezSanchez committed on
Commit ·
c380cb1
1
Parent(s): 1dd03ac
Corrección de indentación
Browse files
- models/gemma_wrapper.py +1 -5
- rag/optimized_retriever.py +3 -3
models/gemma_wrapper.py
CHANGED
|
@@ -270,7 +270,7 @@ class GemmaWrapper:
|
|
| 270 |
self._clear_cache()
|
| 271 |
return "Lo siento, hubo un problema al generar la respuesta. Por favor, intenta de nuevo."
|
| 272 |
|
| 273 |
-
def generate_with_context(
|
| 274 |
self,
|
| 275 |
context: str,
|
| 276 |
question: str,
|
|
@@ -328,18 +328,14 @@ RESPUESTA:"""
|
|
| 328 |
|
| 329 |
import re
|
| 330 |
|
| 331 |
-
# eliminar basura inicial (símbolos o texto corrupto)
|
| 332 |
text = re.sub(r'^[^a-zA-ZáéíóúÁÉÍÓÚ¿¡]+', '', text)
|
| 333 |
|
| 334 |
-
# eliminar palabras muy cortas al inicio (ej: "U", "O", etc.)
|
| 335 |
words = text.split()
|
| 336 |
if len(words) > 1 and len(words[0]) <= 2:
|
| 337 |
text = ' '.join(words[1:])
|
| 338 |
|
| 339 |
-
# normalizar espacios
|
| 340 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 341 |
|
| 342 |
-
# capitalizar primera letra
|
| 343 |
if text and text[0].islower():
|
| 344 |
text = text[0].upper() + text[1:]
|
| 345 |
|
|
|
|
| 270 |
self._clear_cache()
|
| 271 |
return "Lo siento, hubo un problema al generar la respuesta. Por favor, intenta de nuevo."
|
| 272 |
|
| 273 |
+
def generate_with_context(
|
| 274 |
self,
|
| 275 |
context: str,
|
| 276 |
question: str,
|
|
|
|
| 328 |
|
| 329 |
import re
|
| 330 |
|
|
|
|
| 331 |
text = re.sub(r'^[^a-zA-ZáéíóúÁÉÍÓÚ¿¡]+', '', text)
|
| 332 |
|
|
|
|
| 333 |
words = text.split()
|
| 334 |
if len(words) > 1 and len(words[0]) <= 2:
|
| 335 |
text = ' '.join(words[1:])
|
| 336 |
|
|
|
|
| 337 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 338 |
|
|
|
|
| 339 |
if text and text[0].islower():
|
| 340 |
text = text[0].upper() + text[1:]
|
| 341 |
|
rag/optimized_retriever.py
CHANGED
|
@@ -348,7 +348,7 @@ class OptimizedRetriever:
|
|
| 348 |
"conductas muy graves",
|
| 349 |
"sanciones aplicables",
|
| 350 |
"canales de denuncia"
|
| 351 |
-
|
| 352 |
|
| 353 |
logger.debug(f"Generadas {len(subqueries)} subqueries para consulta compleja")
|
| 354 |
|
|
@@ -356,7 +356,7 @@ class OptimizedRetriever:
|
|
| 356 |
|
| 357 |
# ==================== 6. MÉTODO PRINCIPAL DE BÚSQUEDA ====================
|
| 358 |
|
| 359 |
-
def retrieve(self, query: str, query_embedding: np.ndarray, top_k: int = None) -> List[Dict]:
|
| 360 |
"""
|
| 361 |
Pipeline completo de retrieval optimizado.
|
| 362 |
|
|
@@ -478,6 +478,6 @@ def retrieve(self, query: str, query_embedding: np.ndarray, top_k: int = None) -
|
|
| 478 |
for metadata in self.vs.metadata:
|
| 479 |
for key, value in metadata.items():
|
| 480 |
if isinstance(value, (str, int, float, bool)):
|
| 481 |
-
fields[key].add(str(value)[:50])
|
| 482 |
|
| 483 |
return {k: list(v)[:10] for k, v in fields.items()}
|
|
|
|
| 348 |
"conductas muy graves",
|
| 349 |
"sanciones aplicables",
|
| 350 |
"canales de denuncia"
|
| 351 |
+
])
|
| 352 |
|
| 353 |
logger.debug(f"Generadas {len(subqueries)} subqueries para consulta compleja")
|
| 354 |
|
|
|
|
| 356 |
|
| 357 |
# ==================== 6. MÉTODO PRINCIPAL DE BÚSQUEDA ====================
|
| 358 |
|
| 359 |
+
def retrieve(self, query: str, query_embedding: np.ndarray, top_k: int = None) -> List[Dict]:
|
| 360 |
"""
|
| 361 |
Pipeline completo de retrieval optimizado.
|
| 362 |
|
|
|
|
| 478 |
for metadata in self.vs.metadata:
|
| 479 |
for key, value in metadata.items():
|
| 480 |
if isinstance(value, (str, int, float, bool)):
|
| 481 |
+
fields[key].add(str(value)[:50])
|
| 482 |
|
| 483 |
return {k: list(v)[:10] for k, v in fields.items()}
|