Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -194,6 +194,88 @@ async def list_retrievers(source: str = None) -> dict:
|
|
| 194 |
}
|
| 195 |
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
async def search_tavily(
|
| 199 |
query: str,
|
|
@@ -267,16 +349,18 @@ with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as arxiv_tab:
|
|
| 267 |
],
|
| 268 |
outputs=gr.JSON(label="Resultados de búsqueda"),
|
| 269 |
title="Búsqueda en ArXiv",
|
| 270 |
-
description="Busca artículos académicos en ArXiv por palabras clave."
|
|
|
|
| 271 |
)
|
| 272 |
|
| 273 |
-
with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as
|
| 274 |
retrievers_interface = gr.Interface(
|
| 275 |
fn=list_retrievers,
|
| 276 |
inputs=gr.Textbox(label="Fuente (opcional)", placeholder="Dejar vacío para listar todos"),
|
| 277 |
outputs=gr.JSON(label="Lista de retrievers"),
|
| 278 |
title="Lista de Retrievers",
|
| 279 |
-
description="Muestra los retrievers disponibles, opcionalmente filtrados por fuente."
|
|
|
|
| 280 |
)
|
| 281 |
|
| 282 |
with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as tavily_tab:
|
|
@@ -290,13 +374,35 @@ with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as tavily_tab:
|
|
| 290 |
],
|
| 291 |
outputs=gr.JSON(label="Resultados de Tavily"),
|
| 292 |
title="Búsqueda Web (Tavily)",
|
| 293 |
-
description="Realiza búsquedas en web usando la API de Tavily."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
)
|
| 295 |
|
| 296 |
# Creamos la interfaz con las pestañas separadas
|
| 297 |
demo = gr.TabbedInterface(
|
| 298 |
-
[arxiv_tab,
|
| 299 |
-
["ArXiv", "Retrievers", "
|
| 300 |
)
|
| 301 |
|
| 302 |
demo.launch(mcp_server=True)
|
|
|
|
| 194 |
}
|
| 195 |
|
| 196 |
|
| 197 |
+
def retrieve_docs(
    query: str,
    retrievers: List[str],
    top_k: int = 3
) -> dict:
    """
    Run a semantic search over the indexed documents.

    Args:
        query (str): Search text (required).
        retrievers (List[str]): Names of the retrievers to query (required).
            A single name passed as a plain string is treated as a
            one-element list, and ``None`` as an empty list.
        top_k (int): Number of results per retriever (optional, default=3).

    Returns:
        dict: A mapping with the keys ``"query"``, ``"results"`` and
        ``"top_k"``. ``"results"`` maps each known retriever name either to
        its payload (``title``, ``documents``, ``description``, ``source``,
        ``last_updated``) or, if querying it failed, to
        ``{"error": ..., "retriever": ...}``. When unknown names were
        requested, a ``"warnings"`` entry lists them together with the
        valid options.
    """
    # Local import: only needed on the error path, and keeps this block
    # self-contained without touching the file-level imports.
    import traceback

    print(f"Iniciando búsqueda para query: '{query}'")
    print(f"Parámetros - retrievers: {retrievers}, top_k: {top_k}")

    # Normalise the selection so the loop below never iterates over None
    # or over the characters of a single string.
    if retrievers is None:
        retrievers = []
    elif isinstance(retrievers, str):
        retrievers = [retrievers]

    results = {}
    invalid = []

    for name in retrievers:
        if name not in indices:
            print(f"Retriever no encontrado: {name}")
            invalid.append(name)
            continue

        try:
            print(f"Procesando retriever: {name}")
            retriever = indices[name].as_retriever(similarity_top_k=top_k)
            nodes = retriever.retrieve(query)
            print(f"Retrieved {len(nodes)} documentos de {name}")

            # Look up the full metadata entry for this retriever; the first
            # source that lists the name wins.
            metadata = {}
            source = "unknown"
            for src, indexes in retrievers_metadata.items():
                if name in indexes:
                    metadata = indexes[name]
                    source = src
                    break
            print(f"Metadatos encontrados para {name}: {metadata.keys()}")

            # Build the per-retriever payload.
            results[name] = {
                "title": metadata.get("title", name),
                "documents": [
                    {
                        "content": node.get_content(),
                        "metadata": node.metadata,
                        "score": node.score
                    }
                    for node in nodes
                ],
                "description": metadata.get("description", ""),
                "source": source,
                "last_updated": metadata.get("last_updated", "")
            }
            print(f"Retriever {name} procesado exitosamente")

        except Exception as e:
            # print() does not accept exc_info (that is a logging keyword);
            # passing it raised TypeError here and hid the real error.
            # Emit the traceback explicitly instead, then record the failure
            # so the remaining retrievers are still processed.
            print(f"Error procesando retriever {name}: {str(e)}")
            traceback.print_exc()
            results[name] = {
                "error": str(e),
                "retriever": name
            }

    # Assemble the final response.
    response = {
        "query": query,
        "results": results,
        "top_k": top_k,
    }

    if invalid:
        valid_options = list(indices.keys())
        print(f"Retrievers inválidos: {invalid}. Opciones válidas: {valid_options}")
        response["warnings"] = {
            "invalid_retrievers": invalid,
            "valid_options": valid_options
        }

    print(f"Búsqueda completada. Total resultados: {len(results)}")
    return response
|
| 278 |
+
|
| 279 |
|
| 280 |
async def search_tavily(
|
| 281 |
query: str,
|
|
|
|
| 349 |
],
|
| 350 |
outputs=gr.JSON(label="Resultados de búsqueda"),
|
| 351 |
title="Búsqueda en ArXiv",
|
| 352 |
+
description="Busca artículos académicos en ArXiv por palabras clave.",
|
| 353 |
+
api_name="_search_arxiv"
|
| 354 |
)
|
| 355 |
|
| 356 |
+
# Tab wrapping list_retrievers in its own Gradio interface.
with gr.Blocks(
    title="Herramientas MCP",
    theme=gr.themes.Base(),
) as list_retrievers_tab:
    retrievers_interface = gr.Interface(
        fn=list_retrievers,
        # Single optional text field used as the source filter.
        inputs=gr.Textbox(
            label="Fuente (opcional)",
            placeholder="Dejar vacío para listar todos",
        ),
        outputs=gr.JSON(label="Lista de retrievers"),
        title="Lista de Retrievers",
        description=(
            "Muestra los retrievers disponibles, opcionalmente filtrados por fuente."
        ),
        api_name="_list_retrievers",
    )
|
| 365 |
|
| 366 |
with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as tavily_tab:
|
|
|
|
| 374 |
],
|
| 375 |
outputs=gr.JSON(label="Resultados de Tavily"),
|
| 376 |
title="Búsqueda Web (Tavily)",
|
| 377 |
+
description="Realiza búsquedas en web usando la API de Tavily.",
|
| 378 |
+
api_name="_search_tavily"
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
# Tab wrapping retrieve_docs in its own Gradio interface.
with gr.Blocks(
    title="Herramientas MCP",
    theme=gr.themes.Base(),
) as retrieve_tab:
    retrieve_interface = gr.Interface(
        fn=retrieve_docs,
        # Input order matches retrieve_docs(query, retrievers, top_k).
        inputs=[
            gr.Textbox(
                label="Consulta",
                placeholder="Ingrese su pregunta o términos de búsqueda...",
            ),
            gr.Dropdown(
                choices=list(indices.keys()),
                label="Retrievers",
                multiselect=True,
                info="Seleccione uno o más retrievers",
            ),
            gr.Slider(
                minimum=1,
                maximum=10,
                value=3,
                step=1,
                label="Número de resultados por retriever (top_k)",
            ),
        ],
        outputs=gr.JSON(label="Resultados de búsqueda semántica"),
        title="Búsqueda Semántica en Documentos",
        description="""Realiza búsqueda semántica en documentos indexados usando retrievers.
        Seleccione los retrievers disponibles y ajuste el número de resultados.""",
        api_name="_retrieve",
    )
|
| 401 |
|
| 402 |
# Combine the individual tabs into one tabbed application; the two lists
# are positional pairs (tab object, tab label).
demo = gr.TabbedInterface(
    interface_list=[arxiv_tab, tavily_tab, list_retrievers_tab, retrieve_tab],
    tab_names=["ArXiv", "Tavily", "List Retrievers", "Retrieve"],
)

# Start the app with the MCP server option enabled.
demo.launch(mcp_server=True)
|