Spaces:

pharma-IA
/

Demo_MCP_Server_MIT

Running

App Files Files Community

geronimo-pericoli committed on May 13, 2025

Commit

97520b6

verified ·

1 Parent(s): db47b33

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -72

app.py CHANGED Viewed

@@ -46,26 +46,26 @@ Settings.embed_model = embed_model
 DOCUMENTS_BASE_PATH = "./"
 RETRIEVERS_JSON_PATH = Path("./retrievers.json")
-# Cargar metadatos
 def load_retrievers_metadata():
     try:
         with open(RETRIEVERS_JSON_PATH, 'r', encoding='utf-8') as f:
             return json.load(f)
     except Exception as e:
-        print(f"Error cargando retrievers.json: {str(e)}")
-        print(f"Detalles del error: {traceback.format_exc()}") # Necesitarías importar traceback
         return {}
 retrievers_metadata = load_retrievers_metadata()
 SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
-# Cargar índices
 indices: Dict[str, VectorStoreIndex] = {}
 for source, rel_path in SOURCES.items():
     full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
     if not os.path.exists(full_path):
-        print(f"Advertencia: No se encontró la ruta para {source}")
         continue
     for root, dirs, files in os.walk(full_path):
@@ -75,10 +75,10 @@ for source, rel_path in SOURCES.items():
                 storage_context = StorageContext.from_defaults(persist_dir=storage_path)
                 index_name = os.path.basename(root)
                 indices[index_name] = load_index_from_storage(storage_context) #, index_id="vector_index"
-                print(f"Índice cargado correctamente: {index_name}")
             except Exception as e:
-                print(f"Error cargando índice {index_name}: {str(e)}")
-                print(f"Detalles del error: {traceback.format_exc()}")
@@ -98,26 +98,26 @@ async def search_arxiv(
     max_results: int = 5
 ) -> Dict[str, Any]:
     """
-    Busca artículos académicos en ArXiv.
     Args:
-        query: Términos de búsqueda (ej. "deep learning")
-        max_results: Número máximo de resultados (1-10, default 5)
     Returns:
-        Dict: Resultados de la búsqueda con metadatos de los papers
     """
     try:
-        # Configurar máximo de resultados
         max_results = min(max(1, max_results), 10)
         arxiv_tool.metadata.max_results = max_results
-        # Ejecutar búsqueda y obtener resultados
         tool_output = arxiv_tool(query=query)
-        # Procesar documentos
         papers = []
-        for doc in tool_output.raw_output:  # Acceder correctamente a los documentos
             content = doc.text_resource.text.split('\n')
             papers.append({
                 'title': content[0].split(': ')[1] if ': ' in content[0] else content[0],
@@ -144,19 +144,19 @@ async def search_arxiv(
 async def list_retrievers(source: str = None) -> dict:
     """
-    Devuelve la lista de retrievers disponibles.
-    Si se especifica una source y existe, filtra por ella; si no existe, devuelve todas.
     Args:
-        source (str, optional): Fuente para filtrar. Si no existe, se ignorará. Defaults to None.
     Returns:
         dict: {
-            "retrievers": Lista de retrievers (filtrados o completos),
-            "count": Número total,
             "status": "success"|"error",
-            "source_requested": source,  # Muestra lo que se solicitó
-            "source_used": "all"|source  # Muestra lo que realmente se usó
         }
     """
     try:
@@ -164,7 +164,7 @@ async def list_retrievers(source: str = None) -> dict:
         source_exists = source in retrievers_metadata if source else False
         for current_source, indexes in retrievers_metadata.items():
-            # Solo filtrar si el source existe, sino mostrar todo
             if source_exists and current_source != source:
                 continue
@@ -200,32 +200,32 @@ def retrieve_docs(
     top_k: int = 3
 ) -> dict:
     """
-    Realiza búsqueda semántica en documentos indexados.
-    Parámetros:
-        query (str): Texto de búsqueda (requerido)
-        retrievers (List[str]): Nombres de retrievers a consultar (requerido)
-        top_k (int): Número de resultados por retriever (opcional, default=3)
     """
-    print(f"Iniciando búsqueda para query: '{query}'")
-    print(f"Parámetros - retrievers: {retrievers}, top_k: {top_k}")
     results = {}
     invalid = []
     for name in retrievers:
         if name not in indices:
-            print(f"Retriever no encontrado: {name}")
             invalid.append(name)
             continue
         try:
-            print(f"Procesando retriever: {name}")
             retriever = indices[name].as_retriever(similarity_top_k=top_k)
             nodes = retriever.retrieve(query)
-            print(f"Retrieved {len(nodes)} documentos de {name}")
-            # 2. Buscar metadatos COMPLETOS
             metadata = {}
             source = "unknown"
             for src, indexes in retrievers_metadata.items():
@@ -233,9 +233,9 @@ def retrieve_docs(
                     metadata = indexes[name]
                     source = src
                     break
-            print(f"Metadatos encontrados para {name}: {metadata.keys()}")
-            # 3. Construir respuesta
             results[name] = {
                 "title": metadata.get("title", name),
                 "documents": [
@@ -250,16 +250,16 @@ def retrieve_docs(
                 "source": source,
                 "last_updated": metadata.get("last_updated", "")
             }
-            print(f"Retriever {name} procesado exitosamente")
         except Exception as e:
-            print(f"Error procesando retriever {name}: {str(e)}", exc_info=True)
             results[name] = {
                 "error": str(e),
                 "retriever": name
             }
-    # Construir respuesta final
     response = {
         "query": query,
         "results": results,
@@ -267,13 +267,13 @@ def retrieve_docs(
     }
     if invalid:
-        print(f"Retrievers inválidos: {invalid}. Opciones válidas: {list(indices.keys())}")
         response["warnings"] = {
             "invalid_retrievers": invalid,
             "valid_options": list(indices.keys())
         }
-    print(f"Búsqueda completada. Total resultados: {len(results)}")
     return response
@@ -294,7 +294,7 @@ async def search_tavily(
     Returns:
         dict: Search results from Tavily
     """
-    # Obtener la API key de las variables de entorno
     tavily_api_key = os.environ.get('TAVILY_API_KEY')
     if not tavily_api_key:
         raise ValueError("TAVILY_API_KEY environment variable not set")
@@ -340,66 +340,66 @@ async def search_tavily(
 # Gradio interface
-with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as arxiv_tab:
     arxiv_interface = gr.Interface(
         fn=search_arxiv,
         inputs=[
-            gr.Textbox(label="Términos de búsqueda", placeholder="Ej: deep learning"),
-            gr.Slider(1, 10, value=5, step=1, label="Número máximo de resultados")
         ],
-        outputs=gr.JSON(label="Resultados de búsqueda"),
-        title="Búsqueda en ArXiv",
-        description="Busca artículos académicos en ArXiv por palabras clave.",
         api_name="_search_arxiv"
     )
-with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as list_retrievers_tab:
     retrievers_interface = gr.Interface(
         fn=list_retrievers,
-        inputs=gr.Textbox(label="Fuente (opcional)", placeholder="Dejar vacío para listar todos"),
-        outputs=gr.JSON(label="Lista de retrievers"),
-        title="Lista de Retrievers",
-        description="Muestra los retrievers disponibles, opcionalmente filtrados por fuente.",
         api_name="_list_retrievers"
     )
-with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as tavily_tab:
     tavily_interface = gr.Interface(
         fn=search_tavily,
         inputs=[
-            gr.Textbox(label="Consulta de búsqueda", placeholder="Ej: últimas noticias sobre IA"),
-            gr.Slider(1, 30, value=7, step=1, label="Últimos N días (0 para sin límite)"),
-            gr.Slider(1, 10, value=1, step=1, label="Máximo de resultados"),
-            gr.Checkbox(label="Incluir respuesta directa", value=False)
         ],
-        outputs=gr.JSON(label="Resultados de Tavily"),
-        title="Búsqueda Web (Tavily)",
-        description="Realiza búsquedas en web usando la API de Tavily.",
         api_name="_search_tavily"
     )
-with gr.Blocks(title="Herramientas MCP", theme=gr.themes.Base()) as retrieve_tab:
-    # Interfaz para retrieve_docs
     retrieve_interface = gr.Interface(
         fn=retrieve_docs,
         inputs=[
-            gr.Textbox(label="Consulta", placeholder="Ingrese su pregunta o términos de búsqueda..."),
             gr.Dropdown(
                 choices=list(indices.keys()),
                 label="Retrievers",
                 multiselect=True,
-                info="Seleccione uno o más retrievers"
             ),
-            gr.Slider(1, 10, value=3, step=1, label="Número de resultados por retriever (top_k)")
         ],
-        outputs=gr.JSON(label="Resultados de búsqueda semántica"),
-        title="Búsqueda Semántica en Documentos",
-        description="""Realiza búsqueda semántica en documentos indexados usando retrievers.
-                    Seleccione los retrievers disponibles y ajuste el número de resultados.""",
         api_name="_retrieve"
     )
-# Creamos la interfaz con las pestañas separadas
 demo = gr.TabbedInterface(
     [arxiv_tab, tavily_tab, list_retrievers_tab, retrieve_tab],
     ["ArXiv", "Tavily", "List Retrievers", "Retrieve"]

 DOCUMENTS_BASE_PATH = "./"
 RETRIEVERS_JSON_PATH = Path("./retrievers.json")
+# Load metadata
 def load_retrievers_metadata():
     try:
         with open(RETRIEVERS_JSON_PATH, 'r', encoding='utf-8') as f:
             return json.load(f)
     except Exception as e:
+        print(f"Error loading retrievers.json: {str(e)}")
+        print(f"Error details: {traceback.format_exc()}") # You would need to import traceback
         return {}
 retrievers_metadata = load_retrievers_metadata()
 SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
+# Load indexes
 indices: Dict[str, VectorStoreIndex] = {}
 for source, rel_path in SOURCES.items():
     full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
     if not os.path.exists(full_path):
+        print(f"Warning: Path not found for {source}")
         continue
     for root, dirs, files in os.walk(full_path):
                 storage_context = StorageContext.from_defaults(persist_dir=storage_path)
                 index_name = os.path.basename(root)
                 indices[index_name] = load_index_from_storage(storage_context) #, index_id="vector_index"
+                print(f"Index loaded successfully: {index_name}")
             except Exception as e:
+                print(f"Error loading index {index_name}: {str(e)}")
+                print(f"Error details: {traceback.format_exc()}")
     max_results: int = 5
 ) -> Dict[str, Any]:
     """
+    Searches for academic papers on ArXiv.
     Args:
+        query: Search terms (e.g. "deep learning")
+        max_results: Maximum number of results (1-10, default 5)
     Returns:
+        Dict: Search results with paper metadata
     """
     try:
+        # Configure maximum results
         max_results = min(max(1, max_results), 10)
         arxiv_tool.metadata.max_results = max_results
+        # Execute search and get results
         tool_output = arxiv_tool(query=query)
+        # Process documents
         papers = []
+        for doc in tool_output.raw_output:  # Correctly access documents
             content = doc.text_resource.text.split('\n')
             papers.append({
                 'title': content[0].split(': ')[1] if ': ' in content[0] else content[0],
 async def list_retrievers(source: str = None) -> dict:
     """
+    Returns the list of available retrievers.
+    If a source is specified and exists, filters by it; if it doesn't exist, returns all.
     Args:
+        source (str, optional): Source to filter by. If it doesn't exist, it will be ignored. Defaults to None.
     Returns:
         dict: {
+            "retrievers": List of retrievers (filtered or complete),
+            "count": Total count,
             "status": "success"|"error",
+            "source_requested": source,  # Shows what was requested
+            "source_used": "all"|source  # Shows what was actually used
         }
     """
     try:
         source_exists = source in retrievers_metadata if source else False
         for current_source, indexes in retrievers_metadata.items():
+            # Only filter if source exists, otherwise show all
             if source_exists and current_source != source:
                 continue
     top_k: int = 3
 ) -> dict:
     """
+    Performs semantic search on indexed documents.
+    Parameters:
+        query (str): Search text (required)
+        retrievers (List[str]): Names of retrievers to query (required)
+        top_k (int): Number of results per retriever (optional, default=3)
     """
+    print(f"Starting search for query: '{query}'")
+    print(f"Parameters - retrievers: {retrievers}, top_k: {top_k}")
     results = {}
     invalid = []
     for name in retrievers:
         if name not in indices:
+            print(f"Retriever not found: {name}")
             invalid.append(name)
             continue
         try:
+            print(f"Processing retriever: {name}")
             retriever = indices[name].as_retriever(similarity_top_k=top_k)
             nodes = retriever.retrieve(query)
+            print(f"Retrieved {len(nodes)} documents from {name}")
+            # 2. Search for COMPLETE metadata
             metadata = {}
             source = "unknown"
             for src, indexes in retrievers_metadata.items():
                     metadata = indexes[name]
                     source = src
                     break
+            print(f"Metadata found for {name}: {metadata.keys()}")
+            # 3. Build response
             results[name] = {
                 "title": metadata.get("title", name),
                 "documents": [
                 "source": source,
                 "last_updated": metadata.get("last_updated", "")
             }
+            print(f"Retriever {name} processed successfully")
         except Exception as e:
+            print(f"Error processing retriever {name}: {str(e)}", exc_info=True)
             results[name] = {
                 "error": str(e),
                 "retriever": name
             }
+    # Build final response
     response = {
         "query": query,
         "results": results,
     }
     if invalid:
+        print(f"Invalid retrievers: {invalid}. Valid options: {list(indices.keys())}")
         response["warnings"] = {
             "invalid_retrievers": invalid,
             "valid_options": list(indices.keys())
         }
+    print(f"Search completed. Total results: {len(results)}")
     return response
     Returns:
         dict: Search results from Tavily
     """
+    # Get API key from environment variables
     tavily_api_key = os.environ.get('TAVILY_API_KEY')
     if not tavily_api_key:
         raise ValueError("TAVILY_API_KEY environment variable not set")
 # Gradio interface
+with gr.Blocks(title="MCP Tools", theme=gr.themes.Base()) as arxiv_tab:
     arxiv_interface = gr.Interface(
         fn=search_arxiv,
         inputs=[
+            gr.Textbox(label="Search terms", placeholder="E.g.: deep learning"),
+            gr.Slider(1, 10, value=5, step=1, label="Maximum number of results")
         ],
+        outputs=gr.JSON(label="Search results"),
+        title="ArXiv Search",
+        description="Search for academic papers on ArXiv using keywords.",
         api_name="_search_arxiv"
     )
+with gr.Blocks(title="MCP Tools", theme=gr.themes.Base()) as list_retrievers_tab:
     retrievers_interface = gr.Interface(
         fn=list_retrievers,
+        inputs=gr.Textbox(label="Source (optional)", placeholder="Leave empty to list all"),
+        outputs=gr.JSON(label="List of retrievers"),
+        title="List of Retrievers",
+        description="Shows available retrievers, optionally filtered by source.",
         api_name="_list_retrievers"
     )
+with gr.Blocks(title="MCP Tools", theme=gr.themes.Base()) as tavily_tab:
     tavily_interface = gr.Interface(
         fn=search_tavily,
         inputs=[
+            gr.Textbox(label="Search query", placeholder="E.g.: latest news about AI"),
+            gr.Slider(1, 30, value=7, step=1, label="Last N days (0 for no limit)"),
+            gr.Slider(1, 10, value=1, step=1, label="Maximum results"),
+            gr.Checkbox(label="Include direct answer", value=False)
         ],
+        outputs=gr.JSON(label="Tavily results"),
+        title="Web Search (Tavily)",
+        description="Perform web searches using the Tavily API.",
         api_name="_search_tavily"
     )
+with gr.Blocks(title="MCP Tools", theme=gr.themes.Base()) as retrieve_tab:
+    # Interface for retrieve_docs
     retrieve_interface = gr.Interface(
         fn=retrieve_docs,
         inputs=[
+            gr.Textbox(label="Query", placeholder="Enter your question or search terms..."),
             gr.Dropdown(
                 choices=list(indices.keys()),
                 label="Retrievers",
                 multiselect=True,
+                info="Select one or more retrievers"
             ),
+            gr.Slider(1, 10, value=3, step=1, label="Number of results per retriever (top_k)")
         ],
+        outputs=gr.JSON(label="Semantic search results"),
+        title="Semantic Document Search",
+        description="""Perform semantic search on indexed documents using retrievers.
+                    Select available retrievers and adjust the number of results.""",
         api_name="_retrieve"
     )
+# Create the interface with separate tabs
 demo = gr.TabbedInterface(
     [arxiv_tab, tavily_tab, list_retrievers_tab, retrieve_tab],
     ["ArXiv", "Tavily", "List Retrievers", "Retrieve"]