File size: 8,643 Bytes
980f81a
4a86350
4ee4e2e
7f73e5e
 
 
 
55153f6
 
 
2eb9acb
2e80bf6
4a86350
70601ba
2d5c108
70601ba
2b15de9
55153f6
1c8cf6c
55153f6
1c8cf6c
 
55153f6
1c8cf6c
 
 
55153f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c8cf6c
55153f6
 
3bc496b
b28944c
e884da5
 
 
 
 
 
 
 
 
 
 
 
dbf30b9
b28944c
e884da5
b28944c
2bda19d
b28944c
 
2bda19d
3bc496b
2bda19d
78b225d
2bda19d
 
 
 
 
 
e884da5
78b225d
2bda19d
e884da5
b28944c
87575d2
 
3bc496b
87575d2
 
 
 
 
 
 
 
4765456
 
 
df8dd91
 
4765456
 
df8dd91
4765456
 
df8dd91
 
 
 
 
 
 
4765456
2b15de9
89082d2
df8dd91
 
4765456
df8dd91
 
4765456
 
2b15de9
 
 
4765456
2b15de9
 
 
 
 
 
 
4765456
df8dd91
 
2c2b401
08b169e
 
 
 
 
4765456
df8dd91
 
08b169e
2c2b401
 
 
 
3bc496b
87575d2
2eb9acb
 
dbf30b9
2eb9acb
 
e884da5
3bc496b
e884da5
 
3bc496b
 
 
e884da5
3bc496b
e884da5
3bc496b
892052e
e884da5
 
 
2eb9acb
48d30ee
2eb9acb
48d30ee
 
 
 
 
2eb9acb
b963f56
48d30ee
2eb9acb
 
b963f56
 
 
 
 
 
 
 
dbf30b9
b963f56
dbf30b9
3bc496b
dbf30b9
 
 
 
 
 
 
 
 
b963f56
 
dbf30b9
b963f56
2eb9acb
b963f56
 
 
 
48d30ee
b963f56
dbf30b9
 
 
b963f56
dbf30b9
 
48d30ee
dbf30b9
48d30ee
 
 
2eb9acb
dbf30b9
b28944c
8370383
 
70601ba
2d5c108
 
 
 
 
 
 
 
 
 
 
 
7047942
2d5c108
 
 
 
 
7047942
 
2d5c108
 
 
7047942
2d5c108
70601ba
2d5c108
 
 
 
 
 
 
 
 
 
 
 
 
 
7047942
2d5c108
 
7047942
 
2d5c108
 
 
 
 
 
 
70601ba
 
19dc6e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
from mcp.server.fastmcp import FastMCP, Context
from datetime import datetime
from llama_index.core import VectorStoreIndex
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
)
from llama_index.core import Settings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from typing import Dict, Optional, List
from pathlib import Path
import json
import os
import aiohttp  # Needed for asynchronous HTTP requests

import asyncio

import logging

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)



##### AZURE OPENAI #####
# Azure OpenAI configuration: the API key is read from the environment,
# while the endpoint and API version are fixed for this deployment.
api_key = os.environ.get('AZURE_API_KEY')
azure_endpoint = "https://pharmaia-gpt.openai.azure.com/"
api_version = "2024-02-01"

# Chat-completion model used by llama_index for any LLM calls.
llm = AzureOpenAI(
    model="gpt-4.1",
    deployment_name="gpt-4.1",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)
# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-3-large",
    deployment_name="text-embedding-3-large",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# Register both models as llama_index's global defaults.
Settings.llm = llm
Settings.embed_model = embed_model
##### END AZURE OPENAI #####


# Path configuration
DOCUMENTS_BASE_PATH = "./"
RETRIEVERS_JSON_PATH = Path("./retrievers.json")

# Load metadata
def load_retrievers_metadata() -> dict:
    """Load retriever metadata from RETRIEVERS_JSON_PATH.

    Returns:
        dict: Parsed JSON contents, or an empty dict when the file is
        missing, unreadable, or contains invalid JSON (best-effort: the
        server still starts with no retrievers).
    """
    try:
        with open(RETRIEVERS_JSON_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    # Narrowed from a bare `except Exception`, which also swallowed
    # programming errors; only expected I/O / parse failures are handled.
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error cargando retrievers.json: {str(e)}")
        return {}

retrievers_metadata = load_retrievers_metadata()
# Map each source name to its (lower-cased) relative directory.
SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}

# Load indices: one VectorStoreIndex per "storage_nodes" directory found
# under each source's folder, keyed by the name of the containing directory.
indices: Dict[str, VectorStoreIndex] = {}

for source, rel_path in SOURCES.items():
    full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
    if not os.path.exists(full_path):
        print(f"Advertencia: No se encontró la ruta para {source}")
        continue

    for root, dirs, files in os.walk(full_path):
        if "storage_nodes" in dirs:
            # Compute the name before the try block: previously it was
            # assigned inside `try`, so a failure in from_defaults() made
            # the except clause reference a stale (or unbound) name.
            index_name = os.path.basename(root)
            try:
                storage_path = os.path.join(root, "storage_nodes")
                storage_context = StorageContext.from_defaults(persist_dir=storage_path)
                indices[index_name] = load_index_from_storage(storage_context, index_id="vector_index")
                print(f"Índice cargado correctamente: {index_name}")
            except Exception as e:
                print(f"Error cargando índice {index_name}: {str(e)}")



            


# HTTP port for the MCP server; configurable via the PORT environment
# variable, defaulting to 7860.
port = int(os.getenv("PORT", 7860))
mcp = FastMCP("OnBase", port=port)



@mcp.tool()
async def list_retrievers(source: Optional[str] = None) -> dict:
    """Return the list of available retrievers.

    If a `source` is given and exists, results are filtered by it; if it
    does not exist, all retrievers are returned.

    Args:
        source: Source to filter by. Ignored when it does not exist in
            the loaded metadata. Defaults to None.

    Returns:
        dict: {
            "retrievers": list of retrievers (filtered or complete),
            "count": total number,
            "status": "success"|"error",
            "source_requested": source,  # what was requested
            "source_used": "all"|source  # what was actually applied
        }
    """
    try:
        available = []
        # Only filter when the requested source actually exists;
        # otherwise fall back to listing everything.
        source_exists = source in retrievers_metadata if source else False

        for current_source, indexes in retrievers_metadata.items():
            if source_exists and current_source != source:
                continue

            for index_name, metadata in indexes.items():
                available.append({
                    "name": index_name,
                    "source": current_source,
                    "title": metadata.get("title", ""),
                    "description": metadata.get("description", "")
                })

        return {
            "retrievers": available,
            "count": len(available),
            "status": "success",
            "source_requested": source,
            "source_used": source if source_exists else "all"
        }
    except Exception as e:
        # Defensive boundary: tool responses report errors instead of raising.
        return {
            "retrievers": [],
            "count": 0,
            "status": "error",
            "error": str(e),
            "source_requested": source,
            "source_used": "none"
        }



# Main search tool
@mcp.tool()
def retrieve_docs(
    query: str,
    retrievers: List[str],
    top_k: int = 3
) -> dict:
    """Run a semantic search over the indexed documents.

    Args:
        query: Search text (required).
        retrievers: Names of the retrievers to query (required).
        top_k: Number of results per retriever (optional, default 3).

    Returns:
        dict: Per-retriever results keyed by retriever name, plus the
        original query and top_k; unknown retriever names are reported
        under "warnings".

    Example:
        retrieve_docs(
            query="estándares farmacéuticos",
            retrievers=["vec_1", "vec_2"],
            top_k=2
        )
    """
    results = {}
    unknown = []

    for retriever_name in retrievers:
        index = indices.get(retriever_name)
        if index is None:
            unknown.append(retriever_name)
            continue

        try:
            # Run the semantic search against this retriever's index.
            hits = index.as_retriever(similarity_top_k=top_k).retrieve(query)

            # Locate the full metadata entry (and owning source) for
            # this retriever; defaults apply when none is found.
            meta, origin = {}, "unknown"
            for src_name, src_indexes in retrievers_metadata.items():
                if retriever_name in src_indexes:
                    meta, origin = src_indexes[retriever_name], src_name
                    break

            docs = [
                {
                    "content": hit.get_content(),
                    "metadata": hit.metadata,
                    "score": hit.score
                }
                for hit in hits
            ]

            results[retriever_name] = {
                "title": meta.get("title", retriever_name),
                "documents": docs,
                "description": meta.get("description", ""),
                "source": origin,
                "last_updated": meta.get("last_updated", "")
            }

        except Exception as exc:
            # Report per-retriever failures without aborting the others.
            results[retriever_name] = {
                "error": str(exc),
                "retriever": retriever_name
            }

    response = {
        "query": query,
        "results": results,
        "top_k": top_k,
    }

    if unknown:
        response["warnings"] = {
            "invalid_retrievers": unknown,
            "valid_options": list(indices.keys())
        }

    return response

    

@mcp.tool()
async def search_tavily(
    query: str,
    days: int = 7,
    max_results: int = 1,
    include_answer: bool = False
) -> dict:
    """Perform a web search using the Tavily API.

    Args:
        query: Search query string (required)
        days: Restrict search to last N days (default: 7)
        max_results: Maximum results to return (default: 1)
        include_answer: Include a direct answer only when requested by the user (default: False)

    Returns:
        dict: Search results from Tavily, or a
        {"error", "status", "query"} dict on request failure.

    Raises:
        ValueError: If the TAVILY_API_KEY environment variable is not set.
    """
    # The API key comes from the environment; fail fast when missing.
    tavily_api_key = os.environ.get('TAVILY_API_KEY')
    if not tavily_api_key:
        raise ValueError("TAVILY_API_KEY environment variable not set")

    headers = {
        "Authorization": f"Bearer {tavily_api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "query": query,
        "search_depth": "basic",
        "max_results": max_results,
        "include_answer": include_answer
    }
    # Only send "days" when a positive window was requested; the previous
    # version sent an explicit JSON null when `days` was falsy.
    if days:
        payload["days"] = days

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "https://api.tavily.com/search",
                headers=headers,
                json=payload
            ) as response:
                response.raise_for_status()
                result = await response.json()
                return result

    except Exception as e:
        # Best-effort: surface the failure to the caller instead of raising.
        return {
            "error": str(e),
            "status": "failed",
            "query": query
        }

if __name__ == "__main__":
    # Start the MCP server using the SSE transport.
    mcp.run("sse")