File size: 10,047 Bytes
70601ba
4a86350
4ee4e2e
7f73e5e
 
 
 
55153f6
 
 
2eb9acb
4a86350
70601ba
2d5c108
70601ba
55153f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87575d2
b28944c
 
2bda19d
b28944c
 
2bda19d
b28944c
2bda19d
b28944c
 
2bda19d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b28944c
87575d2
 
 
 
 
 
 
 
 
 
2eb9acb
8bb703e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2eb9acb
 
 
 
 
 
8bb703e
2eb9acb
8bb703e
2eb9acb
8bb703e
 
2eb9acb
 
 
 
 
 
 
8bb703e
 
 
 
 
 
2eb9acb
 
 
 
8bb703e
 
 
 
2eb9acb
 
8bb703e
 
 
 
 
 
2eb9acb
 
 
8bb703e
2eb9acb
 
 
 
 
 
87575d2
2eb9acb
 
 
 
 
b28944c
87575d2
 
b28944c
2bda19d
2eb9acb
 
 
 
 
 
 
 
 
4a86350
87575d2
 
 
2eb9acb
 
8370383
2eb9acb
 
 
 
 
 
4a86350
b28944c
2eb9acb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b28944c
 
 
8370383
 
70601ba
2d5c108
 
 
 
 
 
 
 
 
 
 
 
7047942
2d5c108
 
 
 
 
7047942
 
2d5c108
 
 
7047942
2d5c108
70601ba
2d5c108
 
 
 
 
 
 
 
 
 
 
 
 
 
7047942
2d5c108
 
7047942
 
2d5c108
 
 
 
 
 
 
70601ba
 
19dc6e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import aiohttp  # needed for the async HTTP requests (Tavily search)
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from mcp.server.fastmcp import FastMCP




# Azure OpenAI configuration: the endpoint and API version are fixed for this
# deployment; only the key comes from the environment.
api_key = os.environ.get('AZURE_API_KEY')  # may be None — the Azure client will fail at call time if unset
azure_endpoint = "https://pharmaia-gpt.openai.azure.com/"
api_version = "2024-02-01"

# Chat-completion model used by llama-index for answer synthesis.
llm = AzureOpenAI(
    model="gpt-4.1",
    deployment_name="gpt-4.1",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)
# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-3-large",
    deployment_name="text-embedding-3-large",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# Register both models as the llama-index global defaults so every index
# loaded below uses them.
Settings.llm = llm
Settings.embed_model = embed_model



# Initial configuration (this would probably live in another module).
DOCUMENTS_BASE_PATH = "./"
SOURCES = {
    "oms": "oms/",  # base folder that contains all the sub-indices
}

# Recursively discover and load every persisted index found under each
# source folder.  Keys are "<source>_<folder>" (e.g. "oms_vec_1").
indices: Dict[str, VectorStoreIndex] = {}

for source, rel_path in SOURCES.items():
    full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
    
    if not os.path.exists(full_path):
        continue  # source folder missing — nothing to load for it
    
    # Find every subfolder that contains a persisted index
    for root, dirs, files in os.walk(full_path):
        if "storage_nodes" in dirs:
            # This folder holds a persisted llama-index storage context
            try:
                storage_path = os.path.join(root, "storage_nodes")
                storage_context = StorageContext.from_defaults(persist_dir=storage_path)
                
                # Use the parent folder name as the key (e.g. "vec_1")
                index_name = os.path.basename(root)
                full_index_name = f"{source}_{index_name}"  # e.g. "oms_vec_1"
                
                index = load_index_from_storage(storage_context, index_id="vector_index")
                indices[full_index_name] = index
            except Exception as e:
                # Best-effort: one broken index must not prevent the rest from loading
                print(f"Error cargando índice en {root}: {str(e)}")
                continue



            


# Bind the MCP server to the configured port (PORT env var; 7860 is the
# Hugging Face Spaces default).
port = int(os.getenv("PORT", 7860))
mcp = FastMCP("OnBase", port=port)




# Location of the retrievers.json metadata file describing each retriever.
RETRIEVERS_METADATA_PATH = Path("./retrievers.json")

# Cargar metadatos de los retrievers
def load_retrievers_metadata() -> Dict:
    """Read retriever metadata from RETRIEVERS_METADATA_PATH.

    Returns the parsed JSON mapping, or an empty dict (with a warning on
    stdout) when the file is missing or contains invalid JSON.
    """
    try:
        with open(RETRIEVERS_METADATA_PATH, encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"Warning: {RETRIEVERS_METADATA_PATH} not found. Using empty metadata.")
    except json.JSONDecodeError:
        print(f"Warning: {RETRIEVERS_METADATA_PATH} is invalid JSON. Using empty metadata.")
    # Shared fallback for both error paths.
    return {}

retrievers_metadata = load_retrievers_metadata()

# Resource para listar solo títulos/disponibles
# Resource listing only the available titles
@mcp.resource(
    uri="info://available_retriever_titles",
    name="AvailableRetrieverTitles",
    description="Lista los nombres/títulos disponibles de los retrievers",
    mime_type="application/json"
)
def get_retriever_titles() -> dict:
    """Report the title of every retriever present in the metadata file."""
    titles = [title for title in retrievers_metadata]
    return {
        "titles": titles,
        "count": len(retrievers_metadata),
    }

# Resource para obtener metadatos específicos
# Resource exposing the metadata of a single retriever
@mcp.resource(
    uri="info://retriever_details/{retriever_title}",
    name="RetrieverDetails",
    description="Obtiene información detallada sobre un retriever específico",
    mime_type="application/json"
)
def get_retriever_details(retriever_title: str) -> dict:
    """Return the complete metadata entry for one retriever.

    Parameters:
        retriever_title: Title/name of the retriever (e.g. 'oms').
    """
    # Success path first: known title → full metadata entry.
    if retriever_title in retrievers_metadata:
        return {
            "retriever": retriever_title,
            "details": retrievers_metadata[retriever_title],
        }

    # Unknown title: report the error along with the valid options.
    return {
        "error": f"Retriever '{retriever_title}' no encontrado",
        "available_titles": list(retrievers_metadata.keys()),
    }

# Modificación del resource existente para usar los metadatos
@mcp.resource(
    uri="info://available_retrievers",
    name="AvailableRetrievers",
    description="Provides information about available document retrievers including their names and descriptions.",
    mime_type="application/json"
)
def get_available_retrievers(retriever_title: Optional[str] = None) -> dict:
    """
    Versión mejorada que puede filtrar por título de retriever
    
    Parameters:
        retriever_title: Opcional. Si se especifica, solo devuelve los de este título
    """
    available_retrievers = []
    
    for full_index_name in indices.keys():
        parts = full_index_name.split('_')
        source = parts[0]
        
        # Filtrar por título si se especificó
        if retriever_title and source != retriever_title:
            continue
            
        # Obtener metadatos del JSON si existen
        metadata = retrievers_metadata.get(source, {}).get(full_index_name, {})
        
        available_retrievers.append({
            "retriever_name": full_index_name,
            "source": source,
            "index_name": '_'.join(parts[1:]) if len(parts) > 1 else "default",
            "description": metadata.get("description", f"Documentos de {source.upper()}"),
            "content_info": metadata.get("content_info", "No description available"),
            "last_updated": metadata.get("last_updated", "unknown")
        })
    
    if retriever_title and not available_retrievers:
        return {
            "error": f"No hay retrievers para el título '{retriever_title}'",
            "available_titles": list(retrievers_metadata.keys())
        }
    
    return {
        "retrievers": available_retrievers,
        "count": len(available_retrievers),
        "filtered_by": retriever_title if retriever_title else "all"
    }





@mcp.tool()
def retrieve_docs(
    query: str,
    retrievers: List[str],
    top_k: int = 3
) -> dict:
    """
    Retrieve documents from different regulations using semantic search.
    
    Parameters:
        query: Search query (required).
        retrievers: List of specific retriever names to use (required).
        top_k: Number of results to return per retriever (default: 3).
    
    Example:
        retrieve_docs(
            query="salud pública",
            retrievers=["oms_vec_1", "oms_tree_2"],
            top_k=2
        )
    """
    # Guard clauses: both the query and at least one retriever are mandatory.
    if not query:
        return {"error": "Query parameter is required"}

    if not retrievers:
        return {"error": "At least one retriever must be specified", "available_retrievers": list(indices.keys())}

    # Reject the whole request if any requested retriever is unknown.
    invalid_retrievers = [name for name in retrievers if name not in indices]
    if invalid_retrievers:
        return {
            "error": f"Invalid retrievers specified: {invalid_retrievers}",
            "available_retrievers": list(indices.keys()),
        }

    results = {}

    for name in retrievers:
        try:
            # Build the retriever and run the semantic search in one pass.
            nodes = indices[name].as_retriever(similarity_top_k=top_k).retrieve(query)
            results[name] = [
                {
                    "content": node.get_content(),
                    "metadata": node.metadata,
                    "score": node.score,
                }
                for node in nodes
            ]
        except Exception as exc:
            # Per-retriever failure: record the error, keep the rest going.
            results[name] = {
                "error": f"Error retrieving documents: {str(exc)}"
            }

    # A list value means success; an error dict means that retriever failed.
    succeeded = [name for name in retrievers if isinstance(results[name], list)]
    failed = [name for name in retrievers if not isinstance(results[name], list)]

    return {
        "results": results,
        "query": query,
        "retrievers_used": retrievers,
        "top_k": top_k,
        "successful_retrievers": succeeded,
        "failed_retrievers": failed,
    }



    

@mcp.tool()
async def search_tavily(
    query: str,
    days: int = 7,
    max_results: int = 1,
    include_answer: bool = False
) -> dict:
    """Perform a web search using the Tavily API.
    
    Args:
        query: Search query string (required)
        days: Restrict search to last N days (default: 7)
        max_results: Maximum results to return (default: 1)
        include_answer: Include a direct answer only when requested by the user (default: False)
        
    Returns:
        dict: Search results from Tavily, or an error payload with
            ``status: "failed"`` when the request could not be completed.

    Raises:
        ValueError: If the TAVILY_API_KEY environment variable is not set.
    """
    # The API key must come from the environment; never hard-code it.
    tavily_api_key = os.environ.get('TAVILY_API_KEY')
    if not tavily_api_key:
        raise ValueError("TAVILY_API_KEY environment variable not set")
    
    headers = {
        "Authorization": f"Bearer {tavily_api_key}",
        "Content-Type": "application/json"
    }
    
    payload = {
        "query": query,
        "search_depth": "basic",
        "max_results": max_results,
        "include_answer": include_answer
    }
    # Fix: only send "days" when it has a positive value.  The previous code
    # sent "days": null for falsy inputs, which the API may reject.
    if days:
        payload["days"] = days
    
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "https://api.tavily.com/search",
                headers=headers,
                json=payload
            ) as response:
                # Turn HTTP 4xx/5xx into an exception handled below.
                response.raise_for_status()
                return await response.json()
    except Exception as e:
        # Best-effort: report the failure to the caller instead of crashing.
        return {
            "error": str(e),
            "status": "failed",
            "query": query
        }

if __name__ == "__main__":
    # Serve the MCP tools/resources over the SSE (Server-Sent Events) transport.
    mcp.run("sse")