geronimo-pericoli committed on
Commit
48d30ee
verified
1 Parent(s): 5dd1a5b

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +44 -146
server.py CHANGED
@@ -41,35 +41,38 @@ Settings.embed_model = embed_model
41
 
42
 
43
 
44
- # Configuración inicial (esto probablemente estaría en otro módulo)
45
  DOCUMENTS_BASE_PATH = "./"
46
  SOURCES = {
47
- "oms": "oms/", # Esta será la carpeta base que contiene todos los subíndices
 
48
  }
49
 
50
- # Cargar índices recursivamente
51
  indices: Dict[str, VectorStoreIndex] = {}
52
 
53
  for source, rel_path in SOURCES.items():
54
  full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
55
 
56
  if not os.path.exists(full_path):
 
57
  continue
58
 
59
- # Buscar todas las subcarpetas que contengan índices
60
  for root, dirs, files in os.walk(full_path):
61
  if "storage_nodes" in dirs:
62
- # Esta es una carpeta que contiene un índice
63
  try:
64
  storage_path = os.path.join(root, "storage_nodes")
65
  storage_context = StorageContext.from_defaults(persist_dir=storage_path)
66
 
67
- # Usamos el nombre de la carpeta padre como clave (ej: "vec_1")
68
  index_name = os.path.basename(root)
69
- full_index_name = f"{source}_{index_name}" # ej: "oms_vec_1"
70
 
71
  index = load_index_from_storage(storage_context, index_id="vector_index")
72
- indices[full_index_name] = index
 
 
 
 
 
73
  except Exception as e:
74
  print(f"Error cargando índice en {root}: {str(e)}")
75
  continue
@@ -85,159 +88,54 @@ mcp = FastMCP("OnBase", port=port)
85
 
86
 
87
 
88
- # Configuración del archivo retrievers.json
89
- RETRIEVERS_METADATA_PATH = Path("./retrievers.json")
90
-
91
- # Cargar metadatos de los retrievers
92
- def load_retrievers_metadata() -> Dict:
93
- try:
94
- with open(RETRIEVERS_METADATA_PATH, 'r', encoding='utf-8') as f:
95
- return json.load(f)
96
- except FileNotFoundError:
97
- print(f"Warning: {RETRIEVERS_METADATA_PATH} not found. Using empty metadata.")
98
- return {}
99
- except json.JSONDecodeError:
100
- print(f"Warning: {RETRIEVERS_METADATA_PATH} is invalid JSON. Using empty metadata.")
101
- return {}
102
-
103
- retrievers_metadata = load_retrievers_metadata()
104
-
105
- # Resource para listar solo títulos/disponibles
106
- @mcp.resource(
107
- uri="info://available_retriever_titles",
108
- name="AvailableRetrieverTitles",
109
- description="Lista los nombres/títulos disponibles de los retrievers",
110
- mime_type="application/json"
111
- )
112
- def get_retriever_titles() -> dict:
113
- """
114
- Devuelve una lista con los títulos/nombres de los retrievers disponibles
115
- """
116
- return {
117
- "titles": list(retrievers_metadata.keys()),
118
- "count": len(retrievers_metadata)
119
- }
120
-
121
- # Resource para obtener metadatos específicos
122
- @mcp.resource(
123
- uri="info://retriever_details/{retriever_title}",
124
- name="RetrieverDetails",
125
- description="Obtiene información detallada sobre un retriever específico",
126
- mime_type="application/json"
127
- )
128
- def get_retriever_details(retriever_title: str) -> dict:
129
- """
130
- Devuelve los metadatos completos para un retriever específico
131
-
132
- Parameters:
133
- retriever_title: El título/nombre del retriever (ej: 'oms')
134
- """
135
- if retriever_title not in retrievers_metadata:
136
- return {
137
- "error": f"Retriever '{retriever_title}' no encontrado",
138
- "available_titles": list(retrievers_metadata.keys())
139
- }
140
-
141
- return {
142
- "retriever": retriever_title,
143
- "details": retrievers_metadata[retriever_title]
144
- }
145
-
146
- # Modificación del resource existente para usar los metadatos
147
  @mcp.resource(
148
  uri="info://available_retrievers",
149
  name="AvailableRetrievers",
150
- description="Provides information about available document retrievers including their names and descriptions.",
151
  mime_type="application/json"
152
  )
153
- def get_available_retrievers(retriever_title: Optional[str] = None) -> dict:
154
- """
155
- Versión mejorada que puede filtrar por título de retriever
156
 
157
- Parameters:
158
- retriever_title: Opcional. Si se especifica, solo devuelve los de este título
159
- """
160
- available_retrievers = []
161
-
162
- for full_index_name in indices.keys():
163
- parts = full_index_name.split('_')
164
- source = parts[0]
165
 
166
- # Filtrar por título si se especificó
167
- if retriever_title and source != retriever_title:
168
- continue
169
-
170
- # Obtener metadatos del JSON si existen
171
- metadata = retrievers_metadata.get(source, {}).get(full_index_name, {})
172
 
173
- available_retrievers.append({
174
- "retriever_name": full_index_name,
175
  "source": source,
176
- "index_name": '_'.join(parts[1:]) if len(parts) > 1 else "default",
177
- "description": metadata.get("description", f"Documentos de {source.upper()}"),
178
- "content_info": metadata.get("content_info", "No description available"),
179
- "last_updated": metadata.get("last_updated", "unknown")
180
  })
181
 
182
- if retriever_title and not available_retrievers:
183
- return {
184
- "error": f"No hay retrievers para el título '{retriever_title}'",
185
- "available_titles": list(retrievers_metadata.keys())
186
- }
187
-
188
  return {
189
- "retrievers": available_retrievers,
190
- "count": len(available_retrievers),
191
- "filtered_by": retriever_title if retriever_title else "all"
192
  }
193
 
194
-
195
-
196
-
197
-
198
  @mcp.tool()
199
  def retrieve_docs(
200
  query: str,
201
- retrievers: List[str],
202
  top_k: int = 3
203
  ) -> dict:
204
- """
205
- Retrieve documents from different regulations using semantic search.
206
-
207
- Parameters:
208
- query: Search query (required).
209
- retrievers: List of specific retriever names to use (required).
210
- top_k: Number of results to return per retriever (default: 3).
211
-
212
- Example:
213
- retrieve_docs(
214
- query="salud p煤blica",
215
- retrievers=["oms_vec_1", "oms_tree_2"],
216
- top_k=2
217
- )
218
- """
219
- if not query:
220
- return {"error": "Query parameter is required"}
221
-
222
- if not retrievers:
223
- return {"error": "At least one retriever must be specified", "available_retrievers": list(indices.keys())}
224
-
225
- # Verificar que todos los retrievers solicitados existan
226
- invalid_retrievers = [r for r in retrievers if r not in indices]
227
- if invalid_retrievers:
228
- return {
229
- "error": f"Invalid retrievers specified: {invalid_retrievers}",
230
- "available_retrievers": list(indices.keys())
231
- }
232
-
233
  results = {}
 
234
 
235
- for retriever_name in retrievers:
 
 
 
 
236
  try:
237
- retriever = indices[retriever_name].as_retriever(similarity_top_k=top_k)
238
  nodes = retriever.retrieve(query)
239
 
240
- results[retriever_name] = [
241
  {
242
  "content": node.get_content(),
243
  "metadata": node.metadata,
@@ -246,21 +144,21 @@ def retrieve_docs(
246
  for node in nodes
247
  ]
248
  except Exception as e:
249
- results[retriever_name] = {
250
- "error": f"Error retrieving documents: {str(e)}"
251
- }
 
 
 
 
252
 
253
  return {
254
- "results": results,
255
  "query": query,
256
- "retrievers_used": retrievers,
257
- "top_k": top_k,
258
- "successful_retrievers": [r for r in retrievers if isinstance(results[r], list)],
259
- "failed_retrievers": [r for r in retrievers if not isinstance(results[r], list)]
260
  }
261
 
262
 
263
-
264
 
265
 
266
  @mcp.tool()
 
41
 
42
 
43
 
44
+ # Configuración inicial
45
  DOCUMENTS_BASE_PATH = "./"
46
  SOURCES = {
47
+ "oms": "oms/",
48
+ #"fda": "fda/"
49
  }
50
 
 
51
  indices: Dict[str, VectorStoreIndex] = {}
52
 
53
  for source, rel_path in SOURCES.items():
54
  full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
55
 
56
  if not os.path.exists(full_path):
57
+ print(f"Advertencia: No se encontró la ruta {full_path} para {source}")
58
  continue
59
 
 
60
  for root, dirs, files in os.walk(full_path):
61
  if "storage_nodes" in dirs:
 
62
  try:
63
  storage_path = os.path.join(root, "storage_nodes")
64
  storage_context = StorageContext.from_defaults(persist_dir=storage_path)
65
 
66
+ # Usamos directamente el nombre de la carpeta (vec_who_1, etc.)
67
  index_name = os.path.basename(root)
 
68
 
69
  index = load_index_from_storage(storage_context, index_id="vector_index")
70
+ indices[index_name] = index # Guardamos con el nombre directo
71
+
72
+ # Verificación opcional de metadatos
73
+ if index_name not in retrievers_metadata.get(source, {}):
74
+ print(f"Advertencia: No hay metadatos para {index_name} en retrievers.json")
75
+
76
  except Exception as e:
77
  print(f"Error cargando índice en {root}: {str(e)}")
78
  continue
 
88
 
89
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  @mcp.resource(
92
  uri="info://available_retrievers",
93
  name="AvailableRetrievers",
94
+ description="Lista completa de retrievers con metadatos",
95
  mime_type="application/json"
96
  )
97
+ def get_available_retrievers() -> dict:
98
+ available = []
 
99
 
100
+ for index_name in indices.keys():
101
+ # Determinar la fuente (oms/fda) basado en el prefijo
102
+ source = "oms" if index_name.startswith("vec_who") else "fda"
 
 
 
 
 
103
 
104
+ # Obtener metadatos
105
+ metadata = retrievers_metadata.get(source, {}).get(index_name, {})
 
 
 
 
106
 
107
+ available.append({
108
+ "name": index_name, # Ej: "vec_who_1"
109
  "source": source,
110
+ "description": metadata.get("description", "Descripción no disponible"),
111
+ "content_info": metadata.get("content_info", "Información no disponible"),
112
+ "last_updated": metadata.get("last_updated", "Desconocido")
 
113
  })
114
 
 
 
 
 
 
 
115
  return {
116
+ "retrievers": available,
117
+ "count": len(available)
 
118
  }
119
 
 
 
 
 
120
  @mcp.tool()
121
  def retrieve_docs(
122
  query: str,
123
+ retrievers: List[str], # Nombres directos (vec_who_1, etc.)
124
  top_k: int = 3
125
  ) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  results = {}
127
+ invalid = []
128
 
129
+ for name in retrievers:
130
+ if name not in indices:
131
+ invalid.append(name)
132
+ continue
133
+
134
  try:
135
+ retriever = indices[name].as_retriever(similarity_top_k=top_k)
136
  nodes = retriever.retrieve(query)
137
 
138
+ results[name] = [
139
  {
140
  "content": node.get_content(),
141
  "metadata": node.metadata,
 
144
  for node in nodes
145
  ]
146
  except Exception as e:
147
+ results[name] = {"error": str(e)}
148
+
149
+ if invalid:
150
+ results["_warnings"] = {
151
+ "invalid_retrievers": invalid,
152
+ "valid_options": list(indices.keys())
153
+ }
154
 
155
  return {
 
156
  "query": query,
157
+ "results": results,
158
+ "top_k": top_k
 
 
159
  }
160
 
161
 
 
162
 
163
 
164
  @mcp.tool()