geronimo-pericoli committed on
Commit
e884da5
·
verified ·
1 Parent(s): dbf30b9

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +60 -12
server.py CHANGED
@@ -41,19 +41,27 @@ Settings.embed_model = embed_model
41
 
42
 
43
 
44
- # Configuración inicial
45
  DOCUMENTS_BASE_PATH = "./"
 
46
 
47
- # Cargar automáticamente las fuentes desde el JSON
 
 
 
 
 
 
 
 
 
48
  SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
49
 
 
50
  indices: Dict[str, VectorStoreIndex] = {}
51
 
52
  for source, rel_path in SOURCES.items():
53
  full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
54
-
55
  if not os.path.exists(full_path):
56
- print(f"Advertencia: No se encontró la ruta {full_path} para {source}")
57
  continue
58
 
59
  for root, dirs, files in os.walk(full_path):
@@ -62,10 +70,9 @@ for source, rel_path in SOURCES.items():
62
  storage_path = os.path.join(root, "storage_nodes")
63
  storage_context = StorageContext.from_defaults(persist_dir=storage_path)
64
  index_name = os.path.basename(root)
65
- indices[index_name] = index # Guardamos con nombre directo
66
  except Exception as e:
67
- print(f"Error cargando índice en {root}: {str(e)}")
68
- continue
69
 
70
 
71
 
@@ -109,6 +116,45 @@ def retrieve_docs(
109
  retrievers: List[str],
110
  top_k: int = 3
111
  ) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  results = {}
113
  invalid = []
114
 
@@ -121,11 +167,13 @@ def retrieve_docs(
121
  retriever = indices[name].as_retriever(similarity_top_k=top_k)
122
  nodes = retriever.retrieve(query)
123
 
124
- # Obtener metadatos del JSON si existen
125
  metadata = {}
126
- for source in retrievers_metadata.values():
127
- if name in source:
128
- metadata = source[name]
 
 
129
  break
130
 
131
  results[name] = {
@@ -138,7 +186,7 @@ def retrieve_docs(
138
  for node in nodes
139
  ],
140
  "description": metadata.get("description", ""),
141
- "source": next((s for s, idx in retrievers_metadata.items() if name in idx), "unknown")
142
  }
143
  except Exception as e:
144
  results[name] = {"error": str(e)}
 
41
 
42
 
43
 
 
44
  DOCUMENTS_BASE_PATH = "./"
45
+ RETRIEVERS_JSON_PATH = Path("./retrievers.json")
46
 
47
+ # Cargar metadatos
48
def load_retrievers_metadata(path=None):
    """Load the retriever metadata mapping from a JSON file.

    Args:
        path: Optional location of the JSON file. When omitted, the
            module-level ``RETRIEVERS_JSON_PATH`` is used.

    Returns:
        The decoded top-level JSON object as a dict. An empty dict is
        returned (after printing a message) when the file cannot be read
        or parsed, or when its top-level value is not a JSON object.
    """
    json_path = RETRIEVERS_JSON_PATH if path is None else path
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    except Exception as e:
        # Deliberate best-effort: a missing or broken metadata file must not
        # prevent the module from loading; fall back to "no sources".
        print(f"Error cargando retrievers.json: {str(e)}")
        return {}

    # Module-level code calls .keys() and .items() on the result, so a JSON
    # list or scalar would crash later; treat it as a load failure here.
    if not isinstance(metadata, dict):
        print(
            "Error cargando retrievers.json: se esperaba un objeto JSON, "
            f"se obtuvo {type(metadata).__name__}"
        )
        return {}
    return metadata
55
+
56
+ retrievers_metadata = load_retrievers_metadata()
57
  SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
58
 
59
+ # Cargar índices
60
  indices: Dict[str, VectorStoreIndex] = {}
61
 
62
  for source, rel_path in SOURCES.items():
63
  full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
 
64
  if not os.path.exists(full_path):
 
65
  continue
66
 
67
  for root, dirs, files in os.walk(full_path):
 
70
  storage_path = os.path.join(root, "storage_nodes")
71
  storage_context = StorageContext.from_defaults(persist_dir=storage_path)
72
  index_name = os.path.basename(root)
73
+ indices[index_name] = load_index_from_storage(storage_context, index_id="vector_index")
74
  except Exception as e:
75
+ print(f"Error cargando índice {index_name}: {str(e)}")
 
76
 
77
 
78
 
 
116
  retrievers: List[str],
117
  top_k: int = 3
118
  ) -> dict:
119
+ """
120
+ Realiza búsqueda semántica en los documentos indexados.
121
+
122
+ Parámetros:
123
+ query (str, requerido): Texto de búsqueda. Ejemplo: "vacunas COVID-19"
124
+ retrievers (List[str], requerido): Lista de nombres de retrievers a consultar.
125
+ Ejemplo: ["vec_who_1", "fda_tree_1"]
126
+ top_k (int, opcional): Número máximo de resultados por retriever. Default: 3
127
+
128
+ Retorna:
129
+ dict: {
130
+ "query": str, # Texto buscado
131
+ "results": {
132
+ "retriever_name": {
133
+ "documents": [
134
+ {
135
+ "content": str, # Texto del documento
136
+ "metadata": dict, # Metadatos del documento
137
+ "score": float # Puntaje de relevancia
138
+ }
139
+ ],
140
+ "description": str, # Descripción del retriever
141
+ "source": str # Fuente (oms, fda, etc.)
142
+ }
143
+ },
144
+ "top_k": int,
145
+ "warnings": { # Solo si hay errores
146
+ "invalid_retrievers": [str], # Nombres no válidos
147
+ "valid_options": [str] # Retrievers disponibles
148
+ }
149
+ }
150
+
151
+ Ejemplo de uso:
152
+ retrieve_docs(
153
+ query="guías de vacunación",
154
+ retrievers=["vec_who_2"],
155
+ top_k=2
156
+ )
157
+ """
158
  results = {}
159
  invalid = []
160
 
 
167
  retriever = indices[name].as_retriever(similarity_top_k=top_k)
168
  nodes = retriever.retrieve(query)
169
 
170
+ # Obtener metadatos del JSON
171
  metadata = {}
172
+ source = "unknown"
173
+ for src, indexes in retrievers_metadata.items():
174
+ if name in indexes:
175
+ metadata = indexes[name]
176
+ source = src
177
  break
178
 
179
  results[name] = {
 
186
  for node in nodes
187
  ],
188
  "description": metadata.get("description", ""),
189
+ "source": source
190
  }
191
  except Exception as e:
192
  results[name] = {"error": str(e)}