Spaces:
Sleeping
Sleeping
Update server.py
Browse files
server.py
CHANGED
|
@@ -41,19 +41,27 @@ Settings.embed_model = embed_model
|
|
| 41 |
|
| 42 |
|
| 43 |
|
| 44 |
-
# Configuración inicial
|
| 45 |
DOCUMENTS_BASE_PATH = "./"
|
|
|
|
| 46 |
|
| 47 |
-
# Cargar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
|
| 49 |
|
|
|
|
| 50 |
indices: Dict[str, VectorStoreIndex] = {}
|
| 51 |
|
| 52 |
for source, rel_path in SOURCES.items():
|
| 53 |
full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
|
| 54 |
-
|
| 55 |
if not os.path.exists(full_path):
|
| 56 |
-
print(f"Advertencia: No se encontró la ruta {full_path} para {source}")
|
| 57 |
continue
|
| 58 |
|
| 59 |
for root, dirs, files in os.walk(full_path):
|
|
@@ -62,10 +70,9 @@ for source, rel_path in SOURCES.items():
|
|
| 62 |
storage_path = os.path.join(root, "storage_nodes")
|
| 63 |
storage_context = StorageContext.from_defaults(persist_dir=storage_path)
|
| 64 |
index_name = os.path.basename(root)
|
| 65 |
-
indices[index_name] =
|
| 66 |
except Exception as e:
|
| 67 |
-
print(f"Error cargando índice
|
| 68 |
-
continue
|
| 69 |
|
| 70 |
|
| 71 |
|
|
@@ -109,6 +116,45 @@ def retrieve_docs(
|
|
| 109 |
retrievers: List[str],
|
| 110 |
top_k: int = 3
|
| 111 |
) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
results = {}
|
| 113 |
invalid = []
|
| 114 |
|
|
@@ -121,11 +167,13 @@ def retrieve_docs(
|
|
| 121 |
retriever = indices[name].as_retriever(similarity_top_k=top_k)
|
| 122 |
nodes = retriever.retrieve(query)
|
| 123 |
|
| 124 |
-
# Obtener metadatos del JSON
|
| 125 |
metadata = {}
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
| 129 |
break
|
| 130 |
|
| 131 |
results[name] = {
|
|
@@ -138,7 +186,7 @@ def retrieve_docs(
|
|
| 138 |
for node in nodes
|
| 139 |
],
|
| 140 |
"description": metadata.get("description", ""),
|
| 141 |
-
"source":
|
| 142 |
}
|
| 143 |
except Exception as e:
|
| 144 |
results[name] = {"error": str(e)}
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
|
|
|
|
| 44 |
DOCUMENTS_BASE_PATH = "./"
|
| 45 |
+
RETRIEVERS_JSON_PATH = Path("./retrievers.json")
|
| 46 |
|
| 47 |
+
# Cargar metadatos
|
| 48 |
+
def load_retrievers_metadata():
|
| 49 |
+
try:
|
| 50 |
+
with open(RETRIEVERS_JSON_PATH, 'r', encoding='utf-8') as f:
|
| 51 |
+
return json.load(f)
|
| 52 |
+
except Exception as e:
|
| 53 |
+
print(f"Error cargando retrievers.json: {str(e)}")
|
| 54 |
+
return {}
|
| 55 |
+
|
| 56 |
+
retrievers_metadata = load_retrievers_metadata()
|
| 57 |
SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
|
| 58 |
|
| 59 |
+
# Cargar índices
|
| 60 |
indices: Dict[str, VectorStoreIndex] = {}
|
| 61 |
|
| 62 |
for source, rel_path in SOURCES.items():
|
| 63 |
full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
|
|
|
|
| 64 |
if not os.path.exists(full_path):
|
|
|
|
| 65 |
continue
|
| 66 |
|
| 67 |
for root, dirs, files in os.walk(full_path):
|
|
|
|
| 70 |
storage_path = os.path.join(root, "storage_nodes")
|
| 71 |
storage_context = StorageContext.from_defaults(persist_dir=storage_path)
|
| 72 |
index_name = os.path.basename(root)
|
| 73 |
+
indices[index_name] = load_index_from_storage(storage_context, index_id="vector_index")
|
| 74 |
except Exception as e:
|
| 75 |
+
print(f"Error cargando índice {index_name}: {str(e)}")
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
|
|
|
|
| 116 |
retrievers: List[str],
|
| 117 |
top_k: int = 3
|
| 118 |
) -> dict:
|
| 119 |
+
"""
|
| 120 |
+
Realiza búsqueda semántica en los documentos indexados.
|
| 121 |
+
|
| 122 |
+
Parámetros:
|
| 123 |
+
query (str, requerido): Texto de búsqueda. Ejemplo: "vacunas COVID-19"
|
| 124 |
+
retrievers (List[str], requerido): Lista de nombres de retrievers a consultar.
|
| 125 |
+
Ejemplo: ["vec_who_1", "fda_tree_1"]
|
| 126 |
+
top_k (int, opcional): Número máximo de resultados por retriever. Default: 3
|
| 127 |
+
|
| 128 |
+
Retorna:
|
| 129 |
+
dict: {
|
| 130 |
+
"query": str, # Texto buscado
|
| 131 |
+
"results": {
|
| 132 |
+
"retriever_name": {
|
| 133 |
+
"documents": [
|
| 134 |
+
{
|
| 135 |
+
"content": str, # Texto del documento
|
| 136 |
+
"metadata": dict, # Metadatos del documento
|
| 137 |
+
"score": float # Puntaje de relevancia
|
| 138 |
+
}
|
| 139 |
+
],
|
| 140 |
+
"description": str, # Descripción del retriever
|
| 141 |
+
"source": str # Fuente (oms, fda, etc.)
|
| 142 |
+
}
|
| 143 |
+
},
|
| 144 |
+
"top_k": int,
|
| 145 |
+
"warnings": { # Solo si hay errores
|
| 146 |
+
"invalid_retrievers": [str], # Nombres no válidos
|
| 147 |
+
"valid_options": [str] # Retrievers disponibles
|
| 148 |
+
}
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
Ejemplo de uso:
|
| 152 |
+
retrieve_docs(
|
| 153 |
+
query="guías de vacunación",
|
| 154 |
+
retrievers=["vec_who_2"],
|
| 155 |
+
top_k=2
|
| 156 |
+
)
|
| 157 |
+
"""
|
| 158 |
results = {}
|
| 159 |
invalid = []
|
| 160 |
|
|
|
|
| 167 |
retriever = indices[name].as_retriever(similarity_top_k=top_k)
|
| 168 |
nodes = retriever.retrieve(query)
|
| 169 |
|
| 170 |
+
# Obtener metadatos del JSON
|
| 171 |
metadata = {}
|
| 172 |
+
source = "unknown"
|
| 173 |
+
for src, indexes in retrievers_metadata.items():
|
| 174 |
+
if name in indexes:
|
| 175 |
+
metadata = indexes[name]
|
| 176 |
+
source = src
|
| 177 |
break
|
| 178 |
|
| 179 |
results[name] = {
|
|
|
|
| 186 |
for node in nodes
|
| 187 |
],
|
| 188 |
"description": metadata.get("description", ""),
|
| 189 |
+
"source": source
|
| 190 |
}
|
| 191 |
except Exception as e:
|
| 192 |
results[name] = {"error": str(e)}
|