Spaces:
Sleeping
Sleeping
Zeggai Abdellah
committed on
Commit
·
cb15d7e
1
Parent(s):
6bc642b
fix the retrieving of the tools
Browse files- prepare_env.py +5 -16
prepare_env.py
CHANGED
|
@@ -128,14 +128,15 @@ def create_vectorstore_from_json(json_path: str, collection_name: str, embedding
|
|
| 128 |
print(f"✅ Vector store created with collection: {collection_name}")
|
| 129 |
return vectorstore, documents
|
| 130 |
|
| 131 |
-
def create_retriever(vectorstore, docs, llm,
|
| 132 |
"""Create ensemble retriever with vector and BM25 search
|
| 133 |
|
| 134 |
Args:
|
| 135 |
vectorstore: The vector store for similarity search
|
| 136 |
docs: Documents for BM25 retriever
|
| 137 |
llm: Language model for multi-query generation
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
Returns:
|
| 141 |
Configured retriever (MultiQueryRetriever or EnsembleRetriever)
|
|
@@ -162,15 +163,7 @@ def create_retriever(vectorstore, docs, llm, get_all: bool = False):
|
|
| 162 |
Provide only the alternative questions, one per line."""
|
| 163 |
)
|
| 164 |
|
| 165 |
-
|
| 166 |
-
if get_all:
|
| 167 |
-
vector_k = len(docs) # Get all documents
|
| 168 |
-
bm25_k = len(docs) # Get all documents
|
| 169 |
-
print(f"π GET_ALL mode: Setting k={len(docs)} (total documents)")
|
| 170 |
-
else:
|
| 171 |
-
vector_k = 6
|
| 172 |
-
bm25_k = 3
|
| 173 |
-
print(f"🎯 FILTERED mode: Vector k={vector_k}, BM25 k={bm25_k}")
|
| 174 |
|
| 175 |
# Vector retriever
|
| 176 |
vector_retriever = vectorstore.as_retriever(
|
|
@@ -191,10 +184,6 @@ def create_retriever(vectorstore, docs, llm, get_all: bool = False):
|
|
| 191 |
)
|
| 192 |
print("✅ Ensemble retriever created (weights: 0.5, 0.5)")
|
| 193 |
|
| 194 |
-
# If get_all is True, return ensemble retriever directly to avoid query processing overhead
|
| 195 |
-
if get_all:
|
| 196 |
-
print("π Returning ensemble retriever (bypassing MultiQuery for get_all mode)")
|
| 197 |
-
return ensemble_retriever
|
| 198 |
|
| 199 |
# Multi-query expanding retriever (only for filtered mode)
|
| 200 |
expanding_retriever = MultiQueryRetriever.from_llm(
|
|
@@ -295,7 +284,7 @@ def create_section_tools(embedding_function, llm):
|
|
| 295 |
if os.path.exists(path):
|
| 296 |
print(f"π Creating retriever for section {section} from {path}")
|
| 297 |
vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
|
| 298 |
-
section_retrievers[section] = create_retriever(vstore, docs, llm,
|
| 299 |
print(f"✅ Successfully created retriever for section {section}")
|
| 300 |
else:
|
| 301 |
print(f"⚠️ Warning: File not found for section {section}: {path}")
|
|
|
|
| 128 |
print(f"✅ Vector store created with collection: {collection_name}")
|
| 129 |
return vectorstore, documents
|
| 130 |
|
| 131 |
+
def create_retriever(vectorstore, docs, llm, bm25_k=3,vector_k=6):
|
| 132 |
"""Create ensemble retriever with vector and BM25 search
|
| 133 |
|
| 134 |
Args:
|
| 135 |
vectorstore: The vector store for similarity search
|
| 136 |
docs: Documents for BM25 retriever
|
| 137 |
llm: Language model for multi-query generation
|
| 138 |
+
bm25_k: Number of documents to retrieve with BM25
|
| 139 |
+
vector_k: Number of documents to retrieve with vector search
|
| 140 |
|
| 141 |
Returns:
|
| 142 |
Configured retriever (MultiQueryRetriever or EnsembleRetriever)
|
|
|
|
| 163 |
Provide only the alternative questions, one per line."""
|
| 164 |
)
|
| 165 |
|
| 166 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
# Vector retriever
|
| 169 |
vector_retriever = vectorstore.as_retriever(
|
|
|
|
| 184 |
)
|
| 185 |
print("✅ Ensemble retriever created (weights: 0.5, 0.5)")
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
# Multi-query expanding retriever (only for filtered mode)
|
| 189 |
expanding_retriever = MultiQueryRetriever.from_llm(
|
|
|
|
| 284 |
if os.path.exists(path):
|
| 285 |
print(f"π Creating retriever for section {section} from {path}")
|
| 286 |
vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
|
| 287 |
+
section_retrievers[section] = create_retriever(vstore, docs, llm, bm25_k=5, vector_k=7)
|
| 288 |
print(f"✅ Successfully created retriever for section {section}")
|
| 289 |
else:
|
| 290 |
print(f"⚠️ Warning: File not found for section {section}: {path}")
|