Zeggai Abdellah committed on
Commit
cb15d7e
·
1 Parent(s): 6bc642b

Fix the retrieval of the tools

Browse files
Files changed (1) hide show
  1. prepare_env.py +5 -16
prepare_env.py CHANGED
@@ -128,14 +128,15 @@ def create_vectorstore_from_json(json_path: str, collection_name: str, embedding
128
  print(f"✅ Vector store created with collection: {collection_name}")
129
  return vectorstore, documents
130
 
131
- def create_retriever(vectorstore, docs, llm, get_all: bool = False):
132
  """Create ensemble retriever with vector and BM25 search
133
 
134
  Args:
135
  vectorstore: The vector store for similarity search
136
  docs: Documents for BM25 retriever
137
  llm: Language model for multi-query generation
138
- get_all: If True, configure retriever to return all documents
 
139
 
140
  Returns:
141
  Configured retriever (MultiQueryRetriever or EnsembleRetriever)
@@ -162,15 +163,7 @@ def create_retriever(vectorstore, docs, llm, get_all: bool = False):
162
  Provide only the alternative questions, one per line."""
163
  )
164
 
165
- # Determine k values based on get_all parameter
166
- if get_all:
167
- vector_k = len(docs) # Get all documents
168
- bm25_k = len(docs) # Get all documents
169
- print(f"📄 GET_ALL mode: Setting k={len(docs)} (total documents)")
170
- else:
171
- vector_k = 6
172
- bm25_k = 3
173
- print(f"🎯 FILTERED mode: Vector k={vector_k}, BM25 k={bm25_k}")
174
 
175
  # Vector retriever
176
  vector_retriever = vectorstore.as_retriever(
@@ -191,10 +184,6 @@ def create_retriever(vectorstore, docs, llm, get_all: bool = False):
191
  )
192
  print("✅ Ensemble retriever created (weights: 0.5, 0.5)")
193
 
194
- # If get_all is True, return ensemble retriever directly to avoid query processing overhead
195
- if get_all:
196
- print("📋 Returning ensemble retriever (bypassing MultiQuery for get_all mode)")
197
- return ensemble_retriever
198
 
199
  # Multi-query expanding retriever (only for filtered mode)
200
  expanding_retriever = MultiQueryRetriever.from_llm(
@@ -295,7 +284,7 @@ def create_section_tools(embedding_function, llm):
295
  if os.path.exists(path):
296
  print(f"πŸ“ Creating retriever for section {section} from {path}")
297
  vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
298
- section_retrievers[section] = create_retriever(vstore, docs, llm, get_all=True)
299
  print(f"✅ Successfully created retriever for section {section}")
300
  else:
301
  print(f"⚠️ Warning: File not found for section {section}: {path}")
 
128
  print(f"✅ Vector store created with collection: {collection_name}")
129
  return vectorstore, documents
130
 
131
+ def create_retriever(vectorstore, docs, llm, bm25_k=3,vector_k=6):
132
  """Create ensemble retriever with vector and BM25 search
133
 
134
  Args:
135
  vectorstore: The vector store for similarity search
136
  docs: Documents for BM25 retriever
137
  llm: Language model for multi-query generation
138
+ bm25_k: Number of documents to retrieve with BM25
139
+ vector_k: Number of documents to retrieve with vector search
140
 
141
  Returns:
142
  Configured retriever (MultiQueryRetriever or EnsembleRetriever)
 
163
  Provide only the alternative questions, one per line."""
164
  )
165
 
166
+
 
 
 
 
 
 
 
 
167
 
168
  # Vector retriever
169
  vector_retriever = vectorstore.as_retriever(
 
184
  )
185
  print("✅ Ensemble retriever created (weights: 0.5, 0.5)")
186
 
 
 
 
 
187
 
188
  # Multi-query expanding retriever (only for filtered mode)
189
  expanding_retriever = MultiQueryRetriever.from_llm(
 
284
  if os.path.exists(path):
285
  print(f"πŸ“ Creating retriever for section {section} from {path}")
286
  vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
287
+ section_retrievers[section] = create_retriever(vstore, docs, llm, bm25_k=5, vector_k=7)
288
  print(f"✅ Successfully created retriever for section {section}")
289
  else:
290
  print(f"⚠️ Warning: File not found for section {section}: {path}")