Zeggai Abdellah committed on
Commit
744fb55
·
1 Parent(s): 4346bfa

add the two main files

Browse files
config.py DELETED
@@ -1,8 +0,0 @@
1
-
2
- class Config:
3
- GOOGLE_API_KEY_1 = "AIzaSyDsbC8H6e08TKDwa5WPE3SiBA39e20K4co"
4
- GOOGLE_API_KEY_2 = "AIzaSyBho3W4W9fR7wHUJbX18JKH-12wDSD7pWg"
5
- BASE_PATH = "./data" # Configurable base path
6
- EMBEDDING_MODEL = "intfloat/multilingual-e5-base"
7
- LLM_MODEL = "models/gemini-2.0-flash"
8
- CHROMA_DB_PATH = "chroma_db_multilingual"
 
 
 
 
 
 
 
 
 
data/Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json ADDED
The diff for this file is too large to render. See raw diff
 
data/Immunization in Practice_WHO_eng_2015.json ADDED
The diff for this file is too large to render. See raw diff
 
prepare_env.py CHANGED
@@ -199,75 +199,89 @@ def create_section_tools(embedding_function, llm):
199
 
200
  # Define section paths
201
  section_paths = {
202
- # 'one': 'section_one_chunks.json',
203
- # 'two': 'section_two_chunks.json',
204
- # 'three': 'section_three_chunks.json',
205
- # 'four': 'section_four_chunks.json',
206
- # 'five': 'section_five_chunks.json',
207
- # 'six': 'section_six_chunks.json',
208
- # 'seven': 'section_seven_chunks.json',
209
- # 'eight': 'section_eight_chunks.json',
210
- # 'nine': 'section_nine_chunks.json',
211
- 'ten': './data/section_ten_chunks.json'
212
  }
213
 
214
  # Create retrievers for each section
215
  section_retrievers = {}
216
  for section, path in section_paths.items():
217
  if os.path.exists(path):
218
- vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
219
  section_retrievers[section] = create_retriever(vstore, docs, llm)
220
 
221
- # # Create main guide retriever
222
- # guide_path = 'Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json'
223
- # if os.path.exists(guide_path):
224
- # guide_vstore, guide_docs = create_vectorstore_from_json(guide_path, "Guide_2023_multilingual", embedding_function)
225
- # guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
226
- # else:
227
- # guide_retriever = None
228
-
229
- # # Define tool functions
230
- # def guide_retrieval_tool(query: str) -> str:
231
- # """General-purpose retrieval tool for the entire Algerian National Vaccination Guide"""
232
- # if not guide_retriever:
233
- # return "Guide retriever not available"
234
- # return section_tool_wrapper(guide_retriever, guide_path, query)
235
-
236
- # def section_one_tool(query: str) -> str:
237
- # """Section 1: Programme Élargi de Vaccination"""
238
- # return section_tool_wrapper(section_retrievers['one'], section_paths['one'], query)
239
-
240
- # def section_two_tool(query: str) -> str:
241
- # """Section 2: Maladies Ciblées"""
242
- # return section_tool_wrapper(section_retrievers['two'], section_paths['two'], query)
243
-
244
- # def section_three_tool(query: str) -> str:
245
- # """Section 3: Vaccins du Calendrier"""
246
- # return section_tool_wrapper(section_retrievers['three'], section_paths['three'], query)
247
-
248
- # def section_four_tool(query: str) -> str:
249
- # """Section 4: Rattrapage Vaccinal"""
250
- # return section_tool_wrapper(section_retrievers['four'], section_paths['four'], query)
251
-
252
- # def section_five_tool(query: str) -> str:
253
- # """Section 5: Populations Particulières"""
254
- # return section_tool_wrapper(section_retrievers['five'], section_paths['five'], query)
255
-
256
- # def section_six_tool(query: str) -> str:
257
- # """Section 6: Chaîne du Froid"""
258
- # return section_tool_wrapper(section_retrievers['six'], section_paths['six'], query)
259
-
260
- # def section_seven_tool(query: str) -> str:
261
- # """Section 7: Sécurité des Injections"""
262
- # return section_tool_wrapper(section_retrievers['seven'], section_paths['seven'], query)
263
-
264
- # def section_eight_tool(query: str) -> str:
265
- # """Section 8: Séance de Vaccination & Vaccinovigilance"""
266
- # return section_tool_wrapper(section_retrievers['eight'], section_paths['eight'], query)
267
-
268
- # def section_nine_tool(query: str) -> str:
269
- # """Section 9: Planification des Séances de Vaccination"""
270
- # return section_tool_wrapper(section_retrievers['nine'], section_paths['nine'], query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  def section_ten_tool(query: str) -> str:
273
  """Section 10: Mobilisation Sociale"""
@@ -275,16 +289,18 @@ def create_section_tools(embedding_function, llm):
275
 
276
  # Create FunctionTool objects
277
  tools = [
278
- # FunctionTool.from_defaults(name="Guide_vector_tool", fn=guide_retrieval_tool),
279
- # FunctionTool.from_defaults(name="section_one_vector_query_tool", fn=section_one_tool),
280
- # FunctionTool.from_defaults(name="section_two_vector_query_tool", fn=section_two_tool),
281
- # FunctionTool.from_defaults(name="section_three_vector_query_tool", fn=section_three_tool),
282
- # FunctionTool.from_defaults(name="section_four_vector_query_tool", fn=section_four_tool),
283
- # FunctionTool.from_defaults(name="section_five_vector_query_tool", fn=section_five_tool),
284
- # FunctionTool.from_defaults(name="section_six_vector_query_tool", fn=section_six_tool),
285
- # FunctionTool.from_defaults(name="section_seven_vector_query_tool", fn=section_seven_tool),
286
- # FunctionTool.from_defaults(name="section_eight_vector_query_tool", fn=section_eight_tool),
287
- # FunctionTool.from_defaults(name="section_nine_vector_query_tool", fn=section_nine_tool),
 
 
288
  FunctionTool.from_defaults(name="section_ten_vector_query_tool", fn=section_ten_tool),
289
  ]
290
 
 
199
 
200
  # Define section paths
201
  section_paths = {
202
+ 'one': 'section_one_chunks.json',
203
+ 'two': 'section_two_chunks.json',
204
+ 'three': 'section_three_chunks.json',
205
+ 'four': 'section_four_chunks.json',
206
+ 'five': 'section_five_chunks.json',
207
+ 'six': 'section_six_chunks.json',
208
+ 'seven': 'section_seven_chunks.json',
209
+ 'eight': 'section_eight_chunks.json',
210
+ 'nine': 'section_nine_chunks.json',
211
+ 'ten': 'section_ten_chunks.json'
212
  }
213
 
214
  # Create retrievers for each section
215
  section_retrievers = {}
216
  for section, path in section_paths.items():
217
  if os.path.exists(path):
218
+ vstore, docs = create_vectorstore_from_json(f'./data/{path}', f"Guide_2023_{section}", embedding_function)
219
  section_retrievers[section] = create_retriever(vstore, docs, llm)
220
 
221
+ # Create main guide retriever
222
+ guide_path = './data/Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json'
223
+ if os.path.exists(guide_path):
224
+ guide_vstore, guide_docs = create_vectorstore_from_json(guide_path, "Guide_2023_multilingual", embedding_function)
225
+ guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
226
+ else:
227
+ guide_retriever = None
228
+
229
+ # Define tool functions
230
+ def guide_retrieval_tool(query: str) -> str:
231
+ """General-purpose retrieval tool for the entire Algerian National Vaccination Guide"""
232
+ if not guide_retriever:
233
+ return "Guide retriever not available"
234
+ return section_tool_wrapper(guide_retriever, guide_path, query)
235
+
236
+ # Immunization in Practice_WHO_eng_2015 retriever
237
+ immunization_path = './data/Immunization_in_Practice_WHO_eng_2015.json'
238
+ if os.path.exists(immunization_path):
239
+ immunization_vstore, immunization_docs = create_vectorstore_from_json(immunization_path, "Immunization_in_Practice_WHO_eng_2015", embedding_function)
240
+ immunization_retriever = create_retriever(immunization_vstore, immunization_docs, llm)
241
+ else:
242
+ immunization_retriever = None
243
+
244
+ def immunization_tool(query: str) -> str:
245
+ """Immunization in Practice WHO 2015 retrieval tool"""
246
+ if not immunization_retriever:
247
+ return "Immunization in Practice retriever not available"
248
+ return section_tool_wrapper(immunization_retriever, immunization_path, query)
249
+
250
+ def section_one_tool(query: str) -> str:
251
+ """Section 1: Programme Élargi de Vaccination"""
252
+ return section_tool_wrapper(section_retrievers['one'], section_paths['one'], query)
253
+
254
+ def section_two_tool(query: str) -> str:
255
+ """Section 2: Maladies Ciblées"""
256
+ return section_tool_wrapper(section_retrievers['two'], section_paths['two'], query)
257
+
258
+ def section_three_tool(query: str) -> str:
259
+ """Section 3: Vaccins du Calendrier"""
260
+ return section_tool_wrapper(section_retrievers['three'], section_paths['three'], query)
261
+
262
+ def section_four_tool(query: str) -> str:
263
+ """Section 4: Rattrapage Vaccinal"""
264
+ return section_tool_wrapper(section_retrievers['four'], section_paths['four'], query)
265
+
266
+ def section_five_tool(query: str) -> str:
267
+ """Section 5: Populations Particulières"""
268
+ return section_tool_wrapper(section_retrievers['five'], section_paths['five'], query)
269
+
270
+ def section_six_tool(query: str) -> str:
271
+ """Section 6: Chaîne du Froid"""
272
+ return section_tool_wrapper(section_retrievers['six'], section_paths['six'], query)
273
+
274
+ def section_seven_tool(query: str) -> str:
275
+ """Section 7: Sécurité des Injections"""
276
+ return section_tool_wrapper(section_retrievers['seven'], section_paths['seven'], query)
277
+
278
+ def section_eight_tool(query: str) -> str:
279
+ """Section 8: Séance de Vaccination & Vaccinovigilance"""
280
+ return section_tool_wrapper(section_retrievers['eight'], section_paths['eight'], query)
281
+
282
+ def section_nine_tool(query: str) -> str:
283
+ """Section 9: Planification des Séances de Vaccination"""
284
+ return section_tool_wrapper(section_retrievers['nine'], section_paths['nine'], query)
285
 
286
  def section_ten_tool(query: str) -> str:
287
  """Section 10: Mobilisation Sociale"""
 
289
 
290
  # Create FunctionTool objects
291
  tools = [
292
+ FunctionTool.from_defaults(name="Guide_vector_tool", fn=guide_retrieval_tool),
293
+ FunctionTool.from_defaults(name="Immunization_in_Practice_tool", fn=immunization_tool),
294
+ # Section-specific tools
295
+ FunctionTool.from_defaults(name="section_one_vector_query_tool", fn=section_one_tool),
296
+ FunctionTool.from_defaults(name="section_two_vector_query_tool", fn=section_two_tool),
297
+ FunctionTool.from_defaults(name="section_three_vector_query_tool", fn=section_three_tool),
298
+ FunctionTool.from_defaults(name="section_four_vector_query_tool", fn=section_four_tool),
299
+ FunctionTool.from_defaults(name="section_five_vector_query_tool", fn=section_five_tool),
300
+ FunctionTool.from_defaults(name="section_six_vector_query_tool", fn=section_six_tool),
301
+ FunctionTool.from_defaults(name="section_seven_vector_query_tool", fn=section_seven_tool),
302
+ FunctionTool.from_defaults(name="section_eight_vector_query_tool", fn=section_eight_tool),
303
+ FunctionTool.from_defaults(name="section_nine_vector_query_tool", fn=section_nine_tool),
304
  FunctionTool.from_defaults(name="section_ten_vector_query_tool", fn=section_ten_tool),
305
  ]
306
 
rag_pipeline.py CHANGED
@@ -266,7 +266,9 @@ def process_question_with_citations(agent, question: str, chunks_directory="./da
266
 
267
  # Load all chunks data to find cited elements
268
  all_chunks_data = []
269
- for json_file in os.listdir(chunks_directory):
 
 
270
  if json_file.endswith('.json'):
271
  json_path = os.path.join(chunks_directory, json_file)
272
  try:
 
266
 
267
  # Load all chunks data to find cited elements
268
  all_chunks_data = []
269
+ # The ids are only in the two main files, so we only need to load those two files
270
+ min_chunks_files = ["Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json", "Immunization in Practice_WHO_eng_2015.json"]
271
+ for json_file in min_chunks_files:
272
  if json_file.endswith('.json'):
273
  json_path = os.path.join(chunks_directory, json_file)
274
  try: