Spaces:
Sleeping
Sleeping
Zeggai Abdellah
committed on
Commit
·
744fb55
1
Parent(s):
4346bfa
add the two main files
Browse files
config.py
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
class Config:
|
| 3 |
-
GOOGLE_API_KEY_1 = "AIzaSyDsbC8H6e08TKDwa5WPE3SiBA39e20K4co"
|
| 4 |
-
GOOGLE_API_KEY_2 = "AIzaSyBho3W4W9fR7wHUJbX18JKH-12wDSD7pWg"
|
| 5 |
-
BASE_PATH = "./data" # Configurable base path
|
| 6 |
-
EMBEDDING_MODEL = "intfloat/multilingual-e5-base"
|
| 7 |
-
LLM_MODEL = "models/gemini-2.0-flash"
|
| 8 |
-
CHROMA_DB_PATH = "chroma_db_multilingual"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/Immunization in Practice_WHO_eng_2015.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
prepare_env.py
CHANGED
|
@@ -199,75 +199,89 @@ def create_section_tools(embedding_function, llm):
|
|
| 199 |
|
| 200 |
# Define section paths
|
| 201 |
section_paths = {
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
'ten': '
|
| 212 |
}
|
| 213 |
|
| 214 |
# Create retrievers for each section
|
| 215 |
section_retrievers = {}
|
| 216 |
for section, path in section_paths.items():
|
| 217 |
if os.path.exists(path):
|
| 218 |
-
vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
|
| 219 |
section_retrievers[section] = create_retriever(vstore, docs, llm)
|
| 220 |
|
| 221 |
-
#
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
#
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
#
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
def section_ten_tool(query: str) -> str:
|
| 273 |
"""Section 10: Mobilisation Sociale"""
|
|
@@ -275,16 +289,18 @@ def create_section_tools(embedding_function, llm):
|
|
| 275 |
|
| 276 |
# Create FunctionTool objects
|
| 277 |
tools = [
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
#
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
| 288 |
FunctionTool.from_defaults(name="section_ten_vector_query_tool", fn=section_ten_tool),
|
| 289 |
]
|
| 290 |
|
|
|
|
| 199 |
|
| 200 |
# Define section paths
|
| 201 |
section_paths = {
|
| 202 |
+
'one': 'section_one_chunks.json',
|
| 203 |
+
'two': 'section_two_chunks.json',
|
| 204 |
+
'three': 'section_three_chunks.json',
|
| 205 |
+
'four': 'section_four_chunks.json',
|
| 206 |
+
'five': 'section_five_chunks.json',
|
| 207 |
+
'six': 'section_six_chunks.json',
|
| 208 |
+
'seven': 'section_seven_chunks.json',
|
| 209 |
+
'eight': 'section_eight_chunks.json',
|
| 210 |
+
'nine': 'section_nine_chunks.json',
|
| 211 |
+
'ten': 'section_ten_chunks.json'
|
| 212 |
}
|
| 213 |
|
| 214 |
# Create retrievers for each section
|
| 215 |
section_retrievers = {}
|
| 216 |
for section, path in section_paths.items():
|
| 217 |
if os.path.exists(path):
|
| 218 |
+
vstore, docs = create_vectorstore_from_json(f'./data/{path}', f"Guide_2023_{section}", embedding_function)
|
| 219 |
section_retrievers[section] = create_retriever(vstore, docs, llm)
|
| 220 |
|
| 221 |
+
# Create main guide retriever
|
| 222 |
+
guide_path = './data/Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json'
|
| 223 |
+
if os.path.exists(guide_path):
|
| 224 |
+
guide_vstore, guide_docs = create_vectorstore_from_json(guide_path, "Guide_2023_multilingual", embedding_function)
|
| 225 |
+
guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
|
| 226 |
+
else:
|
| 227 |
+
guide_retriever = None
|
| 228 |
+
|
| 229 |
+
# Define tool functions
|
| 230 |
+
def guide_retrieval_tool(query: str) -> str:
|
| 231 |
+
"""General-purpose retrieval tool for the entire Algerian National Vaccination Guide"""
|
| 232 |
+
if not guide_retriever:
|
| 233 |
+
return "Guide retriever not available"
|
| 234 |
+
return section_tool_wrapper(guide_retriever, guide_path, query)
|
| 235 |
+
|
| 236 |
+
# Immunization in Practice_WHO_eng_2015 retriever
|
| 237 |
+
immunization_path = './data/Immunization_in_Practice_WHO_eng_2015.json'
|
| 238 |
+
if os.path.exists(immunization_path):
|
| 239 |
+
immunization_vstore, immunization_docs = create_vectorstore_from_json(immunization_path, "Immunization_in_Practice_WHO_eng_2015", embedding_function)
|
| 240 |
+
immunization_retriever = create_retriever(immunization_vstore, immunization_docs, llm)
|
| 241 |
+
else:
|
| 242 |
+
immunization_retriever = None
|
| 243 |
+
|
| 244 |
+
def immunization_tool(query: str) -> str:
|
| 245 |
+
"""Immunization in Practice WHO 2015 retrieval tool"""
|
| 246 |
+
if not immunization_retriever:
|
| 247 |
+
return "Immunization in Practice retriever not available"
|
| 248 |
+
return section_tool_wrapper(immunization_retriever, immunization_path, query)
|
| 249 |
+
|
| 250 |
+
def section_one_tool(query: str) -> str:
|
| 251 |
+
"""Section 1: Programme Élargi de Vaccination"""
|
| 252 |
+
return section_tool_wrapper(section_retrievers['one'], section_paths['one'], query)
|
| 253 |
+
|
| 254 |
+
def section_two_tool(query: str) -> str:
|
| 255 |
+
"""Section 2: Maladies Ciblées"""
|
| 256 |
+
return section_tool_wrapper(section_retrievers['two'], section_paths['two'], query)
|
| 257 |
+
|
| 258 |
+
def section_three_tool(query: str) -> str:
|
| 259 |
+
"""Section 3: Vaccins du Calendrier"""
|
| 260 |
+
return section_tool_wrapper(section_retrievers['three'], section_paths['three'], query)
|
| 261 |
+
|
| 262 |
+
def section_four_tool(query: str) -> str:
|
| 263 |
+
"""Section 4: Rattrapage Vaccinal"""
|
| 264 |
+
return section_tool_wrapper(section_retrievers['four'], section_paths['four'], query)
|
| 265 |
+
|
| 266 |
+
def section_five_tool(query: str) -> str:
|
| 267 |
+
"""Section 5: Populations Particulières"""
|
| 268 |
+
return section_tool_wrapper(section_retrievers['five'], section_paths['five'], query)
|
| 269 |
+
|
| 270 |
+
def section_six_tool(query: str) -> str:
|
| 271 |
+
"""Section 6: Chaîne du Froid"""
|
| 272 |
+
return section_tool_wrapper(section_retrievers['six'], section_paths['six'], query)
|
| 273 |
+
|
| 274 |
+
def section_seven_tool(query: str) -> str:
|
| 275 |
+
"""Section 7: Sécurité des Injections"""
|
| 276 |
+
return section_tool_wrapper(section_retrievers['seven'], section_paths['seven'], query)
|
| 277 |
+
|
| 278 |
+
def section_eight_tool(query: str) -> str:
|
| 279 |
+
"""Section 8: Séance de Vaccination & Vaccinovigilance"""
|
| 280 |
+
return section_tool_wrapper(section_retrievers['eight'], section_paths['eight'], query)
|
| 281 |
+
|
| 282 |
+
def section_nine_tool(query: str) -> str:
|
| 283 |
+
"""Section 9: Planification des Séances de Vaccination"""
|
| 284 |
+
return section_tool_wrapper(section_retrievers['nine'], section_paths['nine'], query)
|
| 285 |
|
| 286 |
def section_ten_tool(query: str) -> str:
|
| 287 |
"""Section 10: Mobilisation Sociale"""
|
|
|
|
| 289 |
|
| 290 |
# Create FunctionTool objects
|
| 291 |
tools = [
|
| 292 |
+
FunctionTool.from_defaults(name="Guide_vector_tool", fn=guide_retrieval_tool),
|
| 293 |
+
FunctionTool.from_defaults(name="Immunization_in_Practice_tool", fn=immunization_tool),
|
| 294 |
+
# Section-specific tools
|
| 295 |
+
FunctionTool.from_defaults(name="section_one_vector_query_tool", fn=section_one_tool),
|
| 296 |
+
FunctionTool.from_defaults(name="section_two_vector_query_tool", fn=section_two_tool),
|
| 297 |
+
FunctionTool.from_defaults(name="section_three_vector_query_tool", fn=section_three_tool),
|
| 298 |
+
FunctionTool.from_defaults(name="section_four_vector_query_tool", fn=section_four_tool),
|
| 299 |
+
FunctionTool.from_defaults(name="section_five_vector_query_tool", fn=section_five_tool),
|
| 300 |
+
FunctionTool.from_defaults(name="section_six_vector_query_tool", fn=section_six_tool),
|
| 301 |
+
FunctionTool.from_defaults(name="section_seven_vector_query_tool", fn=section_seven_tool),
|
| 302 |
+
FunctionTool.from_defaults(name="section_eight_vector_query_tool", fn=section_eight_tool),
|
| 303 |
+
FunctionTool.from_defaults(name="section_nine_vector_query_tool", fn=section_nine_tool),
|
| 304 |
FunctionTool.from_defaults(name="section_ten_vector_query_tool", fn=section_ten_tool),
|
| 305 |
]
|
| 306 |
|
rag_pipeline.py
CHANGED
|
@@ -266,7 +266,9 @@ def process_question_with_citations(agent, question: str, chunks_directory="./da
|
|
| 266 |
|
| 267 |
# Load all chunks data to find cited elements
|
| 268 |
all_chunks_data = []
|
| 269 |
-
|
|
|
|
|
|
|
| 270 |
if json_file.endswith('.json'):
|
| 271 |
json_path = os.path.join(chunks_directory, json_file)
|
| 272 |
try:
|
|
|
|
| 266 |
|
| 267 |
# Load all chunks data to find cited elements
|
| 268 |
all_chunks_data = []
|
| 269 |
+
# The IDs are only in the two main files, so we can load them all at once
|
| 270 |
+
min_chunks_files = ["Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json", "Immunization in Practice_WHO_eng_2015.json"]
|
| 271 |
+
for json_file in min_chunks_files:
|
| 272 |
if json_file.endswith('.json'):
|
| 273 |
json_path = os.path.join(chunks_directory, json_file)
|
| 274 |
try:
|