Spaces:
Sleeping
Sleeping
Zeggai Abdellah
commited on
Commit
·
c0e5c04
1
Parent(s):
5a74e30
update the Immunization_in_Practice_tool tool
Browse files- prepare_env.py +217 -121
- rag_pipeline.py +110 -79
prepare_env.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
-
Environment preparation script for vaccine assistant
|
| 4 |
-
Creates vector stores and retrieval tools
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
@@ -56,6 +56,11 @@ def extract_source_ids(response_text):
|
|
| 56 |
# Get unique source IDs
|
| 57 |
source_ids = list(set(all_ids))
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
if not source_ids:
|
| 60 |
print("Warning: No valid source IDs found after filtering.")
|
| 61 |
return []
|
|
@@ -70,17 +75,15 @@ def setup_models():
|
|
| 70 |
model_name="intfloat/multilingual-e5-base"
|
| 71 |
)
|
| 72 |
|
| 73 |
-
# Initialize LLM
|
| 74 |
genai_api_key = os.getenv('GOOGLE_API_KEY')
|
| 75 |
llm = ChatGoogleGenerativeAI(
|
| 76 |
model="gemini-2.0-flash",
|
| 77 |
-
google_api_key=genai_api_key
|
| 78 |
-
temperature=0.1 # Lower temperature for more focused responses
|
| 79 |
)
|
| 80 |
|
| 81 |
return embedding_function, llm
|
| 82 |
|
| 83 |
-
|
| 84 |
def create_vectorstore_from_json(json_path: str, collection_name: str, embedding_function):
|
| 85 |
"""Create vector store from JSON chunks"""
|
| 86 |
# Load the chunks.json
|
|
@@ -112,13 +115,12 @@ def create_vectorstore_from_json(json_path: str, collection_name: str, embedding
|
|
| 112 |
)
|
| 113 |
return vectorstore, documents
|
| 114 |
|
| 115 |
-
|
| 116 |
def create_retriever(vectorstore, docs, llm):
|
| 117 |
"""Create ensemble retriever with vector and BM25 search"""
|
| 118 |
# Vector retriever
|
| 119 |
vector_retriever = vectorstore.as_retriever(
|
| 120 |
search_type="similarity",
|
| 121 |
-
search_kwargs={"k":
|
| 122 |
)
|
| 123 |
|
| 124 |
# BM25 retriever
|
|
@@ -131,7 +133,7 @@ def create_retriever(vectorstore, docs, llm):
|
|
| 131 |
weights=[0.5, 0.5]
|
| 132 |
)
|
| 133 |
|
| 134 |
-
# Multi-query expanding retriever
|
| 135 |
expanding_retriever = MultiQueryRetriever.from_llm(
|
| 136 |
retriever=ensemble_retriever,
|
| 137 |
llm=llm
|
|
@@ -139,7 +141,6 @@ def create_retriever(vectorstore, docs, llm):
|
|
| 139 |
|
| 140 |
return expanding_retriever
|
| 141 |
|
| 142 |
-
|
| 143 |
def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextNode]:
|
| 144 |
"""Convert ChromaDB Document objects to LlamaIndex TextNode objects"""
|
| 145 |
nodes = []
|
|
@@ -161,9 +162,8 @@ def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextN
|
|
| 161 |
continue
|
| 162 |
return nodes
|
| 163 |
|
| 164 |
-
|
| 165 |
def section_tool_wrapper(retriever, section_path_chunks, query):
|
| 166 |
-
"""Generic section tool wrapper
|
| 167 |
try:
|
| 168 |
retrieved_docs = retriever.get_relevant_documents(query)
|
| 169 |
nodes_from_retrieved_docs = convert_chromadb_to_llamaindex_nodes(retrieved_docs)
|
|
@@ -178,15 +178,13 @@ def section_tool_wrapper(retriever, section_path_chunks, query):
|
|
| 178 |
chunks_unique = [node for node in chunks_data if node.get('element_id', 'Unknown') in chunk_ids]
|
| 179 |
combined_text = []
|
| 180 |
|
| 181 |
-
|
| 182 |
-
max_chunks = 8 # Reasonable limit
|
| 183 |
-
for chu in chunks_unique[:max_chunks]:
|
| 184 |
if "TableElement" == chu["type"]:
|
| 185 |
-
text = f"[{chu['element_id']}]\n CONTENT: \n{chu['text']}\n HTML: \n {chu['table_text_as_html']} \n\n"
|
| 186 |
combined_text.append(text)
|
| 187 |
else:
|
| 188 |
for element in chu["elements"]:
|
| 189 |
-
text = f"[{element['element_id']}]\n CONTENT: \n{element['text']} \n\n"
|
| 190 |
combined_text.append(text)
|
| 191 |
|
| 192 |
result = "\n---\n".join(combined_text)
|
|
@@ -196,9 +194,8 @@ def section_tool_wrapper(retriever, section_path_chunks, query):
|
|
| 196 |
print(f"Error in section tool: {e}")
|
| 197 |
return f"Error retrieving documents: {str(e)}"
|
| 198 |
|
| 199 |
-
|
| 200 |
def create_section_tools(embedding_function, llm):
|
| 201 |
-
"""Create all section-specific retrieval tools
|
| 202 |
|
| 203 |
# Define section paths
|
| 204 |
section_paths = {
|
|
@@ -217,7 +214,7 @@ def create_section_tools(embedding_function, llm):
|
|
| 217 |
# Create retrievers for each section
|
| 218 |
section_retrievers = {}
|
| 219 |
for section, path in section_paths.items():
|
| 220 |
-
if os.path.exists(
|
| 221 |
vstore, docs = create_vectorstore_from_json(f'./data/{path}', f"Guide_2023_{section}", embedding_function)
|
| 222 |
section_retrievers[section] = create_retriever(vstore, docs, llm)
|
| 223 |
|
|
@@ -228,9 +225,29 @@ def create_section_tools(embedding_function, llm):
|
|
| 228 |
guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
|
| 229 |
else:
|
| 230 |
guide_retriever = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
# Primary + Secondary Document Paths
|
| 233 |
-
immunization_path = './data/
|
| 234 |
|
| 235 |
# WHO Immunization in Practice Tool
|
| 236 |
if os.path.exists(immunization_path):
|
|
@@ -243,134 +260,213 @@ def create_section_tools(embedding_function, llm):
|
|
| 243 |
else:
|
| 244 |
immunization_retriever = None
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
def guide_retrieval_tool(query: str) -> str:
|
| 249 |
"""
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
- Disease definitions and descriptions
|
| 257 |
-
- Vaccine schedules and protocols
|
| 258 |
-
- Comparative questions needing Algerian perspective
|
| 259 |
-
- Any question about Algeria's vaccination program
|
| 260 |
-
|
| 261 |
-
**Keywords that indicate this tool:** Algeria, Algerian, national, calendrier, vaccination, PEV, diseases (diphteria, polio, measles, etc.)
|
| 262 |
-
|
| 263 |
Args:
|
| 264 |
-
query (str):
|
| 265 |
-
|
| 266 |
Returns:
|
| 267 |
-
str:
|
| 268 |
"""
|
| 269 |
-
if not
|
| 270 |
-
return "
|
| 271 |
-
return section_tool_wrapper(
|
| 272 |
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
"""
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
**Keywords that indicate this tool:** WHO, international, global, best practices, standards
|
| 286 |
-
|
| 287 |
Args:
|
| 288 |
-
query (str):
|
| 289 |
-
|
| 290 |
Returns:
|
| 291 |
-
str:
|
| 292 |
"""
|
| 293 |
-
|
| 294 |
-
return "Immunization in Practice retriever not available"
|
| 295 |
-
return section_tool_wrapper(immunization_retriever, immunization_path, query)
|
| 296 |
|
| 297 |
-
# Section-Specific Tools (USE ONLY IF QUESTION IS VERY SPECIFIC TO THE SECTION)
|
| 298 |
|
| 299 |
def section_two_tool(query: str) -> str:
|
| 300 |
"""
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
**Keywords:** definition, symptoms, transmission, complications, disease characteristics
|
| 311 |
-
|
| 312 |
Args:
|
| 313 |
-
query (str):
|
| 314 |
-
|
| 315 |
Returns:
|
| 316 |
-
str: Disease-specific
|
| 317 |
"""
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
return section_tool_wrapper(section_retrievers['two'], f'./data/{section_paths["two"]}', query)
|
| 321 |
|
| 322 |
def section_three_tool(query: str) -> str:
|
| 323 |
"""
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
- "What type of vaccine is used for diphtheria?"
|
| 330 |
-
- "How is the MMR vaccine administered?"
|
| 331 |
-
- "Vaccine composition and dosage"
|
| 332 |
-
|
| 333 |
-
**Keywords:** vaccine type, composition, administration, dosage, technical details
|
| 334 |
-
|
| 335 |
Args:
|
| 336 |
-
query (str):
|
| 337 |
-
|
| 338 |
Returns:
|
| 339 |
-
str:
|
| 340 |
"""
|
| 341 |
-
|
| 342 |
-
return "Section 3 retriever not available"
|
| 343 |
-
return section_tool_wrapper(section_retrievers['three'], f'./data/{section_paths["three"]}', query)
|
| 344 |
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
tools = [
|
| 347 |
-
|
| 348 |
-
FunctionTool.from_defaults(
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
),
|
| 353 |
-
FunctionTool.from_defaults(
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
),
|
| 358 |
-
|
| 359 |
-
FunctionTool.from_defaults(
|
| 360 |
-
name="disease_definitions_search",
|
| 361 |
-
fn=section_two_tool,
|
| 362 |
-
description="SPECIALIZED: Search for specific disease definitions, symptoms, and characteristics"
|
| 363 |
-
),
|
| 364 |
-
FunctionTool.from_defaults(
|
| 365 |
-
name="vaccine_technical_search",
|
| 366 |
-
fn=section_three_tool,
|
| 367 |
-
description="SPECIALIZED: Search for technical vaccine details, composition, and administration methods"
|
| 368 |
-
),
|
| 369 |
]
|
| 370 |
|
| 371 |
return tools
|
| 372 |
|
| 373 |
-
|
| 374 |
def prepare_environment():
|
| 375 |
"""Main function to prepare the environment and return tools"""
|
| 376 |
print("Setting up models...")
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
+
Environment preparation script for vaccine assistant
|
| 4 |
+
Creates vector stores and retrieval tools
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
|
|
| 56 |
# Get unique source IDs
|
| 57 |
source_ids = list(set(all_ids))
|
| 58 |
|
| 59 |
+
# Filter out any non-UUID-like IDs (if needed)
|
| 60 |
+
# This is now optional as we're handling various source ID formats
|
| 61 |
+
# uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$'
|
| 62 |
+
# source_ids = [source_id for source_id in source_ids if re.match(uuid_pattern, source_id, re.IGNORECASE)]
|
| 63 |
+
|
| 64 |
if not source_ids:
|
| 65 |
print("Warning: No valid source IDs found after filtering.")
|
| 66 |
return []
|
|
|
|
| 75 |
model_name="intfloat/multilingual-e5-base"
|
| 76 |
)
|
| 77 |
|
| 78 |
+
# Initialize LLM
|
| 79 |
genai_api_key = os.getenv('GOOGLE_API_KEY')
|
| 80 |
llm = ChatGoogleGenerativeAI(
|
| 81 |
model="gemini-2.0-flash",
|
| 82 |
+
google_api_key=genai_api_key
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
return embedding_function, llm
|
| 86 |
|
|
|
|
| 87 |
def create_vectorstore_from_json(json_path: str, collection_name: str, embedding_function):
|
| 88 |
"""Create vector store from JSON chunks"""
|
| 89 |
# Load the chunks.json
|
|
|
|
| 115 |
)
|
| 116 |
return vectorstore, documents
|
| 117 |
|
|
|
|
| 118 |
def create_retriever(vectorstore, docs, llm):
|
| 119 |
"""Create ensemble retriever with vector and BM25 search"""
|
| 120 |
# Vector retriever
|
| 121 |
vector_retriever = vectorstore.as_retriever(
|
| 122 |
search_type="similarity",
|
| 123 |
+
search_kwargs={"k": 6}
|
| 124 |
)
|
| 125 |
|
| 126 |
# BM25 retriever
|
|
|
|
| 133 |
weights=[0.5, 0.5]
|
| 134 |
)
|
| 135 |
|
| 136 |
+
# Multi-query expanding retriever
|
| 137 |
expanding_retriever = MultiQueryRetriever.from_llm(
|
| 138 |
retriever=ensemble_retriever,
|
| 139 |
llm=llm
|
|
|
|
| 141 |
|
| 142 |
return expanding_retriever
|
| 143 |
|
|
|
|
| 144 |
def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextNode]:
|
| 145 |
"""Convert ChromaDB Document objects to LlamaIndex TextNode objects"""
|
| 146 |
nodes = []
|
|
|
|
| 162 |
continue
|
| 163 |
return nodes
|
| 164 |
|
|
|
|
| 165 |
def section_tool_wrapper(retriever, section_path_chunks, query):
|
| 166 |
+
"""Generic section tool wrapper"""
|
| 167 |
try:
|
| 168 |
retrieved_docs = retriever.get_relevant_documents(query)
|
| 169 |
nodes_from_retrieved_docs = convert_chromadb_to_llamaindex_nodes(retrieved_docs)
|
|
|
|
| 178 |
chunks_unique = [node for node in chunks_data if node.get('element_id', 'Unknown') in chunk_ids]
|
| 179 |
combined_text = []
|
| 180 |
|
| 181 |
+
for chu in chunks_unique:
|
|
|
|
|
|
|
| 182 |
if "TableElement" == chu["type"]:
|
| 183 |
+
text = f"[Source: {chu['element_id']}]\n CONTENT: \n{chu['text']}\n HTML: \n {chu['table_text_as_html']} \n\n"
|
| 184 |
combined_text.append(text)
|
| 185 |
else:
|
| 186 |
for element in chu["elements"]:
|
| 187 |
+
text = f"[Source: {element['element_id']}]\n CONTENT: \n{element['text']} \n\n"
|
| 188 |
combined_text.append(text)
|
| 189 |
|
| 190 |
result = "\n---\n".join(combined_text)
|
|
|
|
| 194 |
print(f"Error in section tool: {e}")
|
| 195 |
return f"Error retrieving documents: {str(e)}"
|
| 196 |
|
|
|
|
| 197 |
def create_section_tools(embedding_function, llm):
|
| 198 |
+
"""Create all section-specific retrieval tools"""
|
| 199 |
|
| 200 |
# Define section paths
|
| 201 |
section_paths = {
|
|
|
|
| 214 |
# Create retrievers for each section
|
| 215 |
section_retrievers = {}
|
| 216 |
for section, path in section_paths.items():
|
| 217 |
+
if os.path.exists(path):
|
| 218 |
vstore, docs = create_vectorstore_from_json(f'./data/{path}', f"Guide_2023_{section}", embedding_function)
|
| 219 |
section_retrievers[section] = create_retriever(vstore, docs, llm)
|
| 220 |
|
|
|
|
| 225 |
guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
|
| 226 |
else:
|
| 227 |
guide_retriever = None
|
| 228 |
+
# General-purpose tool (entire Algerian guide)
|
| 229 |
+
def guide_retrieval_tool(query: str) -> str:
|
| 230 |
+
"""
|
| 231 |
+
General-purpose retrieval tool for the entire Algerian National Vaccination Guide (2023).
|
| 232 |
+
|
| 233 |
+
Use this tool when a query spans multiple sections or cannot be routed confidently to a specific tool.
|
| 234 |
+
This is the fallback and all-encompassing tool to retrieve any vaccination-related information
|
| 235 |
+
from the national guide.
|
| 236 |
+
|
| 237 |
+
Secondary source: The WHO Immunization Guide can be queried separately via `Immunization_in_Practice_tool`.
|
| 238 |
+
|
| 239 |
+
Args:
|
| 240 |
+
query (str): A general or complex question related to vaccination policy, schedules, or practice.
|
| 241 |
+
|
| 242 |
+
Returns:
|
| 243 |
+
str: Synthesized response based on the full Algerian guide.
|
| 244 |
+
"""
|
| 245 |
+
if not guide_retriever:
|
| 246 |
+
return "Guide retriever not available"
|
| 247 |
+
return section_tool_wrapper(guide_retriever, guide_path, query)
|
| 248 |
|
| 249 |
# Primary + Secondary Document Paths
|
| 250 |
+
immunization_path = './data/Immunization in Practice_WHO_eng_2015.json'
|
| 251 |
|
| 252 |
# WHO Immunization in Practice Tool
|
| 253 |
if os.path.exists(immunization_path):
|
|
|
|
| 260 |
else:
|
| 261 |
immunization_retriever = None
|
| 262 |
|
| 263 |
+
def immunization_tool(query: str) -> str:
|
|
|
|
|
|
|
| 264 |
"""
|
| 265 |
+
WHO Immunization in Practice 2015 retrieval tool.
|
| 266 |
+
|
| 267 |
+
Use this tool to provide global best practices and operational guidance on immunization,
|
| 268 |
+
especially when context or clarification is needed beyond the Algerian national guide.
|
| 269 |
+
This can serve as a secondary source for training, logistics, and procedural reference.
|
| 270 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
Args:
|
| 272 |
+
query (str): A question related to immunization practice in general.
|
| 273 |
+
|
| 274 |
Returns:
|
| 275 |
+
str: Retrieved guidance from the WHO Immunization in Practice manual (2015).
|
| 276 |
"""
|
| 277 |
+
if not immunization_retriever:
|
| 278 |
+
return "Immunization in Practice retriever not available"
|
| 279 |
+
return section_tool_wrapper(immunization_retriever, immunization_path, query)
|
| 280 |
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
# Section-Specific Tools (Primary: Algerian National Vaccination Guide)
|
| 284 |
+
|
| 285 |
+
def section_one_tool(query: str) -> str:
|
| 286 |
"""
|
| 287 |
+
Section 1: Programme Élargi de Vaccination (PEV)
|
| 288 |
+
|
| 289 |
+
Use this tool to retrieve information about the Algerian immunization program:
|
| 290 |
+
its objectives, historical background, strengths and weaknesses, and justification
|
| 291 |
+
for calendar updates.
|
| 292 |
+
|
| 293 |
+
Primary source: Algerian National Vaccination Guide, Section 1.
|
| 294 |
+
Secondary source for operational benchmarks: WHO Immunization in Practice (optional).
|
| 295 |
+
|
|
|
|
|
|
|
|
|
|
| 296 |
Args:
|
| 297 |
+
query (str): A question about Algeria’s national immunization strategy.
|
| 298 |
+
|
| 299 |
Returns:
|
| 300 |
+
str: Relevant content from Section 1 of the guide.
|
| 301 |
"""
|
| 302 |
+
return section_tool_wrapper(section_retrievers['one'], section_paths['one'], query)
|
|
|
|
|
|
|
| 303 |
|
|
|
|
| 304 |
|
| 305 |
def section_two_tool(query: str) -> str:
|
| 306 |
"""
|
| 307 |
+
Section 2: Maladies Ciblées par la Vaccination
|
| 308 |
+
|
| 309 |
+
Use this tool for questions about the diseases targeted by the national vaccination calendar:
|
| 310 |
+
symptoms, transmission, complications, and prevention strategies.
|
| 311 |
+
|
| 312 |
+
Primary source: Algerian National Guide, Section 2.
|
| 313 |
+
Secondary source: WHO guide may support contextual insights.
|
| 314 |
+
|
|
|
|
|
|
|
|
|
|
| 315 |
Args:
|
| 316 |
+
query (str): A question about a vaccine-preventable disease (e.g. polio, rougeole).
|
| 317 |
+
|
| 318 |
Returns:
|
| 319 |
+
str: Disease-specific guidance from Section 2.
|
| 320 |
"""
|
| 321 |
+
return section_tool_wrapper(section_retrievers['two'], section_paths['two'], query)
|
| 322 |
+
|
|
|
|
| 323 |
|
| 324 |
def section_three_tool(query: str) -> str:
|
| 325 |
"""
|
| 326 |
+
Section 3: Vaccins du Calendrier
|
| 327 |
+
|
| 328 |
+
Use this tool to retrieve technical and procedural information about the vaccines used in the calendar:
|
| 329 |
+
names, contents, administration method, and dosing details.
|
| 330 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
Args:
|
| 332 |
+
query (str): A question about a specific vaccine's type or method of use.
|
| 333 |
+
|
| 334 |
Returns:
|
| 335 |
+
str: Vaccine information from Section 3.
|
| 336 |
"""
|
| 337 |
+
return section_tool_wrapper(section_retrievers['three'], section_paths['three'], query)
|
|
|
|
|
|
|
| 338 |
|
| 339 |
+
|
| 340 |
+
def section_four_tool(query: str) -> str:
|
| 341 |
+
"""
|
| 342 |
+
Section 4: Rattrapage Vaccinal
|
| 343 |
+
|
| 344 |
+
Use this tool to determine catch-up strategies for children who missed or delayed one or more doses.
|
| 345 |
+
It provides age-adjusted rescheduling rules and justifications.
|
| 346 |
+
|
| 347 |
+
Args:
|
| 348 |
+
query (str): A question about how to manage missed vaccinations.
|
| 349 |
+
|
| 350 |
+
Returns:
|
| 351 |
+
str: Catch-up guidelines from Section 4.
|
| 352 |
+
"""
|
| 353 |
+
return section_tool_wrapper(section_retrievers['four'], section_paths['four'], query)
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def section_five_tool(query: str) -> str:
|
| 357 |
+
"""
|
| 358 |
+
Section 5: Vaccination des Populations Particulières
|
| 359 |
+
|
| 360 |
+
Use this tool to retrieve recommendations for specific medical contexts:
|
| 361 |
+
preterm infants, immunocompromised children, allergies (e.g. eggs), and chronic diseases.
|
| 362 |
+
|
| 363 |
+
Args:
|
| 364 |
+
query (str): A question about vaccination adaptations for vulnerable groups.
|
| 365 |
+
|
| 366 |
+
Returns:
|
| 367 |
+
str: Guidelines from Section 5.
|
| 368 |
+
"""
|
| 369 |
+
return section_tool_wrapper(section_retrievers['five'], section_paths['five'], query)
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
def section_six_tool(query: str) -> str:
|
| 373 |
+
"""
|
| 374 |
+
Section 6: Chaîne du Froid
|
| 375 |
+
|
| 376 |
+
Use this tool for logistics, storage conditions, temperature monitoring,
|
| 377 |
+
and emergency procedures in case of cold chain failure.
|
| 378 |
+
|
| 379 |
+
Args:
|
| 380 |
+
query (str): A question about how vaccines should be stored and transported.
|
| 381 |
+
|
| 382 |
+
Returns:
|
| 383 |
+
str: Operational cold chain standards from Section 6.
|
| 384 |
+
"""
|
| 385 |
+
return section_tool_wrapper(section_retrievers['six'], section_paths['six'], query)
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def section_seven_tool(query: str) -> str:
|
| 389 |
+
"""
|
| 390 |
+
Section 7: Sécurité des Injections
|
| 391 |
+
|
| 392 |
+
Use this tool to ensure injection safety: handling equipment, preventing needle-stick injuries,
|
| 393 |
+
and disposing of biomedical waste.
|
| 394 |
+
|
| 395 |
+
Args:
|
| 396 |
+
query (str): A question about safe injection practices.
|
| 397 |
+
|
| 398 |
+
Returns:
|
| 399 |
+
str: Procedures and guidelines from Section 7.
|
| 400 |
+
"""
|
| 401 |
+
return section_tool_wrapper(section_retrievers['seven'], section_paths['seven'], query)
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
def section_eight_tool(query: str) -> str:
|
| 405 |
+
"""
|
| 406 |
+
Section 8: Tenue d'une Séance de Vaccination & Vaccinovigilance
|
| 407 |
+
|
| 408 |
+
Use this tool to plan and monitor vaccination sessions, including material preparation,
|
| 409 |
+
injection recording, and handling of adverse events post-immunization (AEFI).
|
| 410 |
+
|
| 411 |
+
Args:
|
| 412 |
+
query (str): A question about session operations or vaccine side effect monitoring.
|
| 413 |
+
|
| 414 |
+
Returns:
|
| 415 |
+
str: Guidelines from Section 8.
|
| 416 |
+
"""
|
| 417 |
+
return section_tool_wrapper(section_retrievers['eight'], section_paths['eight'], query)
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def section_nine_tool(query: str) -> str:
|
| 421 |
+
"""
|
| 422 |
+
Section 9: Planification des Séances de Vaccination
|
| 423 |
+
|
| 424 |
+
Use this tool to support logistical planning: mapping, resource estimation,
|
| 425 |
+
scheduling, and stock management.
|
| 426 |
+
|
| 427 |
+
Args:
|
| 428 |
+
query (str): A question about planning and organizing vaccination sessions.
|
| 429 |
+
|
| 430 |
+
Returns:
|
| 431 |
+
str: Recommendations from Section 9.
|
| 432 |
+
"""
|
| 433 |
+
return section_tool_wrapper(section_retrievers['nine'], section_paths['nine'], query)
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def section_ten_tool(query: str) -> str:
|
| 437 |
+
"""
|
| 438 |
+
Section 10: Mobilisation Sociale
|
| 439 |
+
|
| 440 |
+
Use this tool for strategies to increase public engagement, combat vaccine hesitancy,
|
| 441 |
+
and manage misinformation.
|
| 442 |
+
|
| 443 |
+
Args:
|
| 444 |
+
query (str): A question about public communication and trust-building around vaccines.
|
| 445 |
+
|
| 446 |
+
Returns:
|
| 447 |
+
str: Social mobilization approaches from Section 10.
|
| 448 |
+
"""
|
| 449 |
+
return section_tool_wrapper(section_retrievers['ten'], section_paths['ten'], query)
|
| 450 |
+
|
| 451 |
+
# Create FunctionTool objects
|
| 452 |
tools = [
|
| 453 |
+
FunctionTool.from_defaults(name="Guide_vector_tool", fn=guide_retrieval_tool),
|
| 454 |
+
FunctionTool.from_defaults(name="Immunization_in_Practice_tool", fn=immunization_tool),
|
| 455 |
+
# Section-specific tools
|
| 456 |
+
FunctionTool.from_defaults(name="section_one_vector_query_tool", fn=section_one_tool),
|
| 457 |
+
FunctionTool.from_defaults(name="section_two_vector_query_tool", fn=section_two_tool),
|
| 458 |
+
FunctionTool.from_defaults(name="section_three_vector_query_tool", fn=section_three_tool),
|
| 459 |
+
FunctionTool.from_defaults(name="section_four_vector_query_tool", fn=section_four_tool),
|
| 460 |
+
FunctionTool.from_defaults(name="section_five_vector_query_tool", fn=section_five_tool),
|
| 461 |
+
FunctionTool.from_defaults(name="section_six_vector_query_tool", fn=section_six_tool),
|
| 462 |
+
FunctionTool.from_defaults(name="section_seven_vector_query_tool", fn=section_seven_tool),
|
| 463 |
+
FunctionTool.from_defaults(name="section_eight_vector_query_tool", fn=section_eight_tool),
|
| 464 |
+
FunctionTool.from_defaults(name="section_nine_vector_query_tool", fn=section_nine_tool),
|
| 465 |
+
FunctionTool.from_defaults(name="section_ten_vector_query_tool", fn=section_ten_tool),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
]
|
| 467 |
|
| 468 |
return tools
|
| 469 |
|
|
|
|
| 470 |
def prepare_environment():
|
| 471 |
"""Main function to prepare the environment and return tools"""
|
| 472 |
print("Setting up models...")
|
rag_pipeline.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
-
Enhanced RAG Pipeline for vaccine assistant
|
| 4 |
Handles agent creation and question answering with sequential citation numbering
|
| 5 |
"""
|
| 6 |
|
|
@@ -97,6 +97,89 @@ def convert_citations_to_sequential(response_text, source_id_to_number_map):
|
|
| 97 |
return sequential_response
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
def create_safe_custom_prompt(tools, llm):
|
| 101 |
"""Create a safe version that won't have formatting conflicts"""
|
| 102 |
|
|
@@ -106,21 +189,13 @@ You are a helpful and knowledgeable AI-powered vaccine assistant designed to sup
|
|
| 106 |
You provide evidence-based guidance using only information from official vaccine medical documents.
|
| 107 |
Answer the doctor's question accurately and concisely using only the provided information.
|
| 108 |
|
| 109 |
-
##
|
| 110 |
-
|
| 111 |
-
### Tool Usage Strategy
|
| 112 |
-
1. **MAXIMUM 3 TOOL CALLS**: You must provide a complete answer within 3 tool calls maximum.
|
| 113 |
-
2. **Smart Tool Selection**: Choose the most relevant tool first based on the question topic.
|
| 114 |
-
3. **Comparative Questions**: For questions comparing documents/protocols:
|
| 115 |
-
- First tool call: Get information from primary source (e.g., Algerian guide)
|
| 116 |
-
- Second tool call: Get information from secondary source (e.g., WHO document)
|
| 117 |
-
- Third tool call: Only if absolutely necessary for missing details
|
| 118 |
-
4. **Stop Early**: If you have sufficient information after 1-2 tool calls, provide your answer immediately.
|
| 119 |
|
| 120 |
### Citation and Sourcing
|
| 121 |
1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
|
| 122 |
2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
|
| 123 |
-
3. If a fact is supported by multiple sources, use
|
|
|
|
| 124 |
4. Use ONLY the provided information and never include facts from your general knowledge.
|
| 125 |
|
| 126 |
### Content Formatting
|
|
@@ -131,12 +206,6 @@ Answer the doctor's question accurately and concisely using only the provided in
|
|
| 131 |
2. For lists, maintain the original bullet points/numbering and include citations.
|
| 132 |
3. Present information concisely but ensure clinical accuracy is never compromised.
|
| 133 |
|
| 134 |
-
### Answer Completeness Guidelines
|
| 135 |
-
- If you find relevant information from 1-2 sources, synthesize and provide a complete answer
|
| 136 |
-
- Don't keep searching for more sources unless critical information is missing
|
| 137 |
-
- For comparative questions, clearly structure your answer with sections for each source
|
| 138 |
-
- If information is not available in the documents, clearly state this limitation
|
| 139 |
-
|
| 140 |
---
|
| 141 |
|
| 142 |
"""
|
|
@@ -163,38 +232,34 @@ Answer the doctor's question accurately and concisely using only the provided in
|
|
| 163 |
# Even safer fallback
|
| 164 |
return PromptTemplate(template=safe_template)
|
| 165 |
|
| 166 |
-
|
| 167 |
def create_agent(tools, llm):
|
| 168 |
-
"""Create the ReAct agent with custom prompt
|
| 169 |
|
| 170 |
-
# Create agent
|
| 171 |
agent = ReActAgent.from_tools(
|
| 172 |
tools,
|
| 173 |
llm=llm,
|
| 174 |
verbose=True,
|
| 175 |
-
max_iterations=5, # Reduced max iterations
|
| 176 |
)
|
| 177 |
|
| 178 |
# Create and apply safe custom prompt
|
| 179 |
try:
|
| 180 |
safe_custom_prompt = create_safe_custom_prompt(tools, llm)
|
| 181 |
agent.update_prompts({"agent_worker:system_prompt": safe_custom_prompt})
|
| 182 |
-
print("✅ Successfully updated with safe custom prompt
|
| 183 |
except Exception as e:
|
| 184 |
print(f"❌ Safe prompt update failed: {e}")
|
| 185 |
print("⚠️ Using original agent without modifications")
|
| 186 |
|
| 187 |
return agent
|
| 188 |
|
| 189 |
-
|
| 190 |
def initialize_rag_pipeline(tools):
|
| 191 |
"""Initialize the RAG pipeline with tools"""
|
| 192 |
|
| 193 |
-
# Initialize LlamaIndex LLM
|
| 194 |
llama_index_llm = GoogleGenAI(
|
| 195 |
model="models/gemini-2.0-flash",
|
| 196 |
api_key=os.getenv('GOOGLE_API_KEY'),
|
| 197 |
-
temperature=0.1, # Lower temperature for more focused responses
|
| 198 |
)
|
| 199 |
|
| 200 |
# Create agent
|
|
@@ -202,26 +267,14 @@ def initialize_rag_pipeline(tools):
|
|
| 202 |
|
| 203 |
return agent
|
| 204 |
|
| 205 |
-
|
| 206 |
def process_question(agent, question: str) -> str:
|
| 207 |
-
"""Process a question through the RAG pipeline
|
| 208 |
try:
|
| 209 |
-
# Add timeout/retry logic
|
| 210 |
response = agent.chat(question)
|
| 211 |
return response.response
|
| 212 |
except Exception as e:
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
# Handle specific "max iterations" error
|
| 217 |
-
if "max iterations" in error_msg.lower() or "reached max" in error_msg.lower():
|
| 218 |
-
return ("I apologize, but I was unable to find a complete answer within the allowed search attempts. "
|
| 219 |
-
"This might be because the specific comparison you're asking about requires information "
|
| 220 |
-
"that spans multiple sections of the documents. Could you please rephrase your question "
|
| 221 |
-
"to be more specific about which aspect of the difference you're most interested in?")
|
| 222 |
-
|
| 223 |
-
return f"Error processing your question: {error_msg}"
|
| 224 |
-
|
| 225 |
|
| 226 |
def aswer_language_detection(response_text: str) -> str:
|
| 227 |
"""
|
|
@@ -233,23 +286,24 @@ def aswer_language_detection(response_text: str) -> str:
|
|
| 233 |
Returns:
|
| 234 |
str: Detected language code (e.g., 'en', 'fr', etc.)
|
| 235 |
"""
|
|
|
|
| 236 |
try:
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
except:
|
| 244 |
-
|
| 245 |
|
| 246 |
-
|
|
|
|
| 247 |
|
| 248 |
|
| 249 |
def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 250 |
"""
|
| 251 |
Process a question through the RAG pipeline and return response with sequential citation numbers.
|
| 252 |
-
Enhanced with better error handling for max iterations.
|
| 253 |
|
| 254 |
Args:
|
| 255 |
agent: The initialized RAG agent
|
|
@@ -265,18 +319,10 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
|
|
| 265 |
}
|
| 266 |
"""
|
| 267 |
try:
|
| 268 |
-
# Get the response from the agent
|
| 269 |
response = agent.chat(question)
|
| 270 |
response_text = response.response
|
| 271 |
|
| 272 |
-
# Check if the response indicates max iterations was reached
|
| 273 |
-
if "max iterations" in response_text.lower() or len(response_text.strip()) == 0:
|
| 274 |
-
# Provide a more helpful fallback response
|
| 275 |
-
response_text = ("I apologize, but I encountered difficulties processing your comparative question "
|
| 276 |
-
"within the allowed search attempts. For questions comparing different protocols "
|
| 277 |
-
"or documents, please try asking about each aspect separately. For example, "
|
| 278 |
-
"first ask about the Algerian definition of Diphtheria, then ask about the WHO definition.")
|
| 279 |
-
|
| 280 |
# Extract source IDs from the response (preserving order)
|
| 281 |
unique_ids = extract_source_ids(response_text)
|
| 282 |
|
|
@@ -320,40 +366,25 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
|
|
| 320 |
|
| 321 |
# Convert to JSON
|
| 322 |
cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
|
| 323 |
-
|
| 324 |
-
|
| 325 |
return {
|
| 326 |
"response": sequential_response,
|
| 327 |
"cited_elements_json": cited_elements_json,
|
| 328 |
"unique_ids": unique_ids,
|
| 329 |
"citation_mapping": source_id_to_number,
|
| 330 |
-
"answer_language":
|
| 331 |
}
|
| 332 |
|
| 333 |
except Exception as e:
|
| 334 |
-
|
| 335 |
-
print(f"Error processing question: {error_msg}")
|
| 336 |
-
|
| 337 |
-
# Create appropriate fallback response based on error type
|
| 338 |
-
if "max iterations" in error_msg.lower() or "reached max" in error_msg.lower():
|
| 339 |
-
fallback_response = ("I apologize, but I was unable to complete the comparison within the allowed search attempts. "
|
| 340 |
-
"For complex comparative questions like yours about the differences between Algerian and WHO "
|
| 341 |
-
"definitions of Diphtheria, please try asking about each source separately: \n\n"
|
| 342 |
-
"1. First ask: 'What is the definition of Diphtheria in the Algerian vaccination guide?'\n"
|
| 343 |
-
"2. Then ask: 'What is the definition of Diphtheria in the WHO document?'\n\n"
|
| 344 |
-
"This will help me provide you with more focused and complete information.")
|
| 345 |
-
else:
|
| 346 |
-
fallback_response = f"I encountered an error while processing your question: {error_msg}"
|
| 347 |
-
|
| 348 |
return {
|
| 349 |
-
"response":
|
| 350 |
"cited_elements_json": "[]",
|
| 351 |
"unique_ids": [],
|
| 352 |
"citation_mapping": {},
|
| 353 |
-
"answer_language": "en"
|
| 354 |
}
|
| 355 |
|
| 356 |
-
|
| 357 |
def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 358 |
"""
|
| 359 |
Legacy function - maintained for backward compatibility.
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
+
Enhanced RAG Pipeline for vaccine assistant
|
| 4 |
Handles agent creation and question answering with sequential citation numbering
|
| 5 |
"""
|
| 6 |
|
|
|
|
| 97 |
return sequential_response
|
| 98 |
|
| 99 |
|
| 100 |
+
def create_custom_prompt():
    """Create custom prompt with medical assistant instructions.

    The template text combines three parts: the medical-assistant role,
    the citation/formatting requirements, and the ReAct tool-use protocol
    (Thought / Action / Action Input / Observation / Answer).  It keeps
    ``{tool_desc}`` and ``{tool_names}`` as placeholders; literal braces in
    the JSON examples are escaped as ``{{`` / ``}}`` so they survive
    formatting.

    Returns:
        PromptTemplate: A template wrapping the instructions.  If building
        it with explicit ``template_vars`` fails, a template built from the
        text alone is returned instead.
    """

    # NOTE: the negative JSON example below intentionally uses single quotes.
    # With double quotes it would be identical to the valid example given for
    # "Action Input", making the instruction self-contradictory.
    custom_instructions = """
## MEDICAL ASSISTANT ROLE
You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
You provide evidence-based guidance using only information from official vaccine medical documents.
Answer the doctor's question accurately and concisely using only the provided information.

## IMPORTANT REQUIREMENTS

### Citation and Sourcing
1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
3. If a fact is supported by multiple sources, use the following format:
- Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
4. Use ONLY the provided information and never include facts from your general knowledge.

### Content Formatting
1. When rendering tables:
- Convert HTML tables into clean Markdown format
- Preserve all original headers and data rows exactly
- Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
2. For lists, maintain the original bullet points/numbering and include citations.
3. Present information concisely but ensure clinical accuracy is never compromised.

## Tools

You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
This may require breaking the task into subtasks and using different tools to complete each subtask.

You have access to the following tools:
{tool_desc}

## Output Format

Please answer in the same language as the question and use the following format:

```
Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
```

Please ALWAYS start with a Thought.

NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.

Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.

If this format is used, the tool will respond in the following format:

```
Observation: tool response
```

You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in one of the following two formats:

```
Thought: I can answer without using any more tools. I'll use the user's language to answer. Remember to include proper citations
Answer: [your answer here with proper citations (In the same language as the user's question)]
```

```
Thought: I cannot answer the question with the provided tools.
Answer: [your answer here (In the same language as the user's question)]
```

## Current Conversation

Below is the current conversation consisting of interleaving human and assistant messages.
"""

    try:
        custom_prompt = PromptTemplate(
            template=custom_instructions,
            template_vars=["tool_desc", "tool_names"]
        )
        return custom_prompt
    except Exception as exc:
        # Fallback to simple template.  Catch Exception (not a bare except)
        # so KeyboardInterrupt/SystemExit still propagate, and report why
        # the first attempt was rejected instead of hiding it.
        print(f"Custom prompt construction failed, using simple template: {exc}")
        return PromptTemplate(template=custom_instructions)
|
| 182 |
+
|
| 183 |
def create_safe_custom_prompt(tools, llm):
|
| 184 |
"""Create a safe version that won't have formatting conflicts"""
|
| 185 |
|
|
|
|
| 189 |
You provide evidence-based guidance using only information from official vaccine medical documents.
|
| 190 |
Answer the doctor's question accurately and concisely using only the provided information.
|
| 191 |
|
| 192 |
+
## IMPORTANT REQUIREMENTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
### Citation and Sourcing
|
| 195 |
1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
|
| 196 |
2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
|
| 197 |
+
3. If a fact is supported by multiple sources, use the following format:
|
| 198 |
+
- Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
|
| 199 |
4. Use ONLY the provided information and never include facts from your general knowledge.
|
| 200 |
|
| 201 |
### Content Formatting
|
|
|
|
| 206 |
2. For lists, maintain the original bullet points/numbering and include citations.
|
| 207 |
3. Present information concisely but ensure clinical accuracy is never compromised.
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
---
|
| 210 |
|
| 211 |
"""
|
|
|
|
| 232 |
# Even safer fallback
|
| 233 |
return PromptTemplate(template=safe_template)
|
| 234 |
|
|
|
|
| 235 |
def create_agent(tools, llm):
    """Create the ReAct agent and try to install the safe custom prompt.

    Args:
        tools: Tools handed to ``ReActAgent.from_tools``.
        llm: LLM driving the agent; also forwarded to
            ``create_safe_custom_prompt``.

    Returns:
        The agent.  When the prompt update raises, the failure is printed
        and the agent is returned with whatever prompt it already had.
    """
    react_agent = ReActAgent.from_tools(tools, llm=llm, verbose=True)

    # Best effort: a failing prompt override must not prevent agent creation.
    try:
        prompt_overrides = {
            "agent_worker:system_prompt": create_safe_custom_prompt(tools, llm),
        }
        react_agent.update_prompts(prompt_overrides)
        print("✅ Successfully updated with safe custom prompt")
    except Exception as exc:
        print(f"❌ Safe prompt update failed: {exc}")
        print("⚠️ Using original agent without modifications")

    return react_agent
|
| 255 |
|
|
|
|
| 256 |
def initialize_rag_pipeline(tools):
|
| 257 |
"""Initialize the RAG pipeline with tools"""
|
| 258 |
|
| 259 |
+
# Initialize LlamaIndex LLM
|
| 260 |
llama_index_llm = GoogleGenAI(
|
| 261 |
model="models/gemini-2.0-flash",
|
| 262 |
api_key=os.getenv('GOOGLE_API_KEY'),
|
|
|
|
| 263 |
)
|
| 264 |
|
| 265 |
# Create agent
|
|
|
|
| 267 |
|
| 268 |
return agent
|
| 269 |
|
|
|
|
| 270 |
def process_question(agent, question: str) -> str:
    """Run a single question through the agent and return the answer text.

    Args:
        agent: Object exposing ``chat(question)`` whose result has a
            ``response`` attribute.
        question: The user's question.

    Returns:
        str: The agent's response text, or an
        ``"Error processing your question: ..."`` message when the call
        raises (the error is also printed).
    """
    try:
        return agent.chat(question).response
    except Exception as exc:
        print(f"Error processing question: {exc}")
        return f"Error processing your question: {exc}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
def aswer_language_detection(response_text: str) -> str:
    """
    Detect the language of an answer from its opening words.

    Bracketed inline citations are removed from the text first, and the
    first five remaining words are passed to ``detect``.  Stripping before
    sampling keeps citation tokens from using up the five-word sample.

    Args:
        response_text (str): The answer text to inspect.

    Returns:
        str: Detected language code, one of 'en', 'ar' or 'fr'.  'en' is
        returned when another language is reported or when detection
        raises (e.g. nothing is left to analyse).
    """
    supported_languages = ('en', 'ar', 'fr')

    try:
        # Remove citations before taking the sample, not after.
        without_citations = re.sub(r'\[.*?\]', '', response_text)
        sample = " ".join(without_citations.split()[:5])
        answer_language = detect(sample)
    except Exception:
        # Best-effort detection: default to English on any failure.
        # ``Exception`` (not a bare except, and no ``return`` in ``finally``)
        # lets KeyboardInterrupt/SystemExit propagate.
        return 'en'

    return answer_language if answer_language in supported_languages else 'en'
|
| 302 |
|
| 303 |
|
| 304 |
def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 305 |
"""
|
| 306 |
Process a question through the RAG pipeline and return response with sequential citation numbers.
|
|
|
|
| 307 |
|
| 308 |
Args:
|
| 309 |
agent: The initialized RAG agent
|
|
|
|
| 319 |
}
|
| 320 |
"""
|
| 321 |
try:
|
| 322 |
+
# Get the response from the agent
|
| 323 |
response = agent.chat(question)
|
| 324 |
response_text = response.response
|
| 325 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
# Extract source IDs from the response (preserving order)
|
| 327 |
unique_ids = extract_source_ids(response_text)
|
| 328 |
|
|
|
|
| 366 |
|
| 367 |
# Convert to JSON
|
| 368 |
cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
|
| 369 |
+
aswer_language= aswer_language_detection(response_text)
|
|
|
|
| 370 |
return {
|
| 371 |
"response": sequential_response,
|
| 372 |
"cited_elements_json": cited_elements_json,
|
| 373 |
"unique_ids": unique_ids,
|
| 374 |
"citation_mapping": source_id_to_number,
|
| 375 |
+
"answer_language":aswer_language
|
| 376 |
}
|
| 377 |
|
| 378 |
except Exception as e:
|
| 379 |
+
print(f"Error processing question: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
return {
|
| 381 |
+
"response": response_text if 'response_text' in locals() else "Error occurred",
|
| 382 |
"cited_elements_json": "[]",
|
| 383 |
"unique_ids": [],
|
| 384 |
"citation_mapping": {},
|
| 385 |
+
"answer_language": "en" # Default to English if not specified
|
| 386 |
}
|
| 387 |
|
|
|
|
| 388 |
def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
|
| 389 |
"""
|
| 390 |
Legacy function - maintained for backward compatibility.
|