AgenticRAG_test

Sleeping

App Files Community

Zeggai Abdellah committed on Jun 4, 2025

Commit

c0e5c04

1 Parent(s): 5a74e30

update the Immunization_in_Practice_tool tool

Browse files

Files changed (2) hide show

prepare_env.py +217 -121
rag_pipeline.py +110 -79

prepare_env.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-Environment preparation script for vaccine assistant - Improved version
-Creates vector stores and retrieval tools with better descriptions for efficient agent routing
 """
 import os
@@ -56,6 +56,11 @@ def extract_source_ids(response_text):
     # Get unique source IDs
     source_ids = list(set(all_ids))
     if not source_ids:
         print("Warning: No valid source IDs found after filtering.")
         return []
@@ -70,17 +75,15 @@ def setup_models():
         model_name="intfloat/multilingual-e5-base"
     )
-    # Initialize LLM with better parameters for focused responses
     genai_api_key = os.getenv('GOOGLE_API_KEY')
     llm = ChatGoogleGenerativeAI(
         model="gemini-2.0-flash",
-        google_api_key=genai_api_key,
-        temperature=0.1  # Lower temperature for more focused responses
     )
     return embedding_function, llm
 def create_vectorstore_from_json(json_path: str, collection_name: str, embedding_function):
     """Create vector store from JSON chunks"""
     # Load the chunks.json
@@ -112,13 +115,12 @@ def create_vectorstore_from_json(json_path: str, collection_name: str, embedding
     )
     return vectorstore, documents
 def create_retriever(vectorstore, docs, llm):
     """Create ensemble retriever with vector and BM25 search"""
     # Vector retriever
     vector_retriever = vectorstore.as_retriever(
         search_type="similarity",
-        search_kwargs={"k": 4}  # Reduced from 6 to 4 for efficiency
     )
     # BM25 retriever
@@ -131,7 +133,7 @@ def create_retriever(vectorstore, docs, llm):
         weights=[0.5, 0.5]
     )
-    # Multi-query expanding retriever (with reduced complexity for efficiency)
     expanding_retriever = MultiQueryRetriever.from_llm(
         retriever=ensemble_retriever,
         llm=llm
@@ -139,7 +141,6 @@ def create_retriever(vectorstore, docs, llm):
     return expanding_retriever
 def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextNode]:
     """Convert ChromaDB Document objects to LlamaIndex TextNode objects"""
     nodes = []
@@ -161,9 +162,8 @@ def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextN
             continue
     return nodes
 def section_tool_wrapper(retriever, section_path_chunks, query):
-    """Generic section tool wrapper with improved efficiency"""
     try:
         retrieved_docs = retriever.get_relevant_documents(query)
         nodes_from_retrieved_docs = convert_chromadb_to_llamaindex_nodes(retrieved_docs)
@@ -178,15 +178,13 @@ def section_tool_wrapper(retriever, section_path_chunks, query):
         chunks_unique = [node for node in chunks_data if node.get('element_id', 'Unknown') in chunk_ids]
         combined_text = []
-        # Limit the number of chunks to avoid overwhelming the context
-        max_chunks = 8  # Reasonable limit
-        for chu in chunks_unique[:max_chunks]:
             if "TableElement" == chu["type"]:
-                text = f"[{chu['element_id']}]\n CONTENT: \n{chu['text']}\n HTML: \n {chu['table_text_as_html']}  \n\n"
                 combined_text.append(text)
             else:
                 for element in chu["elements"]:
-                    text = f"[{element['element_id']}]\n CONTENT: \n{element['text']} \n\n"
                     combined_text.append(text)
         result = "\n---\n".join(combined_text)
@@ -196,9 +194,8 @@ def section_tool_wrapper(retriever, section_path_chunks, query):
         print(f"Error in section tool: {e}")
         return f"Error retrieving documents: {str(e)}"
 def create_section_tools(embedding_function, llm):
-    """Create all section-specific retrieval tools with improved descriptions"""
     # Define section paths
     section_paths = {
@@ -217,7 +214,7 @@ def create_section_tools(embedding_function, llm):
     # Create retrievers for each section
     section_retrievers = {}
     for section, path in section_paths.items():
-        if os.path.exists(f'./data/{path}'):
             vstore, docs = create_vectorstore_from_json(f'./data/{path}', f"Guide_2023_{section}", embedding_function)
             section_retrievers[section] = create_retriever(vstore, docs, llm)
@@ -228,9 +225,29 @@ def create_section_tools(embedding_function, llm):
         guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
     else:
         guide_retriever = None
     # Primary + Secondary Document Paths
-    immunization_path = './data/Immunization_in_Practice_WHO_eng_2015.json'
     # WHO Immunization in Practice Tool
     if os.path.exists(immunization_path):
@@ -243,134 +260,213 @@ def create_section_tools(embedding_function, llm):
     else:
         immunization_retriever = None
-    # Tool Functions with Improved Efficiency Focus
-    def guide_retrieval_tool(query: str) -> str:
         """
-        **PRIMARY TOOL - USE FIRST FOR MOST QUESTIONS**
-        Comprehensive search across the entire Algerian National Vaccination Guide (2023).
-        **When to use this tool:**
-        - General vaccination questions
-        - Disease definitions and descriptions
-        - Vaccine schedules and protocols
-        - Comparative questions needing Algerian perspective
-        - Any question about Algeria's vaccination program
-        **Keywords that indicate this tool:** Algeria, Algerian, national, calendrier, vaccination, PEV, diseases (diphteria, polio, measles, etc.)
         Args:
-            query (str): Any vaccination-related question about Algeria's national program
         Returns:
-            str: Comprehensive information from the Algerian guide with citations
         """
-        if not guide_retriever:
-            return "Guide retriever not available"
-        return section_tool_wrapper(guide_retriever, guide_path, query)
-    def immunization_tool(query: str) -> str:
         """
-        **SECONDARY TOOL - USE FOR WHO/INTERNATIONAL PERSPECTIVE**
-        WHO Immunization in Practice 2015 - Global best practices and international standards.
-        **When to use this tool:**
-        - Questions specifically asking about WHO recommendations
-        - International/global immunization practices
-        - Comparative questions needing WHO perspective
-        - Technical immunization procedures and best practices
-        **Keywords that indicate this tool:** WHO, international, global, best practices, standards
         Args:
-            query (str): Question about international immunization practices or WHO recommendations
         Returns:
-            str: WHO guidance and international best practices with citations
         """
-        if not immunization_retriever:
-            return "Immunization in Practice retriever not available"
-        return section_tool_wrapper(immunization_retriever, immunization_path, query)
-    # Section-Specific Tools (USE ONLY IF QUESTION IS VERY SPECIFIC TO THE SECTION)
     def section_two_tool(query: str) -> str:
         """
-        **DISEASE-SPECIFIC TOOL**
-        Section 2: Vaccine-preventable diseases - definitions, symptoms, transmission, complications.
-        **Use ONLY for specific disease definition questions like:**
-        - "What is diphtheria?"
-        - "Define measles according to Algerian protocol"
-        - "Symptoms of polio"
-        **Keywords:** definition, symptoms, transmission, complications, disease characteristics
         Args:
-            query (str): Specific question about disease definitions or characteristics
         Returns:
-            str: Disease-specific medical information with citations
         """
-        if 'two' not in section_retrievers:
-            return "Section 2 retriever not available"
-        return section_tool_wrapper(section_retrievers['two'], f'./data/{section_paths["two"]}', query)
     def section_three_tool(query: str) -> str:
         """
-        **VACCINE-SPECIFIC TOOL**
-        Section 3: Vaccine details - types, composition, administration methods.
-        **Use ONLY for specific vaccine technical questions like:**
-        - "What type of vaccine is used for diphtheria?"
-        - "How is the MMR vaccine administered?"
-        - "Vaccine composition and dosage"
-        **Keywords:** vaccine type, composition, administration, dosage, technical details
         Args:
-            query (str): Technical question about specific vaccines
         Returns:
-            str: Technical vaccine information with citations
         """
-        if 'three' not in section_retrievers:
-            return "Section 3 retriever not available"
-        return section_tool_wrapper(section_retrievers['three'], f'./data/{section_paths["three"]}', query)
-    # Create FunctionTool objects with focused selection
     tools = [
-        # Primary tools - most commonly used
-        FunctionTool.from_defaults(
-            name="algerian_guide_search",
-            fn=guide_retrieval_tool,
-            description="PRIMARY TOOL: Search the complete Algerian National Vaccination Guide for any vaccination-related question"
-        ),
-        FunctionTool.from_defaults(
-            name="who_immunization_search",
-            fn=immunization_tool,
-            description="SECONDARY TOOL: Search WHO Immunization in Practice for international standards and WHO recommendations"
-        ),
-        # Specialized tools - use only when very specific
-        FunctionTool.from_defaults(
-            name="disease_definitions_search",
-            fn=section_two_tool,
-            description="SPECIALIZED: Search for specific disease definitions, symptoms, and characteristics"
-        ),
-        FunctionTool.from_defaults(
-            name="vaccine_technical_search",
-            fn=section_three_tool,
-            description="SPECIALIZED: Search for technical vaccine details, composition, and administration methods"
-        ),
     ]
     return tools
 def prepare_environment():
     """Main function to prepare the environment and return tools"""
     print("Setting up models...")

 # -*- coding: utf-8 -*-
 """
+Environment preparation script for vaccine assistant
+Creates vector stores and retrieval tools
 """
 import os
     # Get unique source IDs
     source_ids = list(set(all_ids))
+    # Filter out any non-UUID-like IDs (if needed)
+    # This is now optional as we're handling various source ID formats
+    # uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$'
+    # source_ids = [source_id for source_id in source_ids if re.match(uuid_pattern, source_id, re.IGNORECASE)]
     if not source_ids:
         print("Warning: No valid source IDs found after filtering.")
         return []
         model_name="intfloat/multilingual-e5-base"
     )
+    # Initialize LLM
     genai_api_key = os.getenv('GOOGLE_API_KEY')
     llm = ChatGoogleGenerativeAI(
         model="gemini-2.0-flash",
+        google_api_key=genai_api_key
     )
     return embedding_function, llm
 def create_vectorstore_from_json(json_path: str, collection_name: str, embedding_function):
     """Create vector store from JSON chunks"""
     # Load the chunks.json
     )
     return vectorstore, documents
 def create_retriever(vectorstore, docs, llm):
     """Create ensemble retriever with vector and BM25 search"""
     # Vector retriever
     vector_retriever = vectorstore.as_retriever(
         search_type="similarity",
+        search_kwargs={"k": 6}
     )
     # BM25 retriever
         weights=[0.5, 0.5]
     )
+    # Multi-query expanding retriever
     expanding_retriever = MultiQueryRetriever.from_llm(
         retriever=ensemble_retriever,
         llm=llm
     return expanding_retriever
 def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextNode]:
     """Convert ChromaDB Document objects to LlamaIndex TextNode objects"""
     nodes = []
             continue
     return nodes
 def section_tool_wrapper(retriever, section_path_chunks, query):
+    """Generic section tool wrapper"""
     try:
         retrieved_docs = retriever.get_relevant_documents(query)
         nodes_from_retrieved_docs = convert_chromadb_to_llamaindex_nodes(retrieved_docs)
         chunks_unique = [node for node in chunks_data if node.get('element_id', 'Unknown') in chunk_ids]
         combined_text = []
+        for chu in chunks_unique:
             if "TableElement" == chu["type"]:
+                text = f"[Source: {chu['element_id']}]\n CONTENT: \n{chu['text']}\n HTML: \n {chu['table_text_as_html']}  \n\n"
                 combined_text.append(text)
             else:
                 for element in chu["elements"]:
+                    text = f"[Source: {element['element_id']}]\n CONTENT: \n{element['text']} \n\n"
                     combined_text.append(text)
         result = "\n---\n".join(combined_text)
         print(f"Error in section tool: {e}")
         return f"Error retrieving documents: {str(e)}"
 def create_section_tools(embedding_function, llm):
+    """Create all section-specific retrieval tools"""
     # Define section paths
     section_paths = {
     # Create retrievers for each section
     section_retrievers = {}
     for section, path in section_paths.items():
+        if os.path.exists(path):
             vstore, docs = create_vectorstore_from_json(f'./data/{path}', f"Guide_2023_{section}", embedding_function)
             section_retrievers[section] = create_retriever(vstore, docs, llm)
         guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
     else:
         guide_retriever = None
+    # General-purpose tool (entire Algerian guide)
+    def guide_retrieval_tool(query: str) -> str:
+        """
+        General-purpose retrieval tool for the entire Algerian National Vaccination Guide (2023).
+        Use this tool when a query spans multiple sections or cannot be routed confidently to a specific tool.
+        This is the fallback and all-encompassing tool to retrieve any vaccination-related information
+        from the national guide.
+        Secondary source: The WHO Immunization Guide can be queried separately via `Immunization_in_Practice_tool`.
+        Args:
+            query (str): A general or complex question related to vaccination policy, schedules, or practice.
+        Returns:
+            str: Synthesized response based on the full Algerian guide.
+        """
+        if not guide_retriever:
+            return "Guide retriever not available"
+        return section_tool_wrapper(guide_retriever, guide_path, query)
     # Primary + Secondary Document Paths
+    immunization_path = './data/Immunization in Practice_WHO_eng_2015.json'
     # WHO Immunization in Practice Tool
     if os.path.exists(immunization_path):
     else:
         immunization_retriever = None
+    def immunization_tool(query: str) -> str:
         """
+        WHO Immunization in Practice 2015 retrieval tool.
+        Use this tool to provide global best practices and operational guidance on immunization,
+        especially when context or clarification is needed beyond the Algerian national guide.
+        This can serve as a secondary source for training, logistics, and procedural reference.
         Args:
+            query (str): A question related to immunization practice in general.
         Returns:
+            str: Retrieved guidance from the WHO Immunization in Practice manual (2015).
         """
+        if not immunization_retriever:
+            return "Immunization in Practice retriever not available"
+        return section_tool_wrapper(immunization_retriever, immunization_path, query)
+    # Section-Specific Tools (Primary: Algerian National Vaccination Guide)
+    def section_one_tool(query: str) -> str:
         """
+        Section 1: Programme Élargi de Vaccination (PEV)
+        Use this tool to retrieve information about the Algerian immunization program:
+        its objectives, historical background, strengths and weaknesses, and justification
+        for calendar updates.
+        Primary source: Algerian National Vaccination Guide, Section 1.
+        Secondary source for operational benchmarks: WHO Immunization in Practice (optional).
         Args:
+            query (str): A question about Algeria’s national immunization strategy.
         Returns:
+            str: Relevant content from Section 1 of the guide.
         """
+        return section_tool_wrapper(section_retrievers['one'], section_paths['one'], query)
     def section_two_tool(query: str) -> str:
         """
+        Section 2: Maladies Ciblées par la Vaccination
+        Use this tool for questions about the diseases targeted by the national vaccination calendar:
+        symptoms, transmission, complications, and prevention strategies.
+        Primary source: Algerian National Guide, Section 2.
+        Secondary source: WHO guide may support contextual insights.
         Args:
+            query (str): A question about a vaccine-preventable disease (e.g. polio, rougeole).
         Returns:
+            str: Disease-specific guidance from Section 2.
         """
+        return section_tool_wrapper(section_retrievers['two'], section_paths['two'], query)
     def section_three_tool(query: str) -> str:
         """
+        Section 3: Vaccins du Calendrier
+        Use this tool to retrieve technical and procedural information about the vaccines used in the calendar:
+        names, contents, administration method, and dosing details.
         Args:
+            query (str): A question about a specific vaccine's type or method of use.
         Returns:
+            str: Vaccine information from Section 3.
         """
+        return section_tool_wrapper(section_retrievers['three'], section_paths['three'], query)
+    def section_four_tool(query: str) -> str:
+        """
+        Section 4: Rattrapage Vaccinal
+        Use this tool to determine catch-up strategies for children who missed or delayed one or more doses.
+        It provides age-adjusted rescheduling rules and justifications.
+        Args:
+            query (str): A question about how to manage missed vaccinations.
+        Returns:
+            str: Catch-up guidelines from Section 4.
+        """
+        return section_tool_wrapper(section_retrievers['four'], section_paths['four'], query)
+    def section_five_tool(query: str) -> str:
+        """
+        Section 5: Vaccination des Populations Particulières
+        Use this tool to retrieve recommendations for specific medical contexts:
+        preterm infants, immunocompromised children, allergies (e.g. eggs), and chronic diseases.
+        Args:
+            query (str): A question about vaccination adaptations for vulnerable groups.
+        Returns:
+            str: Guidelines from Section 5.
+        """
+        return section_tool_wrapper(section_retrievers['five'], section_paths['five'], query)
+    def section_six_tool(query: str) -> str:
+        """
+        Section 6: Chaîne du Froid
+        Use this tool for logistics, storage conditions, temperature monitoring,
+        and emergency procedures in case of cold chain failure.
+        Args:
+            query (str): A question about how vaccines should be stored and transported.
+        Returns:
+            str: Operational cold chain standards from Section 6.
+        """
+        return section_tool_wrapper(section_retrievers['six'], section_paths['six'], query)
+    def section_seven_tool(query: str) -> str:
+        """
+        Section 7: Sécurité des Injections
+        Use this tool to ensure injection safety: handling equipment, preventing needle-stick injuries,
+        and disposing of biomedical waste.
+        Args:
+            query (str): A question about safe injection practices.
+        Returns:
+            str: Procedures and guidelines from Section 7.
+        """
+        return section_tool_wrapper(section_retrievers['seven'], section_paths['seven'], query)
+    def section_eight_tool(query: str) -> str:
+        """
+        Section 8: Tenue d'une Séance de Vaccination & Vaccinovigilance
+        Use this tool to plan and monitor vaccination sessions, including material preparation,
+        injection recording, and handling of adverse events post-immunization (AEFI).
+        Args:
+            query (str): A question about session operations or vaccine side effect monitoring.
+        Returns:
+            str: Guidelines from Section 8.
+        """
+        return section_tool_wrapper(section_retrievers['eight'], section_paths['eight'], query)
+    def section_nine_tool(query: str) -> str:
+        """
+        Section 9: Planification des Séances de Vaccination
+        Use this tool to support logistical planning: mapping, resource estimation,
+        scheduling, and stock management.
+        Args:
+            query (str): A question about planning and organizing vaccination sessions.
+        Returns:
+            str: Recommendations from Section 9.
+        """
+        return section_tool_wrapper(section_retrievers['nine'], section_paths['nine'], query)
+    def section_ten_tool(query: str) -> str:
+        """
+        Section 10: Mobilisation Sociale
+        Use this tool for strategies to increase public engagement, combat vaccine hesitancy,
+        and manage misinformation.
+        Args:
+            query (str): A question about public communication and trust-building around vaccines.
+        Returns:
+            str: Social mobilization approaches from Section 10.
+        """
+        return section_tool_wrapper(section_retrievers['ten'], section_paths['ten'], query)
+    # Create FunctionTool objects
     tools = [
+        FunctionTool.from_defaults(name="Guide_vector_tool", fn=guide_retrieval_tool),
+        FunctionTool.from_defaults(name="Immunization_in_Practice_tool", fn=immunization_tool),
+        # Section-specific tools
+        FunctionTool.from_defaults(name="section_one_vector_query_tool", fn=section_one_tool),
+        FunctionTool.from_defaults(name="section_two_vector_query_tool", fn=section_two_tool),
+        FunctionTool.from_defaults(name="section_three_vector_query_tool", fn=section_three_tool),
+        FunctionTool.from_defaults(name="section_four_vector_query_tool", fn=section_four_tool),
+        FunctionTool.from_defaults(name="section_five_vector_query_tool", fn=section_five_tool),
+        FunctionTool.from_defaults(name="section_six_vector_query_tool", fn=section_six_tool),
+        FunctionTool.from_defaults(name="section_seven_vector_query_tool", fn=section_seven_tool),
+        FunctionTool.from_defaults(name="section_eight_vector_query_tool", fn=section_eight_tool),
+        FunctionTool.from_defaults(name="section_nine_vector_query_tool", fn=section_nine_tool),
+        FunctionTool.from_defaults(name="section_ten_vector_query_tool", fn=section_ten_tool),
     ]
     return tools
 def prepare_environment():
     """Main function to prepare the environment and return tools"""
     print("Setting up models...")

rag_pipeline.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Enhanced RAG Pipeline for vaccine assistant - Fixed version with max iterations control
 Handles agent creation and question answering with sequential citation numbering
 """
@@ -97,6 +97,89 @@ def convert_citations_to_sequential(response_text, source_id_to_number_map):
     return sequential_response
 def create_safe_custom_prompt(tools, llm):
     """Create a safe version that won't have formatting conflicts"""
@@ -106,21 +189,13 @@ You are a helpful and knowledgeable AI-powered vaccine assistant designed to sup
 You provide evidence-based guidance using only information from official vaccine medical documents.
 Answer the doctor's question accurately and concisely using only the provided information.
-## CRITICAL RULES FOR EFFICIENCY
-### Tool Usage Strategy
-1. **MAXIMUM 3 TOOL CALLS**: You must provide a complete answer within 3 tool calls maximum.
-2. **Smart Tool Selection**: Choose the most relevant tool first based on the question topic.
-3. **Comparative Questions**: For questions comparing documents/protocols:
-   - First tool call: Get information from primary source (e.g., Algerian guide)
-   - Second tool call: Get information from secondary source (e.g., WHO document)
-   - Third tool call: Only if absolutely necessary for missing details
-4. **Stop Early**: If you have sufficient information after 1-2 tool calls, provide your answer immediately.
 ### Citation and Sourcing
 1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
 2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
-3. If a fact is supported by multiple sources, use adjacent citations: [source1][source2]
 4. Use ONLY the provided information and never include facts from your general knowledge.
 ### Content Formatting
@@ -131,12 +206,6 @@ Answer the doctor's question accurately and concisely using only the provided in
 2. For lists, maintain the original bullet points/numbering and include citations.
 3. Present information concisely but ensure clinical accuracy is never compromised.
-### Answer Completeness Guidelines
-- If you find relevant information from 1-2 sources, synthesize and provide a complete answer
-- Don't keep searching for more sources unless critical information is missing
-- For comparative questions, clearly structure your answer with sections for each source
-- If information is not available in the documents, clearly state this limitation
 ---
 """
@@ -163,38 +232,34 @@ Answer the doctor's question accurately and concisely using only the provided in
         # Even safer fallback
         return PromptTemplate(template=safe_template)
 def create_agent(tools, llm):
-    """Create the ReAct agent with custom prompt and controlled max iterations"""
-    # Create agent with controlled max iterations (reduced from default 10 to 5)
     agent = ReActAgent.from_tools(
         tools,
         llm=llm,
         verbose=True,
-        max_iterations=5,  # Reduced max iterations
     )
     # Create and apply safe custom prompt
     try:
         safe_custom_prompt = create_safe_custom_prompt(tools, llm)
         agent.update_prompts({"agent_worker:system_prompt": safe_custom_prompt})
-        print("✅ Successfully updated with safe custom prompt and max_iterations=5")
     except Exception as e:
         print(f"❌ Safe prompt update failed: {e}")
         print("⚠️  Using original agent without modifications")
     return agent
 def initialize_rag_pipeline(tools):
     """Initialize the RAG pipeline with tools"""
-    # Initialize LlamaIndex LLM with specific parameters to improve efficiency
     llama_index_llm = GoogleGenAI(
         model="models/gemini-2.0-flash",
         api_key=os.getenv('GOOGLE_API_KEY'),
-        temperature=0.1,  # Lower temperature for more focused responses
     )
     # Create agent
@@ -202,26 +267,14 @@ def initialize_rag_pipeline(tools):
     return agent
 def process_question(agent, question: str) -> str:
-    """Process a question through the RAG pipeline with timeout handling"""
     try:
-        # Add timeout/retry logic
         response = agent.chat(question)
         return response.response
     except Exception as e:
-        error_msg = str(e)
-        print(f"Error processing question: {error_msg}")
-        # Handle specific "max iterations" error
-        if "max iterations" in error_msg.lower() or "reached max" in error_msg.lower():
-            return ("I apologize, but I was unable to find a complete answer within the allowed search attempts. "
-                   "This might be because the specific comparison you're asking about requires information "
-                   "that spans multiple sections of the documents. Could you please rephrase your question "
-                   "to be more specific about which aspect of the difference you're most interested in?")
-        return f"Error processing your question: {error_msg}"
 def aswer_language_detection(response_text: str) -> str:
     """
@@ -233,23 +286,24 @@ def aswer_language_detection(response_text: str) -> str:
     Returns:
         str: Detected language code (e.g., 'en', 'fr', etc.)
     """
     try:
-        # Detect the language of the first 5 words of the response
-        first_line = " ".join(response_text.split()[:5])
-        first_line = re.sub(r'\[.*?\]', '', first_line)  # Remove citations
-        answer_language = detect(first_line)
-        if answer_language not in ['en', 'ar', 'fr']:
-            answer_language = 'en'
     except:
-        answer_language = 'en'
-    return answer_language
 def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
     Process a question through the RAG pipeline and return response with sequential citation numbers.
-    Enhanced with better error handling for max iterations.
     Args:
         agent: The initialized RAG agent
@@ -265,18 +319,10 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
         }
     """
     try:
-        # Get the response from the agent with improved error handling
         response = agent.chat(question)
         response_text = response.response
-        # Check if the response indicates max iterations was reached
-        if "max iterations" in response_text.lower() or len(response_text.strip()) == 0:
-            # Provide a more helpful fallback response
-            response_text = ("I apologize, but I encountered difficulties processing your comparative question "
-                           "within the allowed search attempts. For questions comparing different protocols "
-                           "or documents, please try asking about each aspect separately. For example, "
-                           "first ask about the Algerian definition of Diphtheria, then ask about the WHO definition.")
         # Extract source IDs from the response (preserving order)
         unique_ids = extract_source_ids(response_text)
@@ -320,40 +366,25 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
         # Convert to JSON
         cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
-        answer_language = aswer_language_detection(response_text)
         return {
             "response": sequential_response,
             "cited_elements_json": cited_elements_json,
             "unique_ids": unique_ids,
             "citation_mapping": source_id_to_number,
-            "answer_language": answer_language
         }
     except Exception as e:
-        error_msg = str(e)
-        print(f"Error processing question: {error_msg}")
-        # Create appropriate fallback response based on error type
-        if "max iterations" in error_msg.lower() or "reached max" in error_msg.lower():
-            fallback_response = ("I apologize, but I was unable to complete the comparison within the allowed search attempts. "
-                               "For complex comparative questions like yours about the differences between Algerian and WHO "
-                               "definitions of Diphtheria, please try asking about each source separately: \n\n"
-                               "1. First ask: 'What is the definition of Diphtheria in the Algerian vaccination guide?'\n"
-                               "2. Then ask: 'What is the definition of Diphtheria in the WHO document?'\n\n"
-                               "This will help me provide you with more focused and complete information.")
-        else:
-            fallback_response = f"I encountered an error while processing your question: {error_msg}"
         return {
-            "response": fallback_response,
             "cited_elements_json": "[]",
             "unique_ids": [],
             "citation_mapping": {},
-            "answer_language": "en"
         }
 def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
     Legacy function - maintained for backward compatibility.

 # -*- coding: utf-8 -*-
 """
+Enhanced RAG Pipeline for vaccine assistant
 Handles agent creation and question answering with sequential citation numbering
 """
     return sequential_response
+def create_custom_prompt():
+    """Create custom prompt with medical assistant instructions"""
+    custom_instructions = """
+## MEDICAL ASSISTANT ROLE
+You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
+You provide evidence-based guidance using only information from official vaccine medical documents.
+Answer the doctor's question accurately and concisely using only the provided information.
+## IMPORTANT REQUIREMENTS
+### Citation and Sourcing
+1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
+2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
+3. If a fact is supported by multiple sources, use the following format:
+   - Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
+4. Use ONLY the provided information and never include facts from your general knowledge.
+### Content Formatting
+1. When rendering tables:
+   - Convert HTML tables into clean Markdown format
+   - Preserve all original headers and data rows exactly
+   - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
+2. For lists, maintain the original bullet points/numbering and include citations.
+3. Present information concisely but ensure clinical accuracy is never compromised.
+## Tools
+You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
+This may require breaking the task into subtasks and using different tools to complete each subtask.
+You have access to the following tools:
+{tool_desc}
+## Output Format
+Please answer in the same language as the question and use the following format:
+```
+Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
+Action: tool name (one of {tool_names}) if using a tool.
+Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
+```
+Please ALWAYS start with a Thought.
+NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.
+Please use a valid JSON format for the Action Input. Do NOT do this {{"input": "hello world", "num_beams": 5}}.
+If this format is used, the tool will respond in the following format:
+```
+Observation: tool response
+```
+You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in one of the following two formats:
+```
+Thought: I can answer without using any more tools. I'll use the user's language to answer. Remember to include proper citations
+Answer: [your answer here with proper citations (In the same language as the user's question)]
+```
+```
+Thought: I cannot answer the question with the provided tools.
+Answer: [your answer here (In the same language as the user's question)]
+```
+## Current Conversation
+Below is the current conversation consisting of interleaving human and assistant messages.
+"""
+    try:
+        custom_prompt = PromptTemplate(
+            template=custom_instructions,
+            template_vars=["tool_desc", "tool_names"]
+        )
+        return custom_prompt
+    except:
+        # Fallback to simple template
+        return PromptTemplate(template=custom_instructions)
 def create_safe_custom_prompt(tools, llm):
     """Create a safe version that won't have formatting conflicts"""
 You provide evidence-based guidance using only information from official vaccine medical documents.
 Answer the doctor's question accurately and concisely using only the provided information.
+## IMPORTANT REQUIREMENTS
 ### Citation and Sourcing
 1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
 2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
+3. If a fact is supported by multiple sources, use the following format:
+   - Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
 4. Use ONLY the provided information and never include facts from your general knowledge.
 ### Content Formatting
 2. For lists, maintain the original bullet points/numbering and include citations.
 3. Present information concisely but ensure clinical accuracy is never compromised.
 ---
 """
         # Even safer fallback
         return PromptTemplate(template=safe_template)
 def create_agent(tools, llm):
     """Build a verbose ReAct agent over ``tools`` and try to install the safe custom prompt.
     Args:
         tools: Tools handed to ``ReActAgent.from_tools``.
         llm: Language model driving the agent; also passed to the prompt builder.
     Returns:
         The agent, whether or not the prompt replacement succeeded.
     """
     react_agent = ReActAgent.from_tools(tools, llm=llm, verbose=True)
     # Prompt replacement is best-effort: any failure is reported and the
     # agent is returned as it was constructed.
     try:
         prompt_overrides = {
             "agent_worker:system_prompt": create_safe_custom_prompt(tools, llm),
         }
         react_agent.update_prompts(prompt_overrides)
         print("✅ Successfully updated with safe custom prompt")
     except Exception as e:
         print(f"❌ Safe prompt update failed: {e}")
         print("⚠️  Using original agent without modifications")
     return react_agent
 def initialize_rag_pipeline(tools):
     """Initialize the RAG pipeline with tools"""
+    # Initialize LlamaIndex LLM
     llama_index_llm = GoogleGenAI(
         model="models/gemini-2.0-flash",
         api_key=os.getenv('GOOGLE_API_KEY'),
     )
     # Create agent
     return agent
 def process_question(agent, question: str) -> str:
     """Send ``question`` to the agent and return the text of its reply.
     Args:
         agent: Object exposing ``chat(question)`` whose result has a ``response`` attribute.
         question: The user's question.
     Returns:
         str: The reply text, or an error message if the call or the attribute access raises.
     """
     try:
         # Both the call and the attribute access stay inside the try so that
         # a malformed reply is reported the same way as a failed call.
         return agent.chat(question).response
     except Exception as e:
         print(f"Error processing question: {e}")
         return f"Error processing your question: {str(e)}"
 def aswer_language_detection(response_text: str) -> str:
     """Guess the language of an answer from its opening words.
     Bracketed citations (``[...]``) are removed first, then the first five
     remaining words are passed to ``detect`` (presumably langdetect's; confirm
     against the file's imports). Only English, Arabic and French are reported.
     Any other result, a failed detection, or an answer with no text outside
     citations yields English.
     Args:
         response_text: The answer text, possibly containing bracketed citations.
     Returns:
         str: One of 'en', 'ar' or 'fr'.
     """
     supported_languages = ('en', 'ar', 'fr')
     default_language = 'en'
     try:
         # Strip citations before sampling so bracketed IDs neither use up the
         # five-word budget nor leave half-open "[..." fragments in the sample.
         text_without_citations = re.sub(r'\[.*?\]', '', response_text)
         sample = " ".join(text_without_citations.split()[:5])
         if not sample:
             # Nothing left to analyse; skip the detector entirely.
             return default_language
         answer_language = detect(sample)
     except Exception:
         # Best effort: a detector failure or non-string input falls back to
         # English. Unlike a bare except or a return inside finally, this lets
         # KeyboardInterrupt and SystemExit propagate.
         return default_language
     if answer_language not in supported_languages:
         return default_language
     return answer_language
 def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
     Process a question through the RAG pipeline and return response with sequential citation numbers.
     Args:
         agent: The initialized RAG agent
         }
     """
     try:
+        # Get the response from the agent
         response = agent.chat(question)
         response_text = response.response
         # Extract source IDs from the response (preserving order)
         unique_ids = extract_source_ids(response_text)
         # Convert to JSON
         cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
+        aswer_language= aswer_language_detection(response_text)
         return {
             "response": sequential_response,
             "cited_elements_json": cited_elements_json,
             "unique_ids": unique_ids,
             "citation_mapping": source_id_to_number,
+            "answer_language":aswer_language
         }
     except Exception as e:
+        print(f"Error processing question: {e}")
         return {
+            "response": response_text if 'response_text' in locals() else "Error occurred",
             "cited_elements_json": "[]",
             "unique_ids": [],
             "citation_mapping": {},
+            "answer_language": "en"  # Default to English if not specified
         }
 def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
     Legacy function - maintained for backward compatibility.