Spaces:

Zeggai
/

AgenticRAG

Sleeping

App Files Files Community

Zeggai Abdellah committed on Jun 6, 2025

Commit

e40cfb6

1 Parent(s): b12f17b

add fall back system

Browse files

Files changed (1) hide show

rag_pipeline.py +243 -69

rag_pipeline.py CHANGED Viewed

@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 """
-Enhanced RAG Pipeline for vaccine assistant
 Handles agent creation and question answering with sequential citation numbering
 """
 import json
@@ -106,13 +107,60 @@ def convert_citations_to_sequential(response_text, source_id_to_number_map):
     return sequential_response
-def create_safe_custom_prompt(tools, llm):
     """Create a safe version that won't have formatting conflicts"""
-    print(f"[LOG] Creating custom prompt with {len(tools)} tools")
-    custom_instructions = """
 ## MEDICAL ASSISTANT ROLE
 You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
 You provide evidence-based guidance using only information from official vaccine medical documents.
@@ -176,43 +224,70 @@ If you cannot find complete information to fully answer a question:
             template_vars=original_prompt.template_vars,
             metadata=original_prompt.metadata if hasattr(original_prompt, 'metadata') else None
         )
-        print("[LOG] ✅ Successfully created safe custom prompt")
         return new_prompt
     except:
         # Even safer fallback
-        print("[LOG] ⚠️ Using fallback prompt template")
         return PromptTemplate(template=safe_template)
-def create_agent(tools, llm):
     """Create the ReAct agent with custom prompt"""
-    print(f"[LOG] Creating ReAct agent with {len(tools)} tools and max_iterations=8")
-    # Create agent with increased max iterations and better handling
-    # Force verbose=True to see the Thought/Action/Observation cycle
     agent = ReActAgent.from_tools(
         tools,
         llm=llm,
-        verbose=True,  # This should show the ReAct reasoning steps
-        max_iterations=8,  # Reduced from default to prevent excessive looping
     )
-    # Create and apply safe custom prompt
     try:
-        safe_custom_prompt = create_safe_custom_prompt(tools, llm)
         agent.update_prompts({"agent_worker:system_prompt": safe_custom_prompt})
-        print("✅ Successfully updated with safe custom prompt")
     except Exception as e:
-        print(f"❌ Safe prompt update failed: {e}")
-        print("⚠️  Using original agent without modifications")
-    print("[LOG] Agent creation completed")
     return agent
 def initialize_rag_pipeline(tools):
-    """Initialize the RAG pipeline with tools"""
-    print("[LOG] Initializing RAG pipeline...")
     print(f"[LOG] Available tools: {[tool.metadata.name if hasattr(tool, 'metadata') else str(tool) for tool in tools]}")
     # Initialize LlamaIndex LLM
@@ -222,38 +297,146 @@ def initialize_rag_pipeline(tools):
         api_key=os.getenv('GOOGLE_API_KEY'),
     )
-    # Create agent
-    agent = create_agent(tools, llama_index_llm)
-    print("[LOG] ✅ RAG pipeline initialization completed")
-    return agent
-def process_question(agent, question: str) -> str:
-    """Process a question through the RAG pipeline"""
     print(f"[LOG] Processing question: '{question[:100]}{'...' if len(question) > 100 else ''}'")
     print("="*50)
-    print("AGENT REASONING PROCESS:")
     print("="*50)
     start_time = time.time()
     try:
-        # The agent.chat() call should now show the full ReAct process
-        response = agent.chat(question)
         print("="*50)
-        print("END OF AGENT REASONING")
         print("="*50)
         elapsed_time = time.time() - start_time
-        print(f"[LOG] ✅ Agent response received in {elapsed_time:.2f} seconds")
-        print(f"[LOG] Response length: {len(response.response)} characters")
-        return response.response
     except Exception as e:
         elapsed_time = time.time() - start_time
-        print(f"[LOG] ❌ Error processing question after {elapsed_time:.2f} seconds: {e}")
         return f"Error processing your question: {str(e)}"
 def aswer_language_detection(response_text: str) -> str:
     """
     Detect the language of the response text.
@@ -283,12 +466,12 @@ def aswer_language_detection(response_text: str) -> str:
         return answer_language
-def process_question_with_sequential_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
-    Process a question through the RAG pipeline and return response with sequential citation numbers.
     Args:
-        agent: The initialized RAG agent
         question (str): The user's question
         chunks_directory (str): Path to the directory containing JSON files
@@ -297,43 +480,30 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
             "response": str,  # Response with sequential citation numbers [1], [2], etc.
             "cited_elements_json": str,  # JSON array of cited elements in order
             "unique_ids": list,  # Original source IDs in order
-            "citation_mapping": dict  # Mapping from source ID to citation number
         }
     """
-    print(f"\n[LOG] === STARTING QUESTION PROCESSING ===")
     print(f"[LOG] Question: '{question[:150]}{'...' if len(question) > 150 else ''}'")
     print(f"[LOG] Chunks directory: {chunks_directory}")
     start_time = time.time()
     try:
-        # Get the response from the agent
-        print("\n" + "="*60)
-        print("🤖 AGENT REASONING PROCESS STARTING...")
-        print("="*60)
-        response = agent.chat(question)
-        print("="*60)
-        print("🤖 AGENT REASONING PROCESS COMPLETED")
-        print("="*60)
-        response_text = response.response
         agent_time = time.time() - start_time
         print(f"[LOG] Agent processing completed in {agent_time:.2f} seconds")
         print(f"[LOG] Raw response length: {len(response_text)} characters")
-        # Enhanced handling for max iterations error
-        if ("max iterations" in response_text.lower() or
-            "reached max iterations" in response_text.lower() or
-            len(response_text.strip()) == 0 or
-            "agent stopped due to max iterations" in response_text.lower()):
-            print("[LOG] ⚠️ Detected max iterations error, providing fallback response")
-            # Provide a more helpful fallback response
-            response_text = ("I apologize, but I encountered difficulties processing your question within the available search iterations. "
-                           "This may be due to the complexity of your query or limitations in finding specific information in the available documents. "
-                           "Please try rephrasing your question more specifically, or break it down into smaller, more focused questions for better results.")
         # Extract source IDs from the response (preserving order)
         unique_ids = extract_source_ids(response_text)
@@ -392,11 +562,12 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
         # Convert to JSON
         cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
-        aswer_language = aswer_language_detection(response_text)
         total_time = time.time() - start_time
         print(f"[LOG] ✅ Processing completed in {total_time:.2f} seconds total")
         print(f"[LOG] Final response length: {len(sequential_response)} characters")
         print(f"[LOG] === QUESTION PROCESSING COMPLETED ===\n")
         return {
@@ -404,7 +575,8 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
             "cited_elements_json": cited_elements_json,
             "unique_ids": unique_ids,
             "citation_mapping": source_id_to_number,
-            "answer_language": aswer_language
         }
     except Exception as e:
@@ -417,13 +589,15 @@ def process_question_with_sequential_citations(agent, question: str, chunks_dire
             "cited_elements_json": "[]",
             "unique_ids": [],
             "citation_mapping": {},
-            "answer_language": "en"  # Default to English if not specified
         }
-def process_question_with_citations(agent, question: str, chunks_directory="./data/") -> dict:
     """
     Legacy function - maintained for backward compatibility.
-    Now calls the new sequential citation function.
     """
-    print("[LOG] Using legacy function wrapper - redirecting to sequential citations")
-    return process_question_with_sequential_citations(agent, question, chunks_directory)

 # -*- coding: utf-8 -*-
 """
+Enhanced RAG Pipeline for vaccine assistant with fallback system
 Handles agent creation and question answering with sequential citation numbering
+Includes fallback agent for max iterations handling
 """
 import json
     return sequential_response
+def create_safe_custom_prompt(tools, llm, is_fallback=False):
     """Create a safe version that won't have formatting conflicts"""
+    print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
+    if is_fallback:
+        custom_instructions = """
+## MEDICAL ASSISTANT ROLE - FALLBACK MODE
+You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
+You are operating in FALLBACK MODE with access to only the most essential and comprehensive tools.
+You provide evidence-based guidance using only information from official vaccine medical documents.
+Answer the doctor's question accurately and concisely using only the provided information.
+## FALLBACK MODE INSTRUCTIONS
+- You have access to only 2 powerful tools that search the entire main documents
+- Use Guide_vector_tool for questions about the Algerian National Vaccination Guide
+- Use Immunization_in_Practice_tool for questions requiring WHO global guidance
+- Be direct and efficient - search once with each tool if needed, then provide your answer
+- Do not overthink or search repeatedly - these tools are comprehensive
+## IMPORTANT REQUIREMENTS
+### Citation and Sourcing
+1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
+2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
+3. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
+4. Use ONLY the provided information and never include facts from your general knowledge.
+### Content Formatting
+1. When rendering tables:
+   - Convert HTML tables into clean Markdown format
+   - Preserve all original headers and data rows exactly
+   - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
+2. For lists, maintain the original bullet points/numbering and include citations.
+3. Present information concisely but ensure clinical accuracy is never compromised.
+### CRITICAL: Efficient Fallback Strategy
+1. **SEARCH ONCE**: Use each tool at most once - they are comprehensive and powerful
+2. **BE DECISIVE**: Once you find relevant information, formulate your response immediately
+3. **ANSWER DIRECTLY**: Provide a clear, direct answer based on the information found
+4. **STOP WHEN SUFFICIENT**: If you have found adequate information, provide the response and stop
+5. **COMPREHENSIVE COVERAGE**: These tools search entire documents, so one search should be sufficient
+### Response Guidelines
+- Start with the most relevant tool for the question
+- If the question requires both Algerian-specific and global context, use both tools once each
+- Provide whatever information you find with proper citations
+- If information is limited, clearly state what is and isn't available in the documents
+---
+"""
+    else:
+        custom_instructions = """
 ## MEDICAL ASSISTANT ROLE
 You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
 You provide evidence-based guidance using only information from official vaccine medical documents.
             template_vars=original_prompt.template_vars,
             metadata=original_prompt.metadata if hasattr(original_prompt, 'metadata') else None
         )
+        print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
         return new_prompt
     except:
         # Even safer fallback
+        print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
         return PromptTemplate(template=safe_template)
def create_agent(tools, llm, is_fallback=False):
    """Build a ReAct agent and try to install the custom system prompt on it.

    Args:
        tools: Tools handed to ``ReActAgent.from_tools``.
        llm: LLM instance the agent reasons with.
        is_fallback: When true, build the fallback variant (3 iterations
            instead of 8, and the fallback flavour of the custom prompt).

    Returns:
        The constructed agent. If installing the custom prompt raises, the
        failure is logged and the agent is returned with whatever prompt
        ``from_tools`` gave it.
    """
    if is_fallback:
        label, iteration_cap = "FALLBACK", 3
    else:
        label, iteration_cap = "STANDARD", 8

    print(f"[LOG] Creating {label} ReAct agent with {len(tools)} tools and max_iterations={iteration_cap}")

    # verbose=True makes the agent emit its reasoning steps to stdout.
    agent = ReActAgent.from_tools(
        tools,
        llm=llm,
        verbose=True,
        max_iterations=iteration_cap,
    )

    # Prompt customisation is best-effort: a failure here must not prevent
    # the caller from getting a usable agent.
    try:
        prompt = create_safe_custom_prompt(tools, llm, is_fallback=is_fallback)
        agent.update_prompts({"agent_worker:system_prompt": prompt})
        print(f"✅ Successfully updated {label} agent with custom prompt")
    except Exception as exc:
        print(f"❌ {label} agent prompt update failed: {exc}")
        print(f"⚠️  Using original {label} agent without modifications")

    print(f"[LOG] {label} agent creation completed")
    return agent
def create_fallback_tools(all_tools, fallback_tool_names=("Guide_vector_tool", "Immunization_in_Practice_tool")):
    """Select the subset of tools that the fallback agent is allowed to use.

    Args:
        all_tools: Iterable of tool objects. A tool's name is taken from
            ``tool.metadata.name`` when that is available, otherwise from
            ``str(tool)``.
        fallback_tool_names: Iterable of tool names to keep. Defaults to the
            two tools the fallback prompt refers to.

    Returns:
        A list of the matching tools, in the order they appear in
        ``all_tools``, or ``None`` when nothing matched (callers test the
        result with ``is None``).
    """
    print("[LOG] Creating fallback tools (guide + immunization only)")

    wanted = frozenset(fallback_tool_names)

    def _tool_name(tool):
        # Tolerate tools whose ``metadata`` is missing, None, or has no
        # ``name`` instead of raising AttributeError mid-initialisation.
        name = getattr(getattr(tool, "metadata", None), "name", None)
        return name if name is not None else str(tool)

    matches = [(name, tool) for name, tool in ((_tool_name(t), t) for t in all_tools) if name in wanted]
    fallback_tools = [tool for _, tool in matches]
    tool_names_found = [name for name, _ in matches]

    print(f"[LOG] Found {len(fallback_tools)} fallback tools: {tool_names_found}")

    if not fallback_tools:
        print("[LOG] ❌ ERROR: No fallback tools found! Check tool names.")
        return None

    # A partial match still yields a working fallback agent, but the prompt
    # mentions every expected tool, so make the gap visible in the logs.
    missing = sorted(wanted.difference(tool_names_found))
    if missing:
        print(f"[LOG] ⚠️ Fallback tools not found: {missing}")

    return fallback_tools
 def initialize_rag_pipeline(tools):
+    """Initialize the RAG pipeline with both standard and fallback agents"""
+    print("[LOG] Initializing RAG pipeline with fallback system...")
     print(f"[LOG] Available tools: {[tool.metadata.name if hasattr(tool, 'metadata') else str(tool) for tool in tools]}")
     # Initialize LlamaIndex LLM
         api_key=os.getenv('GOOGLE_API_KEY'),
     )
+    # Create standard agent
+    print("[LOG] Creating standard agent...")
+    standard_agent = create_agent(tools, llama_index_llm, is_fallback=False)
+    # Create fallback tools and agent
+    print("[LOG] Creating fallback agent...")
+    fallback_tools = create_fallback_tools(tools)
+    if fallback_tools is None:
+        print("[LOG] ❌ WARNING: Fallback agent creation failed - no fallback tools available")
+        fallback_agent = None
+    else:
+        fallback_agent = create_agent(fallback_tools, llama_index_llm, is_fallback=True)
+        print("[LOG] ✅ Fallback agent created successfully")
+    print("[LOG] ✅ RAG pipeline initialization completed with fallback system")
+    return {
+        "standard_agent": standard_agent,
+        "fallback_agent": fallback_agent,
+        "llm": llama_index_llm
+    }
def detect_max_iterations_error(response_text):
    """Heuristically decide whether an agent answer is a failure.

    Args:
        response_text: The agent's answer. ``None`` or any non-string value
            is treated as a failed answer rather than raising.

    Returns:
        ``True`` when the text mentions an iteration limit, is shorter than
        10 characters after stripping whitespace, or contains both "error"
        and "processing"; ``False`` otherwise. Matching is case-insensitive.
    """
    # An absent answer cannot be shown to the user, so report it as a failure
    # instead of crashing on ``.lower()``.
    if not isinstance(response_text, str):
        return True

    response_lower = response_text.lower()

    # "max iterations" also matches the longer phrasings "reached max
    # iterations" and "agent stopped due to max iterations".
    max_iteration_indicators = (
        "max iterations",
        "maximum number of iterations",
        "iteration limit",
    )
    if any(indicator in response_lower for indicator in max_iteration_indicators):
        return True

    # Very short or empty responses often indicate a failed run.
    if len(response_text.strip()) < 10:
        return True

    # NOTE(review): this generic pattern is deliberately broad and can also
    # match a legitimate answer that happens to use both words; kept as-is
    # because callers rely on it to catch "Error processing ..." messages.
    return "error" in response_lower and "processing" in response_lower
def _ask_fallback_agent(fallback_agent, question: str, pipeline_start: float) -> str:
    """Query the fallback agent once and vet its answer.

    Returns the fallback agent's answer, or an apology message when that
    answer itself looks like an iteration-limit failure. Exceptions raised by
    the agent propagate to the caller.
    """
    print("="*50)
    print("🛡️ FALLBACK AGENT REASONING PROCESS:")
    print("="*50)

    fallback_start_time = time.time()
    # Normalise a missing answer to "" so the length/heuristic checks work.
    fallback_text = fallback_agent.chat(question).response or ""

    print("="*50)
    print("🛡️ FALLBACK AGENT REASONING COMPLETED")
    print("="*50)

    finished = time.time()
    print(f"[LOG] ✅ Fallback agent response received in {finished - fallback_start_time:.2f} seconds")
    print(f"[LOG] Total processing time: {finished - pipeline_start:.2f} seconds")
    print(f"[LOG] Fallback response length: {len(fallback_text)} characters")

    if detect_max_iterations_error(fallback_text):
        print("[LOG] ❌ Fallback agent also hit max iterations")
        return ("I apologize, but I'm having difficulty finding specific information about your question in the available documents. "
                "Please try asking a more specific question or rephrasing your query.")

    return fallback_text


def process_question(agents_dict, question: str) -> str:
    """Answer a question with the standard agent, falling back when it fails.

    Args:
        agents_dict: Dictionary with a ``"standard_agent"`` entry and an
            optional ``"fallback_agent"`` entry (may be ``None``). For
            backward compatibility a bare agent object is also accepted and
            is used as the standard agent with no fallback.
        question: The user's question.

    Returns:
        The answer text. The fallback agent is consulted when the standard
        agent raises or when ``detect_max_iterations_error`` flags its
        answer. If no usable answer is obtained, an apology or an
        "Error processing your question: ..." message is returned; this
        function does not raise for agent failures.
    """
    print(f"[LOG] Processing question: '{question[:100]}{'...' if len(question) > 100 else ''}'")

    if isinstance(agents_dict, dict):
        standard_agent = agents_dict["standard_agent"]
        fallback_agent = agents_dict.get("fallback_agent")
    else:
        # Callers written against the earlier single-agent signature.
        standard_agent, fallback_agent = agents_dict, None

    print("="*50)
    print("🤖 STANDARD AGENT REASONING PROCESS:")
    print("="*50)

    start_time = time.time()

    try:
        response = standard_agent.chat(question)
        # Normalise a missing answer to "" so it is routed to the fallback
        # instead of raising on len().
        response_text = response.response or ""
    except Exception as e:
        elapsed_time = time.time() - start_time
        print(f"[LOG] ❌ Standard agent error after {elapsed_time:.2f} seconds: {e}")

        # Try the fallback even when the standard agent raised; its answer
        # goes through the same vetting as on the normal path.
        if fallback_agent is not None:
            print("[LOG] 🔄 Standard agent failed, trying FALLBACK AGENT...")
            try:
                return _ask_fallback_agent(fallback_agent, question, start_time)
            except Exception as fallback_e:
                print(f"[LOG] ❌ Fallback agent also failed: {fallback_e}")

        return f"Error processing your question: {e}"

    print("="*50)
    print("🤖 STANDARD AGENT REASONING COMPLETED")
    print("="*50)

    elapsed_time = time.time() - start_time
    print(f"[LOG] ✅ Standard agent response received in {elapsed_time:.2f} seconds")
    print(f"[LOG] Response length: {len(response_text)} characters")

    if not detect_max_iterations_error(response_text):
        return response_text

    print("[LOG] 🔄 Max iterations detected, switching to FALLBACK AGENT...")

    if fallback_agent is None:
        print("[LOG] ❌ Fallback agent not available, returning error message")
        return ("I apologize, but I encountered difficulties processing your question. "
                "Please try rephrasing your question more specifically or breaking it down into smaller parts.")

    fallback_start_time = time.time()
    try:
        return _ask_fallback_agent(fallback_agent, question, start_time)
    except Exception as e:
        fallback_elapsed = time.time() - fallback_start_time
        print(f"[LOG] ❌ Fallback agent error after {fallback_elapsed:.2f} seconds: {e}")
        return ("I apologize, but I encountered an error while processing your question. "
                "Please try rephrasing your question or asking about a more specific topic.")
 def aswer_language_detection(response_text: str) -> str:
     """
     Detect the language of the response text.
         return answer_language
+def process_question_with_sequential_citations(agents_dict, question: str, chunks_directory="./data/") -> dict:
     """
+    Process a question through the RAG pipeline with fallback support and return response with sequential citation numbers.
     Args:
+        agents_dict: Dictionary containing standard_agent, fallback_agent, and llm
         question (str): The user's question
         chunks_directory (str): Path to the directory containing JSON files
             "response": str,  # Response with sequential citation numbers [1], [2], etc.
             "cited_elements_json": str,  # JSON array of cited elements in order
             "unique_ids": list,  # Original source IDs in order
+            "citation_mapping": dict,  # Mapping from source ID to citation number
+            "used_fallback": bool  # Whether fallback agent was used
         }
     """
+    print(f"\n[LOG] === STARTING QUESTION PROCESSING WITH FALLBACK SUPPORT ===")
     print(f"[LOG] Question: '{question[:150]}{'...' if len(question) > 150 else ''}'")
     print(f"[LOG] Chunks directory: {chunks_directory}")
     start_time = time.time()
+    used_fallback = False
     try:
+        # Get the response using the enhanced process_question function
+        response_text = process_question(agents_dict, question)
+        # Check if this looks like a fallback was used (simple heuristic)
+        if "fallback" in response_text.lower() or len(response_text) < 50:
+            used_fallback = True
+            print("[LOG] 🛡️ Fallback agent was likely used")
         agent_time = time.time() - start_time
         print(f"[LOG] Agent processing completed in {agent_time:.2f} seconds")
         print(f"[LOG] Raw response length: {len(response_text)} characters")
         # Extract source IDs from the response (preserving order)
         unique_ids = extract_source_ids(response_text)
         # Convert to JSON
         cited_elements_json = json.dumps(cited_elements_ordered, ensure_ascii=False, indent=2)
+        answer_language = aswer_language_detection(response_text)
         total_time = time.time() - start_time
         print(f"[LOG] ✅ Processing completed in {total_time:.2f} seconds total")
         print(f"[LOG] Final response length: {len(sequential_response)} characters")
+        print(f"[LOG] Used fallback: {used_fallback}")
         print(f"[LOG] === QUESTION PROCESSING COMPLETED ===\n")
         return {
             "cited_elements_json": cited_elements_json,
             "unique_ids": unique_ids,
             "citation_mapping": source_id_to_number,
+            "answer_language": answer_language,
+            "used_fallback": used_fallback
         }
     except Exception as e:
             "cited_elements_json": "[]",
             "unique_ids": [],
             "citation_mapping": {},
+            "answer_language": "en",
+            "used_fallback": False
         }
def process_question_with_citations(agents_dict, question: str, chunks_directory="./data/") -> dict:
    """Backward-compatible alias for the sequential-citation entry point.

    Logs that the legacy wrapper was used, then forwards all three arguments
    unchanged to ``process_question_with_sequential_citations`` and returns
    its result.
    """
    print("[LOG] Using legacy function wrapper - redirecting to sequential citations with fallback")
    result = process_question_with_sequential_citations(agents_dict, question, chunks_directory)
    return result