AgenticRAG_test

Sleeping

App Files Files Community

Zeggai Abdellah committed on Jun 7, 2025

Commit

dc90437

1 Parent(s): d8d8050

update

Browse files

Files changed (2) hide show

prepare_env.py +1 -1
rag_pipeline.py +76 -24

prepare_env.py CHANGED Viewed

@@ -159,7 +159,7 @@ def create_retriever(vectorstore, docs, llm):
     # BM25 retriever
     bm25_retriever = BM25Retriever.from_documents(docs)
-    bm25_retriever.k = 2
     print("✅ BM25 retriever created (k=2)")
     # Ensemble retriever

     # BM25 retriever
     bm25_retriever = BM25Retriever.from_documents(docs)
+    bm25_retriever.k = 3
     print("✅ BM25 retriever created (k=2)")
     # Ensemble retriever

rag_pipeline.py CHANGED Viewed

@@ -112,6 +112,7 @@ def create_safe_custom_prompt(tools, llm, is_fallback=False):
     print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
     if is_fallback:
         custom_instructions = """
 ## MEDICAL ASSISTANT ROLE - FALLBACK MODE
@@ -129,21 +130,19 @@ Answer the doctor's question accurately and concisely using only the provided in
 ## IMPORTANT REQUIREMENTS
 ### Citation and Sourcing
-1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
-2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
-3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
-4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
-5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
-6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
 ### Content Formatting
 1. When rendering tables:
-   - Convert HTML tables into clean Markdown format.
-   - Preserve all original headers and data rows exactly.
-   - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
 2. For lists, maintain the original bullet points/numbering and include citations.
 3. Present information concisely but ensure clinical accuracy is never compromised.
----
 """
     else:
         custom_instructions = """
@@ -155,31 +154,84 @@ Answer the doctor's question accurately and concisely using only the provided in
 ## IMPORTANT REQUIREMENTS
 ### Citation and Sourcing
-1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
-2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
-3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
-4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
-5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
-6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
 ### Content Formatting
 1. When rendering tables:
-   - Convert HTML tables into clean Markdown format.
-   - Preserve all original headers and data rows exactly.
-   - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
 2. For lists, maintain the original bullet points/numbering and include citations.
 3. Present information concisely but ensure clinical accuracy is never compromised.
----
 """
     # Get the exact original template first
     temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
     original_prompts = temp_agent.get_prompts()
     original_template = original_prompts["agent_worker:system_prompt"].template
-    # Add instructions at the very beginning
-    safe_template = f"{custom_instructions}{original_template}"
     # Create new prompt with same metadata as original
     original_prompt = original_prompts["agent_worker:system_prompt"]
@@ -193,10 +245,10 @@ Answer the doctor's question accurately and concisely using only the provided in
         print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
         return new_prompt
     except:
-        # Even safer fallback
         print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
         return PromptTemplate(template=safe_template)
 def create_agent(tools, llm, is_fallback=False):
     """Create the ReAct agent with custom prompt"""

     print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
+    # Your custom instructions
     if is_fallback:
         custom_instructions = """
 ## MEDICAL ASSISTANT ROLE - FALLBACK MODE
 ## IMPORTANT REQUIREMENTS
 ### Citation and Sourcing
+1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
+2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
+3. If a fact is supported by multiple sources, use the following format:
+   - Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
+4. Use ONLY the provided information and never include facts from your general knowledge.
 ### Content Formatting
 1. When rendering tables:
+   - Convert HTML tables into clean Markdown format
+   - Preserve all original headers and data rows exactly
+   - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
 2. For lists, maintain the original bullet points/numbering and include citations.
 3. Present information concisely but ensure clinical accuracy is never compromised.
 """
     else:
         custom_instructions = """
 ## IMPORTANT REQUIREMENTS
 ### Citation and Sourcing
+1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
+2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
+3. If a fact is supported by multiple sources, use the following format:
+   - Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
+4. Use ONLY the provided information and never include facts from your general knowledge.
 ### Content Formatting
 1. When rendering tables:
+   - Convert HTML tables into clean Markdown format
+   - Preserve all original headers and data rows exactly
+   - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
 2. For lists, maintain the original bullet points/numbering and include citations.
 3. Present information concisely but ensure clinical accuracy is never compromised.
+"""
+    # Create the modified prompt template by combining your instructions with the default structure
+    # IMPORTANT: Fix the curly brace escaping for JSON examples
+    modified_template = f"""{custom_instructions}
+## Tools
+You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
+This may require breaking the task into subtasks and using different tools to complete each subtask.
+You have access to the following tools:
+{{tool_desc}}
+## Output Format
+Please answer in the same language as the question and use the following format:
+```
+Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
+Action: tool name (one of {{tool_names}}) if using a tool.
+Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
+```
+Please ALWAYS start with a Thought.
+NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.
+Please use a valid JSON format for the Action Input. Do NOT do this {{"input": "hello world", "num_beams": 5}}.
+If this format is used, the tool will respond in the following format:
+```
+Observation: tool response
+```
+You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in one of the following two formats:
+```
+Thought: I can answer without using any more tools. I'll use the user's language to answer. Remember to include proper citations
+Answer: [your answer here with proper citations (In the same language as the user's question)]
+```
+```
+Thought: I cannot answer the question with the provided tools.
+Answer: [your answer here (In the same language as the user's question)]
+```
+## Current Conversation
+Below is the current conversation consisting of interleaving human and assistant messages.
 """
+    # SAFER APPROACH: Let's use the original template and just prepend instructions
     # Get the exact original template first
     temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
     original_prompts = temp_agent.get_prompts()
     original_template = original_prompts["agent_worker:system_prompt"].template
+    # Add instructions at the very beginning, before "You are designed to help..."
+    safe_template = f"""{custom_instructions}
+---
+{original_template}"""
     # Create new prompt with same metadata as original
     original_prompt = original_prompts["agent_worker:system_prompt"]
         print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
         return new_prompt
     except:
+        # Even safer fallback - just use PromptTemplate with template only
         print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
         return PromptTemplate(template=safe_template)
 def create_agent(tools, llm, is_fallback=False):
     """Create the ReAct agent with custom prompt"""