Spaces:
Sleeping
Sleeping
Zeggai Abdellah
committed on
Commit
·
6bc642b
1
Parent(s):
760d971
back the old sysprompt
Browse files
- rag_pipeline.py +113 -74
rag_pipeline.py
CHANGED
|
@@ -108,100 +108,139 @@ def convert_citations_to_sequential(response_text, source_id_to_number_map):
|
|
| 108 |
|
| 109 |
|
| 110 |
def create_safe_custom_prompt(tools, llm, is_fallback=False):
|
| 111 |
-
"""
|
| 112 |
-
|
| 113 |
-
outputting tool code instead of a final answer.
|
| 114 |
-
"""
|
| 115 |
print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
|
| 116 |
-
|
| 117 |
-
# This prompt is heavily structured to guide the agent's reasoning process
|
| 118 |
-
# and to explicitly separate the tool-using phase from the final answer phase.
|
| 119 |
|
| 120 |
-
role_and_formatting = """
|
| 121 |
-
## ROLE: MEDICAL VACCINE ASSISTANT
|
| 122 |
-
You are a highly specialized AI assistant for Algerian doctors. Your purpose is to provide accurate, evidence-based answers about vaccines using only the provided official medical documents.
|
| 123 |
-
|
| 124 |
-
## RESPONSE REQUIREMENTS
|
| 125 |
-
|
| 126 |
-
### 1. Citation
|
| 127 |
-
- You MUST cite every piece of information.
|
| 128 |
-
- Use the source ID directly in brackets, like this: `[e795ebd28318886c0b1a5395ac30ad90]`.
|
| 129 |
-
- For information from multiple sources, place citations next to each other: `[source1][source2]`.
|
| 130 |
-
- NEVER use your own knowledge. If you can't find it in the documents, state that.
|
| 131 |
-
|
| 132 |
-
### 2. Formatting
|
| 133 |
-
- Convert any HTML tables into clean, readable Markdown.
|
| 134 |
-
- Preserve all table data and headers accurately.
|
| 135 |
-
- Place the citation for the table in its caption, e.g., `Table: Vaccine Schedule [source_id]`.
|
| 136 |
-
"""
|
| 137 |
-
|
| 138 |
if is_fallback:
|
| 139 |
-
|
| 140 |
-
##
|
| 141 |
-
You are
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
### --- CRITICAL: HOW TO PROVIDE THE FINAL ANSWER ---
|
| 163 |
-
When you have enough information and are ready to answer, you MUST STOP using tools.
|
| 164 |
-
Your final turn MUST follow this exact format:
|
| 165 |
-
```
|
| 166 |
-
Thought: I have gathered all the necessary information. I will now synthesize the final answer in the user's language, ensuring every fact is cited correctly.
|
| 167 |
-
Answer: [Your final, comprehensive answer in natural, human-readable language. It should be fully formatted with Markdown and include citations like [source_id_1] as required.]
|
| 168 |
-
```
|
| 169 |
-
|
| 170 |
-
**IMPORTANT**: The final `Answer:` block MUST NOT contain any JSON or `tool_code`. It must ONLY contain the text response for the user.
|
| 171 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
| 174 |
-
{role_and_formatting}
|
| 175 |
-
{agent_specific_instructions}
|
| 176 |
-
{final_answer_instructions}
|
| 177 |
"""
|
| 178 |
|
| 179 |
-
# Get the original template
|
| 180 |
temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
|
| 181 |
original_prompts = temp_agent.get_prompts()
|
| 182 |
original_template = original_prompts["agent_worker:system_prompt"].template
|
| 183 |
-
|
| 184 |
-
#
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
Here are the tools available to you. Follow the reasoning process described above.
|
| 189 |
-
---
|
| 190 |
-
{original_template}
|
| 191 |
-
"""
|
| 192 |
-
|
| 193 |
original_prompt = original_prompts["agent_worker:system_prompt"]
|
| 194 |
|
| 195 |
try:
|
| 196 |
new_prompt = PromptTemplate(
|
| 197 |
template=safe_template,
|
| 198 |
template_vars=original_prompt.template_vars,
|
| 199 |
-
metadata=original_prompt.metadata if hasattr(original_prompt, 'metadata') else
|
| 200 |
)
|
| 201 |
print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
|
| 202 |
return new_prompt
|
| 203 |
-
except
|
| 204 |
-
|
|
|
|
| 205 |
return PromptTemplate(template=safe_template)
|
| 206 |
|
| 207 |
|
|
|
|
| 108 |
|
| 109 |
|
| 110 |
def create_safe_custom_prompt(tools, llm, is_fallback=False):
|
| 111 |
+
"""Create a safe version that won't have formatting conflicts"""
|
| 112 |
+
|
|
|
|
|
|
|
| 113 |
print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
|
|
|
|
|
|
|
|
|
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
if is_fallback:
|
| 116 |
+
custom_instructions = """
|
| 117 |
+
## MEDICAL ASSISTANT ROLE - FALLBACK MODE
|
| 118 |
+
You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
|
| 119 |
+
You are operating in FALLBACK MODE with access to only the most essential and comprehensive tools.
|
| 120 |
+
You provide evidence-based guidance using only information from official vaccine medical documents.
|
| 121 |
+
Answer the doctor's question accurately and concisely using only the provided information.
|
| 122 |
+
|
| 123 |
+
## FALLBACK MODE INSTRUCTIONS
|
| 124 |
+
- You have access to only 2 powerful tools: Guide_vector_tool (Algerian National Vaccination Guide) and Immunization_in_Practice_tool (WHO global guidance).
|
| 125 |
+
- **MANDATORY TOOL USAGE**: Always use the relevant tool(s) to search for information before answering, even if you initially think no information is available.
|
| 126 |
+
- Be direct and efficient - search once with each tool if needed, then provide your answer.
|
| 127 |
+
- Do not overthink or search repeatedly - these tools are comprehensive.
|
| 128 |
+
|
| 129 |
+
## IMPORTANT REQUIREMENTS
|
| 130 |
+
|
| 131 |
+
### Citation and Sourcing
|
| 132 |
+
1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
|
| 133 |
+
2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
|
| 134 |
+
3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
|
| 135 |
+
4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
|
| 136 |
+
5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
|
| 137 |
+
6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
|
| 138 |
+
|
| 139 |
+
### Content Formatting
|
| 140 |
+
1. When rendering tables:
|
| 141 |
+
- Convert HTML tables into clean Markdown format.
|
| 142 |
+
- Preserve all original headers and data rows exactly.
|
| 143 |
+
- Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
|
| 144 |
+
2. For lists, maintain the original bullet points/numbering and include citations.
|
| 145 |
+
3. Present information concisely but ensure clinical accuracy is never compromised.
|
| 146 |
+
|
| 147 |
+
### CRITICAL: Efficient Fallback Strategy
|
| 148 |
+
1. **MANDATORY SEARCH**: Use each relevant tool at least once to search for information, even if you suspect the information might not be available.
|
| 149 |
+
2. **BREAK DOWN COMPLEX QUERIES**: For comparative or multi-part questions (e.g., comparing Algerian and WHO guidelines), break the query into sub-queries and use the appropriate tool for each part:
|
| 150 |
+
- Use Guide_vector_tool for Algerian-specific information (e.g., national schedules, coverage targets).
|
| 151 |
+
- Use Immunization_in_Practice_tool for WHO-specific information (e.g., global recommendations, coverage targets).
|
| 152 |
+
3. **DO NOT STOP PREMATURELY**: Do not conclude "no information is available" without using the relevant tool(s) to search for the answer.
|
| 153 |
+
4. **BE DECISIVE**: Once you find relevant information for each sub-query, formulate your response immediately.
|
| 154 |
+
5. **ANSWER FULLY**: Address all parts of the question, using multiple tools if required by the query.
|
| 155 |
+
|
| 156 |
+
### Response Guidelines
|
| 157 |
+
- **MANDATORY TOOL SELECTION**:
|
| 158 |
+
- For queries mentioning "WHO," "World Health Organization," "international," "global guidance," or WHO documents (e.g., page numbers), use Immunization_in_Practice_tool first.
|
| 159 |
+
- For queries mentioning "Algerian," "national guide," or Algerian-specific terms (e.g., page numbers), use Guide_vector_tool first.
|
| 160 |
+
- For comparative queries (e.g., Algerian vs. WHO), use both Guide_vector_tool and Immunization_in_Practice_tool, addressing each part systematically.
|
| 161 |
+
- **EXPLICIT REASONING**: Before answering, log your reasoning steps, including which tools you will use and why, based on the query’s content.
|
| 162 |
+
- **Query Decomposition**: Break comparative or multi-part queries into sub-queries (e.g., one for Algerian information, one for WHO information) and use the appropriate tool for each.
|
| 163 |
+
- Provide all found information with proper citations using Source IDs only.
|
| 164 |
+
- If information is limited, clearly state: "Based on the available documents, I can provide the following information..." and indicate what is not available.
|
| 165 |
|
| 166 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
"""
|
| 168 |
+
else:
|
| 169 |
+
custom_instructions = """
|
| 170 |
+
## MEDICAL ASSISTANT ROLE
|
| 171 |
+
You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
|
| 172 |
+
You provide evidence-based guidance using only information from official vaccine medical documents.
|
| 173 |
+
Answer the doctor's question accurately and concisely using only the provided information.
|
| 174 |
+
|
| 175 |
+
## IMPORTANT REQUIREMENTS
|
| 176 |
+
|
| 177 |
+
### Citation and Sourcing
|
| 178 |
+
1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
|
| 179 |
+
2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
|
| 180 |
+
3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
|
| 181 |
+
4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
|
| 182 |
+
5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
|
| 183 |
+
6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
|
| 184 |
+
|
| 185 |
+
### Content Formatting
|
| 186 |
+
1. When rendering tables:
|
| 187 |
+
- Convert HTML tables into clean Markdown format.
|
| 188 |
+
- Preserve all original headers and data rows exactly.
|
| 189 |
+
- Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
|
| 190 |
+
2. For lists, maintain the original bullet points/numbering and include citations.
|
| 191 |
+
3. Present information concisely but ensure clinical accuracy is never compromised.
|
| 192 |
+
|
| 193 |
+
### CRITICAL: Efficient Response Strategy
|
| 194 |
+
1. **MANDATORY SEARCH**: Always use the relevant tool(s) to search for information before answering, even if you initially think no information is available.
|
| 195 |
+
2. **MANDATORY TOOL SELECTION**:
|
| 196 |
+
- For queries mentioning "WHO," "World Health Organization," "international," "global guidance," or WHO documents (e.g., page numbers), use Immunization_in_Practice_tool first.
|
| 197 |
+
- For queries mentioning "Algerian," "national guide," or Algerian-specific terms (e.g., page numbers), use Guide_vector_tool first.
|
| 198 |
+
- For comparative queries (e.g., Algerian vs. WHO), use both Guide_vector_tool and Immunization_in_Practice_tool, addressing each part systematically.
|
| 199 |
+
3. **Query Decomposition**: Break comparative or multi-part queries into sub-queries (e.g., one for Algerian information, one for WHO information) and use the appropriate tool for each.
|
| 200 |
+
4. **DO NOT STOP PREMATURELY**: Do not conclude "no information is available" without using the relevant tool(s) to search for the answer.
|
| 201 |
+
5. **EXPLICIT REASONING**: Before answering, log your reasoning steps, including which tools you will use and why, based on the query’s content.
|
| 202 |
+
6. **BE DECISIVE**: Once you find relevant information for each sub-query, formulate your response immediately.
|
| 203 |
+
7. **ANSWER FULLY**: Address all parts of the question, using multiple tools if required by the query.
|
| 204 |
+
8. **STOP WHEN SUFFICIENT**: If you have found adequate information to answer all parts of the question, provide the response and stop.
|
| 205 |
+
|
| 206 |
+
### Response Guidelines for Complex Questions
|
| 207 |
+
- For comparative questions: Break the query into sub-queries (e.g., Algerian vs. WHO), use Guide_vector_tool for Algerian specifics and Immunization_in_Practice_tool for WHO specifics, then provide the comparison.
|
| 208 |
+
- For multi-part questions: Address each part systematically, using the appropriate tool for each sub-query.
|
| 209 |
+
- If information is not found after using the relevant tool(s): State clearly: "Based on the available documents, I can provide the following information..." and specify what is not available.
|
| 210 |
+
- Do not repeatedly search for the same terms or rephrase searches excessively.
|
| 211 |
+
|
| 212 |
+
### When Information is Limited
|
| 213 |
+
If you cannot find complete information to fully answer a question:
|
| 214 |
+
1. Provide whatever relevant information you did find with proper citations using Source IDs only.
|
| 215 |
+
2. Clearly state: "Based on the available documents, I can provide the following information..."
|
| 216 |
+
3. Indicate what specific information is not available: "However, information about [specific topic] was not found in the provided documents after searching with the relevant tool(s)."
|
| 217 |
+
4. Do not conclude "no information is available" without attempting a search with the appropriate tool(s).
|
| 218 |
|
| 219 |
+
---
|
|
|
|
|
|
|
|
|
|
| 220 |
"""
|
| 221 |
|
| 222 |
+
# Get the exact original template first
|
| 223 |
temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
|
| 224 |
original_prompts = temp_agent.get_prompts()
|
| 225 |
original_template = original_prompts["agent_worker:system_prompt"].template
|
| 226 |
+
|
| 227 |
+
# Add instructions at the very beginning
|
| 228 |
+
safe_template = f"{custom_instructions}{original_template}"
|
| 229 |
+
|
| 230 |
+
# Create new prompt with same metadata as original
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
original_prompt = original_prompts["agent_worker:system_prompt"]
|
| 232 |
|
| 233 |
try:
|
| 234 |
new_prompt = PromptTemplate(
|
| 235 |
template=safe_template,
|
| 236 |
template_vars=original_prompt.template_vars,
|
| 237 |
+
metadata=original_prompt.metadata if hasattr(original_prompt, 'metadata') else None
|
| 238 |
)
|
| 239 |
print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
|
| 240 |
return new_prompt
|
| 241 |
+
except:
|
| 242 |
+
# Even safer fallback
|
| 243 |
+
print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
|
| 244 |
return PromptTemplate(template=safe_template)
|
| 245 |
|
| 246 |
|