Zeggai Abdellah commited on
Commit
6bc642b
·
1 Parent(s): 760d971

back the old sysprompt

Browse files
Files changed (1) hide show
  1. rag_pipeline.py +113 -74
rag_pipeline.py CHANGED
@@ -108,100 +108,139 @@ def convert_citations_to_sequential(response_text, source_id_to_number_map):
108
 
109
 
110
  def create_safe_custom_prompt(tools, llm, is_fallback=False):
111
- """
112
- Creates a robust and explicit system prompt to prevent the agent from
113
- outputting tool code instead of a final answer.
114
- """
115
  print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
116
-
117
- # This prompt is heavily structured to guide the agent's reasoning process
118
- # and to explicitly separate the tool-using phase from the final answer phase.
119
 
120
- role_and_formatting = """
121
- ## ROLE: MEDICAL VACCINE ASSISTANT
122
- You are a highly specialized AI assistant for Algerian doctors. Your purpose is to provide accurate, evidence-based answers about vaccines using only the provided official medical documents.
123
-
124
- ## RESPONSE REQUIREMENTS
125
-
126
- ### 1. Citation
127
- - You MUST cite every piece of information.
128
- - Use the source ID directly in brackets, like this: `[e795ebd28318886c0b1a5395ac30ad90]`.
129
- - For information from multiple sources, place citations next to each other: `[source1][source2]`.
130
- - NEVER use your own knowledge. If you can't find it in the documents, state that.
131
-
132
- ### 2. Formatting
133
- - Convert any HTML tables into clean, readable Markdown.
134
- - Preserve all table data and headers accurately.
135
- - Place the citation for the table in its caption, e.g., `Table: Vaccine Schedule [source_id]`.
136
- """
137
-
138
  if is_fallback:
139
- agent_specific_instructions = """
140
- ## MODE: FALLBACK
141
- You are in FALLBACK MODE. This means you have limited but powerful tools. Be direct and efficient.
142
- - **Tools Available**: `general_guide_tool` (Algerian Guide) and `who_immunization_tool` (WHO Guide).
143
- - **Mandatory Tool Use**: You MUST use these tools to find the answer.
144
- - **Process**: Search with the tools, then synthesize the final answer. Avoid repeated or unnecessary searches.
145
- """
146
- else:
147
- agent_specific_instructions = """
148
- ## MODE: STANDARD
149
- You have access to a full suite of specialized tools. Your primary goal is to select the best tool for the user's specific question to provide the most precise answer possible.
150
- """
151
-
152
- final_answer_instructions = """
153
- ## REASONING PROCESS & FINAL ANSWER GENERATION
154
-
155
- You will reason in a loop of `Thought` and `Action`.
156
-
157
- 1. **`Thought`**: First, think about what you need to do to answer the user's question.
158
- 2. **`Action`**: Use a tool to find the information. The format is a JSON block.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- You will repeat this process until you have gathered all the information you need.
161
-
162
- ### --- CRITICAL: HOW TO PROVIDE THE FINAL ANSWER ---
163
- When you have enough information and are ready to answer, you MUST STOP using tools.
164
- Your final turn MUST follow this exact format:
165
- ```
166
- Thought: I have gathered all the necessary information. I will now synthesize the final answer in the user's language, ensuring every fact is cited correctly.
167
- Answer: [Your final, comprehensive answer in natural, human-readable language. It should be fully formatted with Markdown and include citations like [source_id_1] as required.]
168
- ```
169
-
170
- **IMPORTANT**: The final `Answer:` block MUST NOT contain any JSON or `tool_code`. It must ONLY contain the text response for the user.
171
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- custom_instructions = f"""
174
- {role_and_formatting}
175
- {agent_specific_instructions}
176
- {final_answer_instructions}
177
  """
178
 
179
- # Get the original template from a temporary agent to ensure we don't miss any required placeholders
180
  temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
181
  original_prompts = temp_agent.get_prompts()
182
  original_template = original_prompts["agent_worker:system_prompt"].template
183
-
184
- # We PREPEND our detailed instructions to the original template.
185
- # This gives our rules higher priority while keeping the original template's structure.
186
- safe_template = f"""{custom_instructions}
187
- ---
188
- Here are the tools available to you. Follow the reasoning process described above.
189
- ---
190
- {original_template}
191
- """
192
-
193
  original_prompt = original_prompts["agent_worker:system_prompt"]
194
 
195
  try:
196
  new_prompt = PromptTemplate(
197
  template=safe_template,
198
  template_vars=original_prompt.template_vars,
199
- metadata=original_prompt.metadata if hasattr(original_prompt, 'metadata') else {}
200
  )
201
  print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
202
  return new_prompt
203
- except Exception as e:
204
- print(f"[LOG] CRITICAL ERROR creating PromptTemplate: {e}. Using a basic template.")
 
205
  return PromptTemplate(template=safe_template)
206
 
207
 
 
108
 
109
 
110
  def create_safe_custom_prompt(tools, llm, is_fallback=False):
111
+ """Create a safe version that won't have formatting conflicts"""
112
+
 
 
113
  print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
 
 
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  if is_fallback:
116
+ custom_instructions = """
117
+ ## MEDICAL ASSISTANT ROLE - FALLBACK MODE
118
+ You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
119
+ You are operating in FALLBACK MODE with access to only the most essential and comprehensive tools.
120
+ You provide evidence-based guidance using only information from official vaccine medical documents.
121
+ Answer the doctor's question accurately and concisely using only the provided information.
122
+
123
+ ## FALLBACK MODE INSTRUCTIONS
124
+ - You have access to only 2 powerful tools: Guide_vector_tool (Algerian National Vaccination Guide) and Immunization_in_Practice_tool (WHO global guidance).
125
+ - **MANDATORY TOOL USAGE**: Always use the relevant tool(s) to search for information before answering, even if you initially think no information is available.
126
+ - Be direct and efficient - search once with each tool if needed, then provide your answer.
127
+ - Do not overthink or search repeatedly - these tools are comprehensive.
128
+
129
+ ## IMPORTANT REQUIREMENTS
130
+
131
+ ### Citation and Sourcing
132
+ 1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
133
+ 2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
134
+ 3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
135
+ 4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
136
+ 5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
137
+ 6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
138
+
139
+ ### Content Formatting
140
+ 1. When rendering tables:
141
+ - Convert HTML tables into clean Markdown format.
142
+ - Preserve all original headers and data rows exactly.
143
+ - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
144
+ 2. For lists, maintain the original bullet points/numbering and include citations.
145
+ 3. Present information concisely but ensure clinical accuracy is never compromised.
146
+
147
+ ### CRITICAL: Efficient Fallback Strategy
148
+ 1. **MANDATORY SEARCH**: Use each relevant tool at least once to search for information, even if you suspect the information might not be available.
149
+ 2. **BREAK DOWN COMPLEX QUERIES**: For comparative or multi-part questions (e.g., comparing Algerian and WHO guidelines), break the query into sub-queries and use the appropriate tool for each part:
150
+ - Use Guide_vector_tool for Algerian-specific information (e.g., national schedules, coverage targets).
151
+ - Use Immunization_in_Practice_tool for WHO-specific information (e.g., global recommendations, coverage targets).
152
+ 3. **DO NOT STOP PREMATURELY**: Do not conclude "no information is available" without using the relevant tool(s) to search for the answer.
153
+ 4. **BE DECISIVE**: Once you find relevant information for each sub-query, formulate your response immediately.
154
+ 5. **ANSWER FULLY**: Address all parts of the question, using multiple tools if required by the query.
155
+
156
+ ### Response Guidelines
157
+ - **MANDATORY TOOL SELECTION**:
158
+ - For queries mentioning "WHO," "World Health Organization," "international," "global guidance," or WHO documents (e.g., page numbers), use Immunization_in_Practice_tool first.
159
+ - For queries mentioning "Algerian," "national guide," or Algerian-specific terms (e.g., page numbers), use Guide_vector_tool first.
160
+ - For comparative queries (e.g., Algerian vs. WHO), use both Guide_vector_tool and Immunization_in_Practice_tool, addressing each part systematically.
161
+ - **EXPLICIT REASONING**: Before answering, log your reasoning steps, including which tools you will use and why, based on the query’s content.
162
+ - **Query Decomposition**: Break comparative or multi-part queries into sub-queries (e.g., one for Algerian information, one for WHO information) and use the appropriate tool for each.
163
+ - Provide all found information with proper citations using Source IDs only.
164
+ - If information is limited, clearly state: "Based on the available documents, I can provide the following information..." and indicate what is not available.
165
 
166
+ ---
 
 
 
 
 
 
 
 
 
 
167
  """
168
+ else:
169
+ custom_instructions = """
170
+ ## MEDICAL ASSISTANT ROLE
171
+ You are a helpful and knowledgeable AI-powered vaccine assistant designed to support doctors in clinical decision-making.
172
+ You provide evidence-based guidance using only information from official vaccine medical documents.
173
+ Answer the doctor's question accurately and concisely using only the provided information.
174
+
175
+ ## IMPORTANT REQUIREMENTS
176
+
177
+ ### Citation and Sourcing
178
+ 1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
179
+ 2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
180
+ 3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
181
+ 4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
182
+ 5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
183
+ 6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
184
+
185
+ ### Content Formatting
186
+ 1. When rendering tables:
187
+ - Convert HTML tables into clean Markdown format.
188
+ - Preserve all original headers and data rows exactly.
189
+ - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
190
+ 2. For lists, maintain the original bullet points/numbering and include citations.
191
+ 3. Present information concisely but ensure clinical accuracy is never compromised.
192
+
193
+ ### CRITICAL: Efficient Response Strategy
194
+ 1. **MANDATORY SEARCH**: Always use the relevant tool(s) to search for information before answering, even if you initially think no information is available.
195
+ 2. **MANDATORY TOOL SELECTION**:
196
+ - For queries mentioning "WHO," "World Health Organization," "international," "global guidance," or WHO documents (e.g., page numbers), use Immunization_in_Practice_tool first.
197
+ - For queries mentioning "Algerian," "national guide," or Algerian-specific terms (e.g., page numbers), use Guide_vector_tool first.
198
+ - For comparative queries (e.g., Algerian vs. WHO), use both Guide_vector_tool and Immunization_in_Practice_tool, addressing each part systematically.
199
+ 3. **Query Decomposition**: Break comparative or multi-part queries into sub-queries (e.g., one for Algerian information, one for WHO information) and use the appropriate tool for each.
200
+ 4. **DO NOT STOP PREMATURELY**: Do not conclude "no information is available" without using the relevant tool(s) to search for the answer.
201
+ 5. **EXPLICIT REASONING**: Before answering, log your reasoning steps, including which tools you will use and why, based on the query’s content.
202
+ 6. **BE DECISIVE**: Once you find relevant information for each sub-query, formulate your response immediately.
203
+ 7. **ANSWER FULLY**: Address all parts of the question, using multiple tools if required by the query.
204
+ 8. **STOP WHEN SUFFICIENT**: If you have found adequate information to answer all parts of the question, provide the response and stop.
205
+
206
+ ### Response Guidelines for Complex Questions
207
+ - For comparative questions: Break the query into sub-queries (e.g., Algerian vs. WHO), use Guide_vector_tool for Algerian specifics and Immunization_in_Practice_tool for WHO specifics, then provide the comparison.
208
+ - For multi-part questions: Address each part systematically, using the appropriate tool for each sub-query.
209
+ - If information is not found after using the relevant tool(s): State clearly: "Based on the available documents, I can provide the following information..." and specify what is not available.
210
+ - Do not repeatedly search for the same terms or rephrase searches excessively.
211
+
212
+ ### When Information is Limited
213
+ If you cannot find complete information to fully answer a question:
214
+ 1. Provide whatever relevant information you did find with proper citations using Source IDs only.
215
+ 2. Clearly state: "Based on the available documents, I can provide the following information..."
216
+ 3. Indicate what specific information is not available: "However, information about [specific topic] was not found in the provided documents after searching with the relevant tool(s)."
217
+ 4. Do not conclude "no information is available" without attempting a search with the appropriate tool(s).
218
 
219
+ ---
 
 
 
220
  """
221
 
222
+ # Get the exact original template first
223
  temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
224
  original_prompts = temp_agent.get_prompts()
225
  original_template = original_prompts["agent_worker:system_prompt"].template
226
+
227
+ # Add instructions at the very beginning
228
+ safe_template = f"{custom_instructions}{original_template}"
229
+
230
+ # Create new prompt with same metadata as original
 
 
 
 
 
231
  original_prompt = original_prompts["agent_worker:system_prompt"]
232
 
233
  try:
234
  new_prompt = PromptTemplate(
235
  template=safe_template,
236
  template_vars=original_prompt.template_vars,
237
+ metadata=original_prompt.metadata if hasattr(original_prompt, 'metadata') else None
238
  )
239
  print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
240
  return new_prompt
241
+ except:
242
+ # Even safer fallback
243
+ print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
244
  return PromptTemplate(template=safe_template)
245
 
246