Zeggai Abdellah committed on
Commit
dc90437
·
1 Parent(s): d8d8050
Files changed (2) hide show
  1. prepare_env.py +1 -1
  2. rag_pipeline.py +76 -24
prepare_env.py CHANGED
@@ -159,7 +159,7 @@ def create_retriever(vectorstore, docs, llm):
159
 
160
  # BM25 retriever
161
  bm25_retriever = BM25Retriever.from_documents(docs)
162
- bm25_retriever.k = 2
163
  print("✅ BM25 retriever created (k=2)")
164
 
165
  # Ensemble retriever
 
159
 
160
  # BM25 retriever
161
  bm25_retriever = BM25Retriever.from_documents(docs)
162
+ bm25_retriever.k = 3
163
  print("✅ BM25 retriever created (k=2)")
164
 
165
  # Ensemble retriever
rag_pipeline.py CHANGED
@@ -112,6 +112,7 @@ def create_safe_custom_prompt(tools, llm, is_fallback=False):
112
 
113
  print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
114
 
 
115
  if is_fallback:
116
  custom_instructions = """
117
  ## MEDICAL ASSISTANT ROLE - FALLBACK MODE
@@ -129,21 +130,19 @@ Answer the doctor's question accurately and concisely using only the provided in
129
  ## IMPORTANT REQUIREMENTS
130
 
131
  ### Citation and Sourcing
132
- 1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
133
- 2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
134
- 3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
135
- 4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
136
- 5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
137
- 6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
138
 
139
  ### Content Formatting
140
  1. When rendering tables:
141
- - Convert HTML tables into clean Markdown format.
142
- - Preserve all original headers and data rows exactly.
143
- - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
144
  2. For lists, maintain the original bullet points/numbering and include citations.
145
  3. Present information concisely but ensure clinical accuracy is never compromised.
146
- ---
147
  """
148
  else:
149
  custom_instructions = """
@@ -155,31 +154,84 @@ Answer the doctor's question accurately and concisely using only the provided in
155
  ## IMPORTANT REQUIREMENTS
156
 
157
  ### Citation and Sourcing
158
- 1. For each fact in your response, include an inline citation in the format [Source ID] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
159
- 2. The Source ID must be the exact alphanumeric identifier from the search results, NOT the tool name or any other text.
160
- 3. Do NOT use 'Source:' in the citation format; use only the Source ID in square brackets.
161
- 4. Do NOT use tool names (like Guide_vector_tool, Immunization_in_Practice_tool) as citations.
162
- 5. If a fact is supported by multiple sources, use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
163
- 6. Use ONLY the provided information from tool outputs and never include facts from your general knowledge.
164
 
165
  ### Content Formatting
166
  1. When rendering tables:
167
- - Convert HTML tables into clean Markdown format.
168
- - Preserve all original headers and data rows exactly.
169
- - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source ID]'.
170
  2. For lists, maintain the original bullet points/numbering and include citations.
171
  3. Present information concisely but ensure clinical accuracy is never compromised.
 
172
 
173
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  """
175
 
 
176
  # Get the exact original template first
177
  temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
178
  original_prompts = temp_agent.get_prompts()
179
  original_template = original_prompts["agent_worker:system_prompt"].template
180
 
181
- # Add instructions at the very beginning
182
- safe_template = f"{custom_instructions}{original_template}"
 
 
 
 
183
 
184
  # Create new prompt with same metadata as original
185
  original_prompt = original_prompts["agent_worker:system_prompt"]
@@ -193,10 +245,10 @@ Answer the doctor's question accurately and concisely using only the provided in
193
  print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
194
  return new_prompt
195
  except:
196
- # Even safer fallback
197
  print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
198
  return PromptTemplate(template=safe_template)
199
-
200
  def create_agent(tools, llm, is_fallback=False):
201
  """Create the ReAct agent with custom prompt"""
202
 
 
112
 
113
  print(f"[LOG] Creating {'fallback' if is_fallback else 'standard'} custom prompt with {len(tools)} tools")
114
 
115
+ # Your custom instructions
116
  if is_fallback:
117
  custom_instructions = """
118
  ## MEDICAL ASSISTANT ROLE - FALLBACK MODE
 
130
  ## IMPORTANT REQUIREMENTS
131
 
132
  ### Citation and Sourcing
133
+ 1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
134
+ 2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
135
+ 3. If a fact is supported by multiple sources, use the following format:
136
+ - Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
137
+ 4. Use ONLY the provided information and never include facts from your general knowledge.
 
138
 
139
  ### Content Formatting
140
  1. When rendering tables:
141
+ - Convert HTML tables into clean Markdown format
142
+ - Preserve all original headers and data rows exactly
143
+ - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
144
  2. For lists, maintain the original bullet points/numbering and include citations.
145
  3. Present information concisely but ensure clinical accuracy is never compromised.
 
146
  """
147
  else:
148
  custom_instructions = """
 
154
  ## IMPORTANT REQUIREMENTS
155
 
156
  ### Citation and Sourcing
157
+ 1. For each fact in your response, include an inline citation in the format [Source] immediately following the information, e.g., [e795ebd28318886c0b1a5395ac30ad90].
158
+ 2. Do NOT use 'Source:' in the citation format; use only the Source in square brackets.
159
+ 3. If a fact is supported by multiple sources, use the following format:
160
+ - Use adjacent citations: [e795ebd28318886c0b1a5395ac30ad90][21a932b2340bb16707763f57f0ad2]
161
+ 4. Use ONLY the provided information and never include facts from your general knowledge.
 
162
 
163
  ### Content Formatting
164
  1. When rendering tables:
165
+ - Convert HTML tables into clean Markdown format
166
+ - Preserve all original headers and data rows exactly
167
+ - Include the citation in the table caption, e.g., 'Table: Vaccination Schedule [Source]'
168
  2. For lists, maintain the original bullet points/numbering and include citations.
169
  3. Present information concisely but ensure clinical accuracy is never compromised.
170
+ """
171
 
172
+ # Create the modified prompt template by combining your instructions with the default structure
173
+ # IMPORTANT: Fix the curly brace escaping for JSON examples
174
+ modified_template = f"""{custom_instructions}
175
+
176
+ ## Tools
177
+
178
+ You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
179
+ This may require breaking the task into subtasks and using different tools to complete each subtask.
180
+
181
+ You have access to the following tools:
182
+ {{tool_desc}}
183
+
184
+ ## Output Format
185
+
186
+ Please answer in the same language as the question and use the following format:
187
+
188
+ ```
189
+ Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
190
+ Action: tool name (one of {{tool_names}}) if using a tool.
191
+ Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
192
+ ```
193
+
194
+ Please ALWAYS start with a Thought.
195
+
196
+ NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.
197
+
198
+ Please use a valid JSON format for the Action Input. Do NOT do this {{"input": "hello world", "num_beams": 5}}.
199
+
200
+ If this format is used, the tool will respond in the following format:
201
+
202
+ ```
203
+ Observation: tool response
204
+ ```
205
+
206
+ You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in one of the following two formats:
207
+
208
+ ```
209
+ Thought: I can answer without using any more tools. I'll use the user's language to answer. Remember to include proper citations
210
+ Answer: [your answer here with proper citations (In the same language as the user's question)]
211
+ ```
212
+
213
+ ```
214
+ Thought: I cannot answer the question with the provided tools.
215
+ Answer: [your answer here (In the same language as the user's question)]
216
+ ```
217
+
218
+ ## Current Conversation
219
+
220
+ Below is the current conversation consisting of interleaving human and assistant messages.
221
  """
222
 
223
+ # SAFER APPROACH: Let's use the original template and just prepend instructions
224
  # Get the exact original template first
225
  temp_agent = ReActAgent.from_tools(tools, llm=llm, verbose=False)
226
  original_prompts = temp_agent.get_prompts()
227
  original_template = original_prompts["agent_worker:system_prompt"].template
228
 
229
+ # Add instructions at the very beginning, before "You are designed to help..."
230
+ safe_template = f"""{custom_instructions}
231
+
232
+ ---
233
+
234
+ {original_template}"""
235
 
236
  # Create new prompt with same metadata as original
237
  original_prompt = original_prompts["agent_worker:system_prompt"]
 
245
  print(f"[LOG] ✅ Successfully created {'fallback' if is_fallback else 'standard'} custom prompt")
246
  return new_prompt
247
  except:
248
+ # Even safer fallback - just use PromptTemplate with template only
249
  print(f"[LOG] ⚠️ Using fallback prompt template for {'fallback' if is_fallback else 'standard'} agent")
250
  return PromptTemplate(template=safe_template)
251
+
252
  def create_agent(tools, llm, is_fallback=False):
253
  """Create the ReAct agent with custom prompt"""
254