LiamKhoaLe committed on
Commit
88e7ced
·
1 Parent(s): e0b2bb1
Files changed (5) hide show
  1. utils/augment.py +8 -9
  2. utils/cloud_llm.py +23 -42
  3. utils/local_llm.py +22 -41
  4. utils/processor.py +16 -24
  5. utils/rag.py +4 -16
utils/augment.py CHANGED
@@ -224,11 +224,11 @@ def retry_invalid_response(text: str, paraphraser, max_retries: int = 3) -> str:
224
  retry_text = paraphraser.paraphrase(text, difficulty="easy")
225
  elif attempt == 1:
226
  # Second try: More aggressive paraphrasing with medical focus
227
- medical_prompt = f"Rewrite this medical response to be more professional and accurate:\n\n{text}"
228
  retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
229
  else:
230
  # Third try: Direct medical content generation
231
- medical_prompt = f"Provide a professional medical response to this question:\n\n{text}"
232
  retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
233
 
234
  if retry_text and not is_invalid_response(retry_text):
@@ -274,9 +274,8 @@ def enhance_medical_terminology(text: str, paraphraser) -> str:
274
  return enhanced
275
  else:
276
  prompt = (
277
- "Improve the medical terminology in this text while preserving all factual information:\n\n"
278
- f"{text}\n\n"
279
- "Return only the improved text with better medical terminology:"
280
  )
281
 
282
  enhanced = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=prompt)
@@ -298,10 +297,10 @@ def create_clinical_scenarios(question: str, answer: str, paraphraser) -> list:
298
  else:
299
  # Fallback to original implementation
300
  context_prompts = [
301
- f"Rewrite this medical question as if asked by a patient in an emergency room:\n\n{question}",
302
- f"Rewrite this medical question as if asked by a patient in a routine checkup:\n\n{question}",
303
- f"Rewrite this medical question as if asked by a patient with chronic conditions:\n\n{question}",
304
- f"Rewrite this medical question as if asked by a patient's family member:\n\n{question}"
305
  ]
306
 
307
  for i, prompt in enumerate(context_prompts):
 
224
  retry_text = paraphraser.paraphrase(text, difficulty="easy")
225
  elif attempt == 1:
226
  # Second try: More aggressive paraphrasing with medical focus
227
+ medical_prompt = f"Rewrite this medical response to be more professional and accurate. Return only the rewritten response without any introduction or commentary:\n\n{text}"
228
  retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
229
  else:
230
  # Third try: Direct medical content generation
231
+ medical_prompt = f"Provide a professional medical response to this question. Return only the medical response without any introduction or commentary:\n\n{text}"
232
  retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
233
 
234
  if retry_text and not is_invalid_response(retry_text):
 
274
  return enhanced
275
  else:
276
  prompt = (
277
+ "Improve the medical terminology in this text while preserving all factual information. Return only the improved text with better medical terminology without any introduction or commentary:\n\n"
278
+ f"{text}"
 
279
  )
280
 
281
  enhanced = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=prompt)
 
297
  else:
298
  # Fallback to original implementation
299
  context_prompts = [
300
+ f"Rewrite this medical question as if asked by a patient in an emergency room. Return only the rewritten question without any introduction or commentary:\n\n{question}",
301
+ f"Rewrite this medical question as if asked by a patient in a routine checkup. Return only the rewritten question without any introduction or commentary:\n\n{question}",
302
+ f"Rewrite this medical question as if asked by a patient with chronic conditions. Return only the rewritten question without any introduction or commentary:\n\n{question}",
303
+ f"Rewrite this medical question as if asked by a patient's family member. Return only the rewritten question without any introduction or commentary:\n\n{question}"
304
  ]
305
 
306
  for i, prompt in enumerate(context_prompts):
utils/cloud_llm.py CHANGED
@@ -153,15 +153,13 @@ class Paraphraser:
153
  # Optimized medical paraphrasing prompts based on difficulty
154
  if difficulty == "easy":
155
  prompt = (
156
- "You are a medical professional. Rewrite the following medical text using different words while preserving all medical facts, clinical terms, and meaning. Keep the same level of detail and accuracy.\n\n"
157
- f"Original medical text: {text}\n\n"
158
- "Rewritten medical text:"
159
  )
160
  else: # hard difficulty
161
  prompt = (
162
- "You are a medical expert. Rewrite the following medical text using more sophisticated medical language and different sentence structures while preserving all clinical facts, medical terminology, and diagnostic information. Maintain professional medical tone.\n\n"
163
- f"Original medical text: {text}\n\n"
164
- "Enhanced medical text:"
165
  )
166
 
167
  # Optimize temperature and token limits based on difficulty
@@ -187,15 +185,13 @@ class Paraphraser:
187
  # Optimized medical translation prompts
188
  if target_lang == "vi":
189
  prompt = (
190
- "You are a medical translator. Translate the following English medical text to Vietnamese while preserving all medical terminology, clinical facts, and professional medical language. Use appropriate Vietnamese medical terms.\n\n"
191
- f"English medical text: {text}\n\n"
192
- "Vietnamese medical translation:"
193
  )
194
  else:
195
  prompt = (
196
- f"You are a medical translator. Translate the following medical text to {target_lang} while preserving all medical terminology, clinical facts, and professional medical language.\n\n"
197
- f"Original medical text: {text}\n\n"
198
- f"{target_lang} medical translation:"
199
  )
200
 
201
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(800, len(text)+100))
@@ -210,15 +206,13 @@ class Paraphraser:
210
  # Optimized backtranslation prompt with medical focus
211
  if via_lang == "vi":
212
  prompt = (
213
- "You are a medical translator. Translate the following Vietnamese medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Ensure the translation is medically accurate.\n\n"
214
- f"Vietnamese medical text: {mid}\n\n"
215
- "English medical translation:"
216
  )
217
  else:
218
  prompt = (
219
- f"You are a medical translator. Translate the following {via_lang} medical text back to English while preserving all medical terminology, clinical facts, and professional medical language.\n\n"
220
- f"{via_lang} medical text: {mid}\n\n"
221
- "English medical translation:"
222
  )
223
 
224
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(900, len(text)+150))
@@ -230,15 +224,9 @@ class Paraphraser:
230
  def consistency_check(self, user: str, output: str) -> bool:
231
  """Return True if 'output' appears supported by 'user' (context/question). Optimized medical validation."""
232
  prompt = (
233
- "You are a medical quality assurance expert. Evaluate if the medical answer is consistent with the question/context and medically accurate. Consider:\n"
234
- "1. Medical accuracy and clinical appropriateness\n"
235
- "2. Consistency with the question asked\n"
236
- "3. Safety and professional medical standards\n"
237
- "4. Completeness of the medical information\n\n"
238
- "Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
239
  f"Question/Context: {user}\n\n"
240
- f"Medical Answer: {output}\n\n"
241
- "Evaluation:"
242
  )
243
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
244
  if not out:
@@ -251,15 +239,9 @@ class Paraphraser:
251
  return False
252
 
253
  prompt = (
254
- "You are a medical accuracy validator. Evaluate if the medical answer is accurate and appropriate for the question. Consider:\n"
255
- "1. Medical facts and clinical knowledge\n"
256
- "2. Appropriate medical terminology\n"
257
- "3. Clinical reasoning and logic\n"
258
- "4. Safety considerations\n\n"
259
- "Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
260
  f"Medical Question: {question}\n\n"
261
- f"Medical Answer: {answer}\n\n"
262
- "Medical Accuracy Assessment:"
263
  )
264
 
265
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
@@ -273,9 +255,8 @@ class Paraphraser:
273
  return text
274
 
275
  prompt = (
276
- "You are a medical terminology expert. Improve the medical terminology in the following text while preserving all factual information and clinical accuracy. Use more precise medical terms where appropriate.\n\n"
277
- f"Original text: {text}\n\n"
278
- "Enhanced medical text:"
279
  )
280
 
281
  out = self.nv.generate(prompt, temperature=0.1, max_tokens=min(800, len(text)+100))
@@ -290,26 +271,26 @@ class Paraphraser:
290
  # Different clinical context prompts
291
  context_prompts = [
292
  (
293
- "Rewrite this medical question as if asked by a patient in an emergency room setting:",
294
  "emergency_room"
295
  ),
296
  (
297
- "Rewrite this medical question as if asked by a patient during a routine checkup:",
298
  "routine_checkup"
299
  ),
300
  (
301
- "Rewrite this medical question as if asked by a patient with chronic conditions:",
302
  "chronic_care"
303
  ),
304
  (
305
- "Rewrite this medical question as if asked by a patient's family member:",
306
  "family_inquiry"
307
  )
308
  ]
309
 
310
  for prompt_template, scenario_type in context_prompts:
311
  try:
312
- prompt = f"{prompt_template}\n\nOriginal question: {question}\n\nRewritten question:"
313
  scenario_question = self.paraphrase(question, difficulty="hard", custom_prompt=prompt)
314
 
315
  if scenario_question and not self._is_invalid_response(scenario_question):
 
153
  # Optimized medical paraphrasing prompts based on difficulty
154
  if difficulty == "easy":
155
  prompt = (
156
+ "Rewrite the following medical text using different words while preserving all medical facts, clinical terms, and meaning. Keep the same level of detail and accuracy. Return only the rewritten text without any introduction or commentary.\n\n"
157
+ f"{text}"
 
158
  )
159
  else: # hard difficulty
160
  prompt = (
161
+ "Rewrite the following medical text using more sophisticated medical language and different sentence structures while preserving all clinical facts, medical terminology, and diagnostic information. Maintain professional medical tone. Return only the rewritten text without any introduction or commentary.\n\n"
162
+ f"{text}"
 
163
  )
164
 
165
  # Optimize temperature and token limits based on difficulty
 
185
  # Optimized medical translation prompts
186
  if target_lang == "vi":
187
  prompt = (
188
+ "Translate the following English medical text to Vietnamese while preserving all medical terminology, clinical facts, and professional medical language. Use appropriate Vietnamese medical terms. Return only the translation without any introduction or commentary.\n\n"
189
+ f"{text}"
 
190
  )
191
  else:
192
  prompt = (
193
+ f"Translate the following medical text to {target_lang} while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
194
+ f"{text}"
 
195
  )
196
 
197
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(800, len(text)+100))
 
206
  # Optimized backtranslation prompt with medical focus
207
  if via_lang == "vi":
208
  prompt = (
209
+ "Translate the following Vietnamese medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Ensure the translation is medically accurate. Return only the translation without any introduction or commentary.\n\n"
210
+ f"{mid}"
 
211
  )
212
  else:
213
  prompt = (
214
+ f"Translate the following {via_lang} medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
215
+ f"{mid}"
 
216
  )
217
 
218
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(900, len(text)+150))
 
224
  def consistency_check(self, user: str, output: str) -> bool:
225
  """Return True if 'output' appears supported by 'user' (context/question). Optimized medical validation."""
226
  prompt = (
227
+ "Evaluate if the medical answer is consistent with the question/context and medically accurate. Consider medical accuracy, clinical appropriateness, consistency with the question, safety standards, and completeness of medical information. Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
 
 
 
 
 
228
  f"Question/Context: {user}\n\n"
229
+ f"Medical Answer: {output}"
 
230
  )
231
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
232
  if not out:
 
239
  return False
240
 
241
  prompt = (
242
+ "Evaluate if the medical answer is accurate and appropriate for the question. Consider medical facts, clinical knowledge, appropriate medical terminology, clinical reasoning, logic, and safety considerations. Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
 
 
 
 
 
243
  f"Medical Question: {question}\n\n"
244
+ f"Medical Answer: {answer}"
 
245
  )
246
 
247
  out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
 
255
  return text
256
 
257
  prompt = (
258
+ "Improve the medical terminology in the following text while preserving all factual information and clinical accuracy. Use more precise medical terms where appropriate. Return only the improved text without any introduction or commentary.\n\n"
259
+ f"{text}"
 
260
  )
261
 
262
  out = self.nv.generate(prompt, temperature=0.1, max_tokens=min(800, len(text)+100))
 
271
  # Different clinical context prompts
272
  context_prompts = [
273
  (
274
+ "Rewrite this medical question as if asked by a patient in an emergency room setting. Return only the rewritten question without any introduction or commentary:\n\n{question}",
275
  "emergency_room"
276
  ),
277
  (
278
+ "Rewrite this medical question as if asked by a patient during a routine checkup. Return only the rewritten question without any introduction or commentary:\n\n{question}",
279
  "routine_checkup"
280
  ),
281
  (
282
+ "Rewrite this medical question as if asked by a patient with chronic conditions. Return only the rewritten question without any introduction or commentary:\n\n{question}",
283
  "chronic_care"
284
  ),
285
  (
286
+ "Rewrite this medical question as if asked by a patient's family member. Return only the rewritten question without any introduction or commentary:\n\n{question}",
287
  "family_inquiry"
288
  )
289
  ]
290
 
291
  for prompt_template, scenario_type in context_prompts:
292
  try:
293
+ prompt = prompt_template.format(question=question)
294
  scenario_question = self.paraphrase(question, difficulty="hard", custom_prompt=prompt)
295
 
296
  if scenario_question and not self._is_invalid_response(scenario_question):
utils/local_llm.py CHANGED
@@ -268,15 +268,13 @@ class LocalParaphraser:
268
  # Medical-specific paraphrasing prompts based on difficulty
269
  if difficulty == "easy":
270
  prompt = (
271
- "You are a medical professional. Rewrite the following medical text using different words while preserving all medical facts, clinical terms, and meaning. Keep the same level of detail and accuracy.\n\n"
272
- f"Original medical text: {text}\n\n"
273
- "Rewritten medical text:"
274
  )
275
  else: # hard difficulty
276
  prompt = (
277
- "You are a medical expert. Rewrite the following medical text using more sophisticated medical language and different sentence structures while preserving all clinical facts, medical terminology, and diagnostic information. Maintain professional medical tone.\n\n"
278
- f"Original medical text: {text}\n\n"
279
- "Enhanced medical text:"
280
  )
281
 
282
  # Adjust temperature based on difficulty
@@ -292,15 +290,13 @@ class LocalParaphraser:
292
  # Medical-specific translation prompt
293
  if target_lang == "vi":
294
  prompt = (
295
- "You are a medical translator. Translate the following English medical text to Vietnamese while preserving all medical terminology, clinical facts, and professional medical language. Use appropriate Vietnamese medical terms.\n\n"
296
- f"English medical text: {text}\n\n"
297
- "Vietnamese medical translation:"
298
  )
299
  else:
300
  prompt = (
301
- f"You are a medical translator. Translate the following medical text to {target_lang} while preserving all medical terminology, clinical facts, and professional medical language.\n\n"
302
- f"Original medical text: {text}\n\n"
303
- f"{target_lang} medical translation:"
304
  )
305
 
306
  result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.0)
@@ -319,15 +315,13 @@ class LocalParaphraser:
319
  # Then translate back to English with medical focus
320
  if via_lang == "vi":
321
  prompt = (
322
- "You are a medical translator. Translate the following Vietnamese medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Ensure the translation is medically accurate.\n\n"
323
- f"Vietnamese medical text: {translated}\n\n"
324
- "English medical translation:"
325
  )
326
  else:
327
  prompt = (
328
- f"You are a medical translator. Translate the following {via_lang} medical text back to English while preserving all medical terminology, clinical facts, and professional medical language.\n\n"
329
- f"{via_lang} medical text: {translated}\n\n"
330
- "English medical translation:"
331
  )
332
 
333
  result = self.client.generate(prompt, max_tokens=min(900, len(text)+150), temperature=0.0)
@@ -336,15 +330,9 @@ class LocalParaphraser:
336
  def consistency_check(self, user: str, output: str) -> bool:
337
  """Check consistency using MedAlpaca with medical validation focus"""
338
  prompt = (
339
- "You are a medical quality assurance expert. Evaluate if the medical answer is consistent with the question/context and medically accurate. Consider:\n"
340
- "1. Medical accuracy and clinical appropriateness\n"
341
- "2. Consistency with the question asked\n"
342
- "3. Safety and professional medical standards\n"
343
- "4. Completeness of the medical information\n\n"
344
- "Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
345
  f"Question/Context: {user}\n\n"
346
- f"Medical Answer: {output}\n\n"
347
- "Evaluation:"
348
  )
349
 
350
  result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
@@ -356,15 +344,9 @@ class LocalParaphraser:
356
  return False
357
 
358
  prompt = (
359
- "You are a medical accuracy validator. Evaluate if the medical answer is accurate and appropriate for the question. Consider:\n"
360
- "1. Medical facts and clinical knowledge\n"
361
- "2. Appropriate medical terminology\n"
362
- "3. Clinical reasoning and logic\n"
363
- "4. Safety considerations\n\n"
364
- "Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
365
  f"Medical Question: {question}\n\n"
366
- f"Medical Answer: {answer}\n\n"
367
- "Medical Accuracy Assessment:"
368
  )
369
 
370
  result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
@@ -376,9 +358,8 @@ class LocalParaphraser:
376
  return text
377
 
378
  prompt = (
379
- "You are a medical terminology expert. Improve the medical terminology in the following text while preserving all factual information and clinical accuracy. Use more precise medical terms where appropriate.\n\n"
380
- f"Original text: {text}\n\n"
381
- "Enhanced medical text:"
382
  )
383
 
384
  result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.1)
@@ -391,19 +372,19 @@ class LocalParaphraser:
391
  # Different clinical context prompts
392
  context_prompts = [
393
  (
394
- "You are a medical professional. Rewrite this medical question as if asked by a patient in an emergency room setting:\n\nOriginal question: {question}\n\nEmergency room question:",
395
  "emergency_room"
396
  ),
397
  (
398
- "You are a medical professional. Rewrite this medical question as if asked by a patient during a routine checkup:\n\nOriginal question: {question}\n\nRoutine checkup question:",
399
  "routine_checkup"
400
  ),
401
  (
402
- "You are a medical professional. Rewrite this medical question as if asked by a patient with chronic conditions:\n\nOriginal question: {question}\n\nChronic care question:",
403
  "chronic_care"
404
  ),
405
  (
406
- "You are a medical professional. Rewrite this medical question as if asked by a patient's family member:\n\nOriginal question: {question}\n\nFamily inquiry question:",
407
  "family_inquiry"
408
  )
409
  ]
 
268
  # Medical-specific paraphrasing prompts based on difficulty
269
  if difficulty == "easy":
270
  prompt = (
271
+ "Rewrite the following medical text using different words while preserving all medical facts, clinical terms, and meaning. Keep the same level of detail and accuracy. Return only the rewritten text without any introduction or commentary.\n\n"
272
+ f"{text}"
 
273
  )
274
  else: # hard difficulty
275
  prompt = (
276
+ "Rewrite the following medical text using more sophisticated medical language and different sentence structures while preserving all clinical facts, medical terminology, and diagnostic information. Maintain professional medical tone. Return only the rewritten text without any introduction or commentary.\n\n"
277
+ f"{text}"
 
278
  )
279
 
280
  # Adjust temperature based on difficulty
 
290
  # Medical-specific translation prompt
291
  if target_lang == "vi":
292
  prompt = (
293
+ "Translate the following English medical text to Vietnamese while preserving all medical terminology, clinical facts, and professional medical language. Use appropriate Vietnamese medical terms. Return only the translation without any introduction or commentary.\n\n"
294
+ f"{text}"
 
295
  )
296
  else:
297
  prompt = (
298
+ f"Translate the following medical text to {target_lang} while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
299
+ f"{text}"
 
300
  )
301
 
302
  result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.0)
 
315
  # Then translate back to English with medical focus
316
  if via_lang == "vi":
317
  prompt = (
318
+ "Translate the following Vietnamese medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Ensure the translation is medically accurate. Return only the translation without any introduction or commentary.\n\n"
319
+ f"{translated}"
 
320
  )
321
  else:
322
  prompt = (
323
+ f"Translate the following {via_lang} medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
324
+ f"{translated}"
 
325
  )
326
 
327
  result = self.client.generate(prompt, max_tokens=min(900, len(text)+150), temperature=0.0)
 
330
  def consistency_check(self, user: str, output: str) -> bool:
331
  """Check consistency using MedAlpaca with medical validation focus"""
332
  prompt = (
333
+ "Evaluate if the medical answer is consistent with the question/context and medically accurate. Consider medical accuracy, clinical appropriateness, consistency with the question, safety standards, and completeness of medical information. Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
 
 
 
 
 
334
  f"Question/Context: {user}\n\n"
335
+ f"Medical Answer: {output}"
 
336
  )
337
 
338
  result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
 
344
  return False
345
 
346
  prompt = (
347
+ "Evaluate if the medical answer is accurate and appropriate for the question. Consider medical facts, clinical knowledge, appropriate medical terminology, clinical reasoning, logic, and safety considerations. Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
 
 
 
 
 
348
  f"Medical Question: {question}\n\n"
349
+ f"Medical Answer: {answer}"
 
350
  )
351
 
352
  result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
 
358
  return text
359
 
360
  prompt = (
361
+ "Improve the medical terminology in the following text while preserving all factual information and clinical accuracy. Use more precise medical terms where appropriate. Return only the improved text without any introduction or commentary.\n\n"
362
+ f"{text}"
 
363
  )
364
 
365
  result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.1)
 
372
  # Different clinical context prompts
373
  context_prompts = [
374
  (
375
+ "Rewrite this medical question as if asked by a patient in an emergency room setting. Return only the rewritten question without any introduction or commentary:\n\n{question}",
376
  "emergency_room"
377
  ),
378
  (
379
+ "Rewrite this medical question as if asked by a patient during a routine checkup. Return only the rewritten question without any introduction or commentary:\n\n{question}",
380
  "routine_checkup"
381
  ),
382
  (
383
+ "Rewrite this medical question as if asked by a patient with chronic conditions. Return only the rewritten question without any introduction or commentary:\n\n{question}",
384
  "chronic_care"
385
  ),
386
  (
387
+ "Rewrite this medical question as if asked by a patient's family member. Return only the rewritten question without any introduction or commentary:\n\n{question}",
388
  "family_inquiry"
389
  )
390
  ]
utils/processor.py CHANGED
@@ -212,24 +212,20 @@ def _get_answer_style_prompt(strategy: str, question: str, original_answer: str)
212
  """Generate style-specific prompts for answer enhancement with medical focus"""
213
  prompts = {
214
  "concise": (
215
- "You are a medical professional. Rewrite this medical answer to be more concise while preserving all key medical information, clinical facts, and diagnostic details:\n\n"
216
- f"Original answer: {original_answer}\n\n"
217
- "Concise medical answer:"
218
  ),
219
  "detailed": (
220
- "You are a medical expert. Expand this medical answer with more detailed explanations, clinical context, and additional medical information while maintaining accuracy:\n\n"
221
- f"Original answer: {original_answer}\n\n"
222
- "Detailed medical answer:"
223
  ),
224
  "clinical": (
225
- "You are a clinical specialist. Rewrite this answer using more formal clinical language, precise medical terminology, and professional medical communication style:\n\n"
226
- f"Original answer: {original_answer}\n\n"
227
- "Clinical medical answer:"
228
  ),
229
  "patient_friendly": (
230
- "You are a medical professional. Rewrite this medical answer in simpler, more patient-friendly language while keeping it medically accurate and informative:\n\n"
231
- f"Original answer: {original_answer}\n\n"
232
- "Patient-friendly medical answer:"
233
  )
234
  }
235
  return prompts.get(strategy, f"Paraphrase this medical answer: {original_answer}")
@@ -238,24 +234,20 @@ def _get_question_style_prompt(strategy: str, original_question: str, answer: st
238
  """Generate style-specific prompts for question enhancement with medical focus"""
239
  prompts = {
240
  "clarifying": (
241
- "You are a medical professional. Rewrite this medical question to ask for clarification or more specific medical information:\n\n"
242
- f"Original question: {original_question}\n\n"
243
- "Clarifying medical question:"
244
  ),
245
  "follow_up": (
246
- "You are a medical professional. Create a follow-up question that a patient might ask after this medical question, focusing on related medical concerns:\n\n"
247
- f"Original question: {original_question}\n\n"
248
- "Follow-up medical question:"
249
  ),
250
  "symptom_focused": (
251
- "You are a medical professional. Rewrite this question to focus more on symptoms, their characteristics, and clinical presentation:\n\n"
252
- f"Original question: {original_question}\n\n"
253
- "Symptom-focused medical question:"
254
  ),
255
  "treatment_focused": (
256
- "You are a medical professional. Rewrite this question to focus more on treatment options, management strategies, and therapeutic approaches:\n\n"
257
- f"Original question: {original_question}\n\n"
258
- "Treatment-focused medical question:"
259
  )
260
  }
261
  return prompts.get(strategy, f"Paraphrase this medical question: {original_question}")
 
212
  """Generate style-specific prompts for answer enhancement with medical focus"""
213
  prompts = {
214
  "concise": (
215
+ "Rewrite this medical answer to be more concise while preserving all key medical information, clinical facts, and diagnostic details. Return only the rewritten answer without any introduction or commentary:\n\n"
216
+ f"{original_answer}"
 
217
  ),
218
  "detailed": (
219
+ "Expand this medical answer with more detailed explanations, clinical context, and additional medical information while maintaining accuracy. Return only the expanded answer without any introduction or commentary:\n\n"
220
+ f"{original_answer}"
 
221
  ),
222
  "clinical": (
223
+ "Rewrite this answer using more formal clinical language, precise medical terminology, and professional medical communication style. Return only the rewritten answer without any introduction or commentary:\n\n"
224
+ f"{original_answer}"
 
225
  ),
226
  "patient_friendly": (
227
+ "Rewrite this medical answer in simpler, more patient-friendly language while keeping it medically accurate and informative. Return only the rewritten answer without any introduction or commentary:\n\n"
228
+ f"{original_answer}"
 
229
  )
230
  }
231
  return prompts.get(strategy, f"Paraphrase this medical answer: {original_answer}")
 
234
  """Generate style-specific prompts for question enhancement with medical focus"""
235
  prompts = {
236
  "clarifying": (
237
+ "Rewrite this medical question to ask for clarification or more specific medical information. Return only the rewritten question without any introduction or commentary:\n\n"
238
+ f"{original_question}"
 
239
  ),
240
  "follow_up": (
241
+ "Create a follow-up question that a patient might ask after this medical question, focusing on related medical concerns. Return only the follow-up question without any introduction or commentary:\n\n"
242
+ f"{original_question}"
 
243
  ),
244
  "symptom_focused": (
245
+ "Rewrite this question to focus more on symptoms, their characteristics, and clinical presentation. Return only the rewritten question without any introduction or commentary:\n\n"
246
+ f"{original_question}"
 
247
  ),
248
  "treatment_focused": (
249
+ "Rewrite this question to focus more on treatment options, management strategies, and therapeutic approaches. Return only the rewritten question without any introduction or commentary:\n\n"
250
+ f"{original_question}"
 
251
  )
252
  }
253
  return prompts.get(strategy, f"Paraphrase this medical question: {original_question}")
utils/rag.py CHANGED
@@ -56,19 +56,9 @@ class RAGProcessor:
56
  if not text or len(text.strip()) < 10:
57
  return text
58
 
59
- prompt = f"""
60
- You are a medical data cleaning expert. Clean the following text by:
61
- 1. Remove conversational elements (greetings, pleasantries)
62
- 2. Remove non-medical small talk and social interactions
63
- 3. Keep only medically relevant information
64
- 4. Preserve clinical facts, symptoms, diagnoses, treatments, and medical advice
65
- 5. Maintain professional medical language
66
- 6. Return only cleaned medical content in 1-2 concise sentences suitable for dense retrieval embeddings. No lists, no headers.
67
 
68
- Text to clean:
69
- {text}
70
-
71
- Cleaned medical content:"""
72
 
73
  try:
74
  if self.is_local and self.medalpaca_client:
@@ -93,13 +83,11 @@ class RAGProcessor:
93
  if not question or not answer:
94
  return ""
95
 
96
- prompt = f"""You are a medical knowledge expert. Given a medical question and its answer, generate a brief relevant medical context that helps retrieval. Limit to 1–2 sentences, concise, avoid boilerplate, no enumerations.
97
 
98
  Question: {question}
99
 
100
- Answer: {answer}
101
-
102
- Generate a concise medical context:"""
103
 
104
  try:
105
  if self.is_local and self.medalpaca_client:
 
56
  if not text or len(text.strip()) < 10:
57
  return text
58
 
59
+ prompt = f"""Clean the following text by removing conversational elements (greetings, pleasantries), non-medical small talk, and social interactions. Keep only medically relevant information while preserving clinical facts, symptoms, diagnoses, treatments, and medical advice. Maintain professional medical language. Return only cleaned medical content in 1-2 concise sentences suitable for dense retrieval embeddings. No lists, no headers, no introduction or commentary:
 
 
 
 
 
 
 
60
 
61
+ {text}"""
 
 
 
62
 
63
  try:
64
  if self.is_local and self.medalpaca_client:
 
83
  if not question or not answer:
84
  return ""
85
 
86
+ prompt = f"""Given a medical question and its answer, generate a brief relevant medical context that helps retrieval. Limit to 1–2 sentences, concise, avoid boilerplate, no enumerations. Return only the medical context without any introduction or commentary:
87
 
88
  Question: {question}
89
 
90
+ Answer: {answer}"""
 
 
91
 
92
  try:
93
  if self.is_local and self.medalpaca_client: