Spaces:
Sleeping
Sleeping
Commit · 88e7ced
1 Parent(s): e0b2bb1
Rm convo
Browse files
- utils/augment.py +8 -9
- utils/cloud_llm.py +23 -42
- utils/local_llm.py +22 -41
- utils/processor.py +16 -24
- utils/rag.py +4 -16
utils/augment.py
CHANGED
|
@@ -224,11 +224,11 @@ def retry_invalid_response(text: str, paraphraser, max_retries: int = 3) -> str:
|
|
| 224 |
retry_text = paraphraser.paraphrase(text, difficulty="easy")
|
| 225 |
elif attempt == 1:
|
| 226 |
# Second try: More aggressive paraphrasing with medical focus
|
| 227 |
-
medical_prompt = f"Rewrite this medical response to be more professional and accurate:\n\n{text}"
|
| 228 |
retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
|
| 229 |
else:
|
| 230 |
# Third try: Direct medical content generation
|
| 231 |
-
medical_prompt = f"Provide a professional medical response to this question:\n\n{text}"
|
| 232 |
retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
|
| 233 |
|
| 234 |
if retry_text and not is_invalid_response(retry_text):
|
|
@@ -274,9 +274,8 @@ def enhance_medical_terminology(text: str, paraphraser) -> str:
|
|
| 274 |
return enhanced
|
| 275 |
else:
|
| 276 |
prompt = (
|
| 277 |
-
"Improve the medical terminology in this text while preserving all factual information:\n\n"
|
| 278 |
-
f"{text}
|
| 279 |
-
"Return only the improved text with better medical terminology:"
|
| 280 |
)
|
| 281 |
|
| 282 |
enhanced = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=prompt)
|
|
@@ -298,10 +297,10 @@ def create_clinical_scenarios(question: str, answer: str, paraphraser) -> list:
|
|
| 298 |
else:
|
| 299 |
# Fallback to original implementation
|
| 300 |
context_prompts = [
|
| 301 |
-
f"Rewrite this medical question as if asked by a patient in an emergency room:\n\n{question}",
|
| 302 |
-
f"Rewrite this medical question as if asked by a patient in a routine checkup:\n\n{question}",
|
| 303 |
-
f"Rewrite this medical question as if asked by a patient with chronic conditions:\n\n{question}",
|
| 304 |
-
f"Rewrite this medical question as if asked by a patient's family member:\n\n{question}"
|
| 305 |
]
|
| 306 |
|
| 307 |
for i, prompt in enumerate(context_prompts):
|
|
|
|
| 224 |
retry_text = paraphraser.paraphrase(text, difficulty="easy")
|
| 225 |
elif attempt == 1:
|
| 226 |
# Second try: More aggressive paraphrasing with medical focus
|
| 227 |
+
medical_prompt = f"Rewrite this medical response to be more professional and accurate. Return only the rewritten response without any introduction or commentary:\n\n{text}"
|
| 228 |
retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
|
| 229 |
else:
|
| 230 |
# Third try: Direct medical content generation
|
| 231 |
+
medical_prompt = f"Provide a professional medical response to this question. Return only the medical response without any introduction or commentary:\n\n{text}"
|
| 232 |
retry_text = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=medical_prompt)
|
| 233 |
|
| 234 |
if retry_text and not is_invalid_response(retry_text):
|
|
|
|
| 274 |
return enhanced
|
| 275 |
else:
|
| 276 |
prompt = (
|
| 277 |
+
"Improve the medical terminology in this text while preserving all factual information. Return only the improved text with better medical terminology without any introduction or commentary:\n\n"
|
| 278 |
+
f"{text}"
|
|
|
|
| 279 |
)
|
| 280 |
|
| 281 |
enhanced = paraphraser.paraphrase(text, difficulty="hard", custom_prompt=prompt)
|
|
|
|
| 297 |
else:
|
| 298 |
# Fallback to original implementation
|
| 299 |
context_prompts = [
|
| 300 |
+
f"Rewrite this medical question as if asked by a patient in an emergency room. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 301 |
+
f"Rewrite this medical question as if asked by a patient in a routine checkup. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 302 |
+
f"Rewrite this medical question as if asked by a patient with chronic conditions. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 303 |
+
f"Rewrite this medical question as if asked by a patient's family member. Return only the rewritten question without any introduction or commentary:\n\n{question}"
|
| 304 |
]
|
| 305 |
|
| 306 |
for i, prompt in enumerate(context_prompts):
|
utils/cloud_llm.py
CHANGED
|
@@ -153,15 +153,13 @@ class Paraphraser:
|
|
| 153 |
# Optimized medical paraphrasing prompts based on difficulty
|
| 154 |
if difficulty == "easy":
|
| 155 |
prompt = (
|
| 156 |
-
"
|
| 157 |
-
f"
|
| 158 |
-
"Rewritten medical text:"
|
| 159 |
)
|
| 160 |
else: # hard difficulty
|
| 161 |
prompt = (
|
| 162 |
-
"
|
| 163 |
-
f"
|
| 164 |
-
"Enhanced medical text:"
|
| 165 |
)
|
| 166 |
|
| 167 |
# Optimize temperature and token limits based on difficulty
|
|
@@ -187,15 +185,13 @@ class Paraphraser:
|
|
| 187 |
# Optimized medical translation prompts
|
| 188 |
if target_lang == "vi":
|
| 189 |
prompt = (
|
| 190 |
-
"
|
| 191 |
-
f"
|
| 192 |
-
"Vietnamese medical translation:"
|
| 193 |
)
|
| 194 |
else:
|
| 195 |
prompt = (
|
| 196 |
-
f"
|
| 197 |
-
f"
|
| 198 |
-
f"{target_lang} medical translation:"
|
| 199 |
)
|
| 200 |
|
| 201 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(800, len(text)+100))
|
|
@@ -210,15 +206,13 @@ class Paraphraser:
|
|
| 210 |
# Optimized backtranslation prompt with medical focus
|
| 211 |
if via_lang == "vi":
|
| 212 |
prompt = (
|
| 213 |
-
"
|
| 214 |
-
f"
|
| 215 |
-
"English medical translation:"
|
| 216 |
)
|
| 217 |
else:
|
| 218 |
prompt = (
|
| 219 |
-
f"
|
| 220 |
-
f"{
|
| 221 |
-
"English medical translation:"
|
| 222 |
)
|
| 223 |
|
| 224 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(900, len(text)+150))
|
|
@@ -230,15 +224,9 @@ class Paraphraser:
|
|
| 230 |
def consistency_check(self, user: str, output: str) -> bool:
|
| 231 |
"""Return True if 'output' appears supported by 'user' (context/question). Optimized medical validation."""
|
| 232 |
prompt = (
|
| 233 |
-
"
|
| 234 |
-
"1. Medical accuracy and clinical appropriateness\n"
|
| 235 |
-
"2. Consistency with the question asked\n"
|
| 236 |
-
"3. Safety and professional medical standards\n"
|
| 237 |
-
"4. Completeness of the medical information\n\n"
|
| 238 |
-
"Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
|
| 239 |
f"Question/Context: {user}\n\n"
|
| 240 |
-
f"Medical Answer: {output}
|
| 241 |
-
"Evaluation:"
|
| 242 |
)
|
| 243 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
|
| 244 |
if not out:
|
|
@@ -251,15 +239,9 @@ class Paraphraser:
|
|
| 251 |
return False
|
| 252 |
|
| 253 |
prompt = (
|
| 254 |
-
"
|
| 255 |
-
"1. Medical facts and clinical knowledge\n"
|
| 256 |
-
"2. Appropriate medical terminology\n"
|
| 257 |
-
"3. Clinical reasoning and logic\n"
|
| 258 |
-
"4. Safety considerations\n\n"
|
| 259 |
-
"Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
|
| 260 |
f"Medical Question: {question}\n\n"
|
| 261 |
-
f"Medical Answer: {answer}
|
| 262 |
-
"Medical Accuracy Assessment:"
|
| 263 |
)
|
| 264 |
|
| 265 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
|
|
@@ -273,9 +255,8 @@ class Paraphraser:
|
|
| 273 |
return text
|
| 274 |
|
| 275 |
prompt = (
|
| 276 |
-
"
|
| 277 |
-
f"
|
| 278 |
-
"Enhanced medical text:"
|
| 279 |
)
|
| 280 |
|
| 281 |
out = self.nv.generate(prompt, temperature=0.1, max_tokens=min(800, len(text)+100))
|
|
@@ -290,26 +271,26 @@ class Paraphraser:
|
|
| 290 |
# Different clinical context prompts
|
| 291 |
context_prompts = [
|
| 292 |
(
|
| 293 |
-
"Rewrite this medical question as if asked by a patient in an emergency room setting
|
| 294 |
"emergency_room"
|
| 295 |
),
|
| 296 |
(
|
| 297 |
-
"Rewrite this medical question as if asked by a patient during a routine checkup
|
| 298 |
"routine_checkup"
|
| 299 |
),
|
| 300 |
(
|
| 301 |
-
"Rewrite this medical question as if asked by a patient with chronic conditions
|
| 302 |
"chronic_care"
|
| 303 |
),
|
| 304 |
(
|
| 305 |
-
"Rewrite this medical question as if asked by a patient's family member
|
| 306 |
"family_inquiry"
|
| 307 |
)
|
| 308 |
]
|
| 309 |
|
| 310 |
for prompt_template, scenario_type in context_prompts:
|
| 311 |
try:
|
| 312 |
-
prompt =
|
| 313 |
scenario_question = self.paraphrase(question, difficulty="hard", custom_prompt=prompt)
|
| 314 |
|
| 315 |
if scenario_question and not self._is_invalid_response(scenario_question):
|
|
|
|
| 153 |
# Optimized medical paraphrasing prompts based on difficulty
|
| 154 |
if difficulty == "easy":
|
| 155 |
prompt = (
|
| 156 |
+
"Rewrite the following medical text using different words while preserving all medical facts, clinical terms, and meaning. Keep the same level of detail and accuracy. Return only the rewritten text without any introduction or commentary.\n\n"
|
| 157 |
+
f"{text}"
|
|
|
|
| 158 |
)
|
| 159 |
else: # hard difficulty
|
| 160 |
prompt = (
|
| 161 |
+
"Rewrite the following medical text using more sophisticated medical language and different sentence structures while preserving all clinical facts, medical terminology, and diagnostic information. Maintain professional medical tone. Return only the rewritten text without any introduction or commentary.\n\n"
|
| 162 |
+
f"{text}"
|
|
|
|
| 163 |
)
|
| 164 |
|
| 165 |
# Optimize temperature and token limits based on difficulty
|
|
|
|
| 185 |
# Optimized medical translation prompts
|
| 186 |
if target_lang == "vi":
|
| 187 |
prompt = (
|
| 188 |
+
"Translate the following English medical text to Vietnamese while preserving all medical terminology, clinical facts, and professional medical language. Use appropriate Vietnamese medical terms. Return only the translation without any introduction or commentary.\n\n"
|
| 189 |
+
f"{text}"
|
|
|
|
| 190 |
)
|
| 191 |
else:
|
| 192 |
prompt = (
|
| 193 |
+
f"Translate the following medical text to {target_lang} while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
|
| 194 |
+
f"{text}"
|
|
|
|
| 195 |
)
|
| 196 |
|
| 197 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(800, len(text)+100))
|
|
|
|
| 206 |
# Optimized backtranslation prompt with medical focus
|
| 207 |
if via_lang == "vi":
|
| 208 |
prompt = (
|
| 209 |
+
"Translate the following Vietnamese medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Ensure the translation is medically accurate. Return only the translation without any introduction or commentary.\n\n"
|
| 210 |
+
f"{mid}"
|
|
|
|
| 211 |
)
|
| 212 |
else:
|
| 213 |
prompt = (
|
| 214 |
+
f"Translate the following {via_lang} medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
|
| 215 |
+
f"{mid}"
|
|
|
|
| 216 |
)
|
| 217 |
|
| 218 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=min(900, len(text)+150))
|
|
|
|
| 224 |
def consistency_check(self, user: str, output: str) -> bool:
|
| 225 |
"""Return True if 'output' appears supported by 'user' (context/question). Optimized medical validation."""
|
| 226 |
prompt = (
|
| 227 |
+
"Evaluate if the medical answer is consistent with the question/context and medically accurate. Consider medical accuracy, clinical appropriateness, consistency with the question, safety standards, and completeness of medical information. Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
f"Question/Context: {user}\n\n"
|
| 229 |
+
f"Medical Answer: {output}"
|
|
|
|
| 230 |
)
|
| 231 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
|
| 232 |
if not out:
|
|
|
|
| 239 |
return False
|
| 240 |
|
| 241 |
prompt = (
|
| 242 |
+
"Evaluate if the medical answer is accurate and appropriate for the question. Consider medical facts, clinical knowledge, appropriate medical terminology, clinical reasoning, logic, and safety considerations. Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
f"Medical Question: {question}\n\n"
|
| 244 |
+
f"Medical Answer: {answer}"
|
|
|
|
| 245 |
)
|
| 246 |
|
| 247 |
out = self.nv.generate(prompt, temperature=0.0, max_tokens=5)
|
|
|
|
| 255 |
return text
|
| 256 |
|
| 257 |
prompt = (
|
| 258 |
+
"Improve the medical terminology in the following text while preserving all factual information and clinical accuracy. Use more precise medical terms where appropriate. Return only the improved text without any introduction or commentary.\n\n"
|
| 259 |
+
f"{text}"
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
out = self.nv.generate(prompt, temperature=0.1, max_tokens=min(800, len(text)+100))
|
|
|
|
| 271 |
# Different clinical context prompts
|
| 272 |
context_prompts = [
|
| 273 |
(
|
| 274 |
+
"Rewrite this medical question as if asked by a patient in an emergency room setting. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 275 |
"emergency_room"
|
| 276 |
),
|
| 277 |
(
|
| 278 |
+
"Rewrite this medical question as if asked by a patient during a routine checkup. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 279 |
"routine_checkup"
|
| 280 |
),
|
| 281 |
(
|
| 282 |
+
"Rewrite this medical question as if asked by a patient with chronic conditions. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 283 |
"chronic_care"
|
| 284 |
),
|
| 285 |
(
|
| 286 |
+
"Rewrite this medical question as if asked by a patient's family member. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 287 |
"family_inquiry"
|
| 288 |
)
|
| 289 |
]
|
| 290 |
|
| 291 |
for prompt_template, scenario_type in context_prompts:
|
| 292 |
try:
|
| 293 |
+
prompt = prompt_template.format(question=question)
|
| 294 |
scenario_question = self.paraphrase(question, difficulty="hard", custom_prompt=prompt)
|
| 295 |
|
| 296 |
if scenario_question and not self._is_invalid_response(scenario_question):
|
utils/local_llm.py
CHANGED
|
@@ -268,15 +268,13 @@ class LocalParaphraser:
|
|
| 268 |
# Medical-specific paraphrasing prompts based on difficulty
|
| 269 |
if difficulty == "easy":
|
| 270 |
prompt = (
|
| 271 |
-
"
|
| 272 |
-
f"
|
| 273 |
-
"Rewritten medical text:"
|
| 274 |
)
|
| 275 |
else: # hard difficulty
|
| 276 |
prompt = (
|
| 277 |
-
"
|
| 278 |
-
f"
|
| 279 |
-
"Enhanced medical text:"
|
| 280 |
)
|
| 281 |
|
| 282 |
# Adjust temperature based on difficulty
|
|
@@ -292,15 +290,13 @@ class LocalParaphraser:
|
|
| 292 |
# Medical-specific translation prompt
|
| 293 |
if target_lang == "vi":
|
| 294 |
prompt = (
|
| 295 |
-
"
|
| 296 |
-
f"
|
| 297 |
-
"Vietnamese medical translation:"
|
| 298 |
)
|
| 299 |
else:
|
| 300 |
prompt = (
|
| 301 |
-
f"
|
| 302 |
-
f"
|
| 303 |
-
f"{target_lang} medical translation:"
|
| 304 |
)
|
| 305 |
|
| 306 |
result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.0)
|
|
@@ -319,15 +315,13 @@ class LocalParaphraser:
|
|
| 319 |
# Then translate back to English with medical focus
|
| 320 |
if via_lang == "vi":
|
| 321 |
prompt = (
|
| 322 |
-
"
|
| 323 |
-
f"
|
| 324 |
-
"English medical translation:"
|
| 325 |
)
|
| 326 |
else:
|
| 327 |
prompt = (
|
| 328 |
-
f"
|
| 329 |
-
f"{
|
| 330 |
-
"English medical translation:"
|
| 331 |
)
|
| 332 |
|
| 333 |
result = self.client.generate(prompt, max_tokens=min(900, len(text)+150), temperature=0.0)
|
|
@@ -336,15 +330,9 @@ class LocalParaphraser:
|
|
| 336 |
def consistency_check(self, user: str, output: str) -> bool:
|
| 337 |
"""Check consistency using MedAlpaca with medical validation focus"""
|
| 338 |
prompt = (
|
| 339 |
-
"
|
| 340 |
-
"1. Medical accuracy and clinical appropriateness\n"
|
| 341 |
-
"2. Consistency with the question asked\n"
|
| 342 |
-
"3. Safety and professional medical standards\n"
|
| 343 |
-
"4. Completeness of the medical information\n\n"
|
| 344 |
-
"Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
|
| 345 |
f"Question/Context: {user}\n\n"
|
| 346 |
-
f"Medical Answer: {output}
|
| 347 |
-
"Evaluation:"
|
| 348 |
)
|
| 349 |
|
| 350 |
result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
|
|
@@ -356,15 +344,9 @@ class LocalParaphraser:
|
|
| 356 |
return False
|
| 357 |
|
| 358 |
prompt = (
|
| 359 |
-
"
|
| 360 |
-
"1. Medical facts and clinical knowledge\n"
|
| 361 |
-
"2. Appropriate medical terminology\n"
|
| 362 |
-
"3. Clinical reasoning and logic\n"
|
| 363 |
-
"4. Safety considerations\n\n"
|
| 364 |
-
"Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
|
| 365 |
f"Medical Question: {question}\n\n"
|
| 366 |
-
f"Medical Answer: {answer}
|
| 367 |
-
"Medical Accuracy Assessment:"
|
| 368 |
)
|
| 369 |
|
| 370 |
result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
|
|
@@ -376,9 +358,8 @@ class LocalParaphraser:
|
|
| 376 |
return text
|
| 377 |
|
| 378 |
prompt = (
|
| 379 |
-
"
|
| 380 |
-
f"
|
| 381 |
-
"Enhanced medical text:"
|
| 382 |
)
|
| 383 |
|
| 384 |
result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.1)
|
|
@@ -391,19 +372,19 @@ class LocalParaphraser:
|
|
| 391 |
# Different clinical context prompts
|
| 392 |
context_prompts = [
|
| 393 |
(
|
| 394 |
-
"
|
| 395 |
"emergency_room"
|
| 396 |
),
|
| 397 |
(
|
| 398 |
-
"
|
| 399 |
"routine_checkup"
|
| 400 |
),
|
| 401 |
(
|
| 402 |
-
"
|
| 403 |
"chronic_care"
|
| 404 |
),
|
| 405 |
(
|
| 406 |
-
"
|
| 407 |
"family_inquiry"
|
| 408 |
)
|
| 409 |
]
|
|
|
|
| 268 |
# Medical-specific paraphrasing prompts based on difficulty
|
| 269 |
if difficulty == "easy":
|
| 270 |
prompt = (
|
| 271 |
+
"Rewrite the following medical text using different words while preserving all medical facts, clinical terms, and meaning. Keep the same level of detail and accuracy. Return only the rewritten text without any introduction or commentary.\n\n"
|
| 272 |
+
f"{text}"
|
|
|
|
| 273 |
)
|
| 274 |
else: # hard difficulty
|
| 275 |
prompt = (
|
| 276 |
+
"Rewrite the following medical text using more sophisticated medical language and different sentence structures while preserving all clinical facts, medical terminology, and diagnostic information. Maintain professional medical tone. Return only the rewritten text without any introduction or commentary.\n\n"
|
| 277 |
+
f"{text}"
|
|
|
|
| 278 |
)
|
| 279 |
|
| 280 |
# Adjust temperature based on difficulty
|
|
|
|
| 290 |
# Medical-specific translation prompt
|
| 291 |
if target_lang == "vi":
|
| 292 |
prompt = (
|
| 293 |
+
"Translate the following English medical text to Vietnamese while preserving all medical terminology, clinical facts, and professional medical language. Use appropriate Vietnamese medical terms. Return only the translation without any introduction or commentary.\n\n"
|
| 294 |
+
f"{text}"
|
|
|
|
| 295 |
)
|
| 296 |
else:
|
| 297 |
prompt = (
|
| 298 |
+
f"Translate the following medical text to {target_lang} while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
|
| 299 |
+
f"{text}"
|
|
|
|
| 300 |
)
|
| 301 |
|
| 302 |
result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.0)
|
|
|
|
| 315 |
# Then translate back to English with medical focus
|
| 316 |
if via_lang == "vi":
|
| 317 |
prompt = (
|
| 318 |
+
"Translate the following Vietnamese medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Ensure the translation is medically accurate. Return only the translation without any introduction or commentary.\n\n"
|
| 319 |
+
f"{translated}"
|
|
|
|
| 320 |
)
|
| 321 |
else:
|
| 322 |
prompt = (
|
| 323 |
+
f"Translate the following {via_lang} medical text back to English while preserving all medical terminology, clinical facts, and professional medical language. Return only the translation without any introduction or commentary.\n\n"
|
| 324 |
+
f"{translated}"
|
|
|
|
| 325 |
)
|
| 326 |
|
| 327 |
result = self.client.generate(prompt, max_tokens=min(900, len(text)+150), temperature=0.0)
|
|
|
|
| 330 |
def consistency_check(self, user: str, output: str) -> bool:
|
| 331 |
"""Check consistency using MedAlpaca with medical validation focus"""
|
| 332 |
prompt = (
|
| 333 |
+
"Evaluate if the medical answer is consistent with the question/context and medically accurate. Consider medical accuracy, clinical appropriateness, consistency with the question, safety standards, and completeness of medical information. Reply with exactly 'PASS' if the answer is medically sound and consistent, otherwise 'FAIL'.\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
f"Question/Context: {user}\n\n"
|
| 335 |
+
f"Medical Answer: {output}"
|
|
|
|
| 336 |
)
|
| 337 |
|
| 338 |
result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
|
|
|
|
| 344 |
return False
|
| 345 |
|
| 346 |
prompt = (
|
| 347 |
+
"Evaluate if the medical answer is accurate and appropriate for the question. Consider medical facts, clinical knowledge, appropriate medical terminology, clinical reasoning, logic, and safety considerations. Reply with exactly 'ACCURATE' if the answer is medically correct, otherwise 'INACCURATE'.\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
f"Medical Question: {question}\n\n"
|
| 349 |
+
f"Medical Answer: {answer}"
|
|
|
|
| 350 |
)
|
| 351 |
|
| 352 |
result = self.client.generate(prompt, max_tokens=5, temperature=0.0)
|
|
|
|
| 358 |
return text
|
| 359 |
|
| 360 |
prompt = (
|
| 361 |
+
"Improve the medical terminology in the following text while preserving all factual information and clinical accuracy. Use more precise medical terms where appropriate. Return only the improved text without any introduction or commentary.\n\n"
|
| 362 |
+
f"{text}"
|
|
|
|
| 363 |
)
|
| 364 |
|
| 365 |
result = self.client.generate(prompt, max_tokens=min(800, len(text)+100), temperature=0.1)
|
|
|
|
| 372 |
# Different clinical context prompts
|
| 373 |
context_prompts = [
|
| 374 |
(
|
| 375 |
+
"Rewrite this medical question as if asked by a patient in an emergency room setting. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 376 |
"emergency_room"
|
| 377 |
),
|
| 378 |
(
|
| 379 |
+
"Rewrite this medical question as if asked by a patient during a routine checkup. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 380 |
"routine_checkup"
|
| 381 |
),
|
| 382 |
(
|
| 383 |
+
"Rewrite this medical question as if asked by a patient with chronic conditions. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 384 |
"chronic_care"
|
| 385 |
),
|
| 386 |
(
|
| 387 |
+
"Rewrite this medical question as if asked by a patient's family member. Return only the rewritten question without any introduction or commentary:\n\n{question}",
|
| 388 |
"family_inquiry"
|
| 389 |
)
|
| 390 |
]
|
utils/processor.py
CHANGED
|
@@ -212,24 +212,20 @@ def _get_answer_style_prompt(strategy: str, question: str, original_answer: str)
|
|
| 212 |
"""Generate style-specific prompts for answer enhancement with medical focus"""
|
| 213 |
prompts = {
|
| 214 |
"concise": (
|
| 215 |
-
"
|
| 216 |
-
f"
|
| 217 |
-
"Concise medical answer:"
|
| 218 |
),
|
| 219 |
"detailed": (
|
| 220 |
-
"
|
| 221 |
-
f"
|
| 222 |
-
"Detailed medical answer:"
|
| 223 |
),
|
| 224 |
"clinical": (
|
| 225 |
-
"
|
| 226 |
-
f"
|
| 227 |
-
"Clinical medical answer:"
|
| 228 |
),
|
| 229 |
"patient_friendly": (
|
| 230 |
-
"
|
| 231 |
-
f"
|
| 232 |
-
"Patient-friendly medical answer:"
|
| 233 |
)
|
| 234 |
}
|
| 235 |
return prompts.get(strategy, f"Paraphrase this medical answer: {original_answer}")
|
|
@@ -238,24 +234,20 @@ def _get_question_style_prompt(strategy: str, original_question: str, answer: st
|
|
| 238 |
"""Generate style-specific prompts for question enhancement with medical focus"""
|
| 239 |
prompts = {
|
| 240 |
"clarifying": (
|
| 241 |
-
"
|
| 242 |
-
f"
|
| 243 |
-
"Clarifying medical question:"
|
| 244 |
),
|
| 245 |
"follow_up": (
|
| 246 |
-
"
|
| 247 |
-
f"
|
| 248 |
-
"Follow-up medical question:"
|
| 249 |
),
|
| 250 |
"symptom_focused": (
|
| 251 |
-
"
|
| 252 |
-
f"
|
| 253 |
-
"Symptom-focused medical question:"
|
| 254 |
),
|
| 255 |
"treatment_focused": (
|
| 256 |
-
"
|
| 257 |
-
f"
|
| 258 |
-
"Treatment-focused medical question:"
|
| 259 |
)
|
| 260 |
}
|
| 261 |
return prompts.get(strategy, f"Paraphrase this medical question: {original_question}")
|
|
|
|
| 212 |
"""Generate style-specific prompts for answer enhancement with medical focus"""
|
| 213 |
prompts = {
|
| 214 |
"concise": (
|
| 215 |
+
"Rewrite this medical answer to be more concise while preserving all key medical information, clinical facts, and diagnostic details. Return only the rewritten answer without any introduction or commentary:\n\n"
|
| 216 |
+
f"{original_answer}"
|
|
|
|
| 217 |
),
|
| 218 |
"detailed": (
|
| 219 |
+
"Expand this medical answer with more detailed explanations, clinical context, and additional medical information while maintaining accuracy. Return only the expanded answer without any introduction or commentary:\n\n"
|
| 220 |
+
f"{original_answer}"
|
|
|
|
| 221 |
),
|
| 222 |
"clinical": (
|
| 223 |
+
"Rewrite this answer using more formal clinical language, precise medical terminology, and professional medical communication style. Return only the rewritten answer without any introduction or commentary:\n\n"
|
| 224 |
+
f"{original_answer}"
|
|
|
|
| 225 |
),
|
| 226 |
"patient_friendly": (
|
| 227 |
+
"Rewrite this medical answer in simpler, more patient-friendly language while keeping it medically accurate and informative. Return only the rewritten answer without any introduction or commentary:\n\n"
|
| 228 |
+
f"{original_answer}"
|
|
|
|
| 229 |
)
|
| 230 |
}
|
| 231 |
return prompts.get(strategy, f"Paraphrase this medical answer: {original_answer}")
|
|
|
|
| 234 |
"""Generate style-specific prompts for question enhancement with medical focus"""
|
| 235 |
prompts = {
|
| 236 |
"clarifying": (
|
| 237 |
+
"Rewrite this medical question to ask for clarification or more specific medical information. Return only the rewritten question without any introduction or commentary:\n\n"
|
| 238 |
+
f"{original_question}"
|
|
|
|
| 239 |
),
|
| 240 |
"follow_up": (
|
| 241 |
+
"Create a follow-up question that a patient might ask after this medical question, focusing on related medical concerns. Return only the follow-up question without any introduction or commentary:\n\n"
|
| 242 |
+
f"{original_question}"
|
|
|
|
| 243 |
),
|
| 244 |
"symptom_focused": (
|
| 245 |
+
"Rewrite this question to focus more on symptoms, their characteristics, and clinical presentation. Return only the rewritten question without any introduction or commentary:\n\n"
|
| 246 |
+
f"{original_question}"
|
|
|
|
| 247 |
),
|
| 248 |
"treatment_focused": (
|
| 249 |
+
"Rewrite this question to focus more on treatment options, management strategies, and therapeutic approaches. Return only the rewritten question without any introduction or commentary:\n\n"
|
| 250 |
+
f"{original_question}"
|
|
|
|
| 251 |
)
|
| 252 |
}
|
| 253 |
return prompts.get(strategy, f"Paraphrase this medical question: {original_question}")
|
utils/rag.py
CHANGED
|
@@ -56,19 +56,9 @@ class RAGProcessor:
|
|
| 56 |
if not text or len(text.strip()) < 10:
|
| 57 |
return text
|
| 58 |
|
| 59 |
-
prompt = f"""
|
| 60 |
-
You are a medical data cleaning expert. Clean the following text by:
|
| 61 |
-
1. Remove conversational elements (greetings, pleasantries)
|
| 62 |
-
2. Remove non-medical small talk and social interactions
|
| 63 |
-
3. Keep only medically relevant information
|
| 64 |
-
4. Preserve clinical facts, symptoms, diagnoses, treatments, and medical advice
|
| 65 |
-
5. Maintain professional medical language
|
| 66 |
-
6. Return only cleaned medical content in 1-2 concise sentences suitable for dense retrieval embeddings. No lists, no headers.
|
| 67 |
|
| 68 |
-
|
| 69 |
-
{text}
|
| 70 |
-
|
| 71 |
-
Cleaned medical content:"""
|
| 72 |
|
| 73 |
try:
|
| 74 |
if self.is_local and self.medalpaca_client:
|
|
@@ -93,13 +83,11 @@ class RAGProcessor:
|
|
| 93 |
if not question or not answer:
|
| 94 |
return ""
|
| 95 |
|
| 96 |
-
prompt = f"""
|
| 97 |
|
| 98 |
Question: {question}
|
| 99 |
|
| 100 |
-
Answer: {answer}
|
| 101 |
-
|
| 102 |
-
Generate a concise medical context:"""
|
| 103 |
|
| 104 |
try:
|
| 105 |
if self.is_local and self.medalpaca_client:
|
|
|
|
| 56 |
if not text or len(text.strip()) < 10:
|
| 57 |
return text
|
| 58 |
|
| 59 |
+
prompt = f"""Clean the following text by removing conversational elements (greetings, pleasantries), non-medical small talk, and social interactions. Keep only medically relevant information while preserving clinical facts, symptoms, diagnoses, treatments, and medical advice. Maintain professional medical language. Return only cleaned medical content in 1-2 concise sentences suitable for dense retrieval embeddings. No lists, no headers, no introduction or commentary:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
{text}"""
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
try:
|
| 64 |
if self.is_local and self.medalpaca_client:
|
|
|
|
| 83 |
if not question or not answer:
|
| 84 |
return ""
|
| 85 |
|
| 86 |
+
prompt = f"""Given a medical question and its answer, generate a brief relevant medical context that helps retrieval. Limit to 1–2 sentences, concise, avoid boilerplate, no enumerations. Return only the medical context without any introduction or commentary:
|
| 87 |
|
| 88 |
Question: {question}
|
| 89 |
|
| 90 |
+
Answer: {answer}"""
|
|
|
|
|
|
|
| 91 |
|
| 92 |
try:
|
| 93 |
if self.is_local and self.medalpaca_client:
|