formatting fixes
Browse files - src/model.py +69 -19
src/model.py
CHANGED
|
@@ -1192,6 +1192,9 @@ def fetch_medical_evidence(query, max_results=3):
|
|
| 1192 |
# Function to parse doctor agent responses
|
| 1193 |
def parse_doctor_response(response_text):
|
| 1194 |
"""Parse the doctor agent's response into structured components"""
|
|
|
|
|
|
|
|
|
|
| 1195 |
# Initialize structure
|
| 1196 |
parsed = {
|
| 1197 |
"main_response": response_text,
|
|
@@ -1220,8 +1223,10 @@ def parse_doctor_response(response_text):
|
|
| 1220 |
if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
|
| 1221 |
# Split on any bullet point marker
|
| 1222 |
bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
|
| 1223 |
-
# Remove any empty items
|
| 1224 |
-
|
|
|
|
|
|
|
| 1225 |
elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
|
| 1226 |
# Split on numbered items
|
| 1227 |
numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
|
|
@@ -1237,10 +1242,15 @@ def parse_doctor_response(response_text):
|
|
| 1237 |
reasoning_text = reasoning_match.group(1).strip()
|
| 1238 |
# Split into bullet points if present
|
| 1239 |
if '\n-' in reasoning_text:
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1244 |
else:
|
| 1245 |
parsed["reasoning"] = [reasoning_text]
|
| 1246 |
|
|
@@ -1254,6 +1264,30 @@ def parse_doctor_response(response_text):
|
|
| 1254 |
else:
|
| 1255 |
parsed["sources"] = [sources_text]
|
| 1256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1257 |
# Extract citations in the text (format: [source_id])
|
| 1258 |
citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
|
| 1259 |
for citation in citation_matches:
|
|
@@ -1352,13 +1386,13 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1352 |
output_instructions = """
|
| 1353 |
Please structure your response clearly.
|
| 1354 |
**Priority 1: Direct Answer First**
|
| 1355 |
-
Begin by providing your best assessment based on the available information.
|
| 1356 |
|
| 1357 |
**Priority 2: Follow-up Questions**
|
| 1358 |
After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
|
| 1359 |
|
| 1360 |
**Main Response Structure:**
|
| 1361 |
-
1. A direct answer to the patient's concerns.
|
| 1362 |
2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
|
| 1363 |
3. Recommendations for a treatment plan or next steps.
|
| 1364 |
4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
|
|
@@ -1366,15 +1400,22 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1366 |
• [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
|
| 1367 |
|
| 1368 |
Use no more than 3 sources and no fewer than 2 sources.
|
| 1369 |
-
|
| 1370 |
**After your main response, ALWAYS include these sections:**
|
| 1371 |
-
- **Follow-up Questions**: Specific questions
|
| 1372 |
-
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
| 1373 |
- **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
|
| 1374 |
- PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1375 |
URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
|
| 1376 |
- DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
| 1377 |
URL: https://doi.org/10.xxxx/yyyy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1378 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1379 |
"""
|
| 1380 |
else:
|
|
@@ -1382,19 +1423,25 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1382 |
output_instructions = """
|
| 1383 |
Please structure your response clearly.
|
| 1384 |
**Priority 1: Direct Answer First**
|
| 1385 |
-
Begin by providing your best assessment based on the available information.
|
| 1386 |
|
| 1387 |
**Priority 2: Follow-up Questions**
|
| 1388 |
After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
|
| 1389 |
|
| 1390 |
**Main Response Structure:**
|
| 1391 |
-
1. A direct answer to the patient's concerns.
|
| 1392 |
2. If appropriate, a clear diagnosis or differential diagnosis.
|
| 1393 |
3. Recommendations for a treatment plan or next steps.
|
| 1394 |
|
| 1395 |
**After your main response, ALWAYS include these sections:**
|
| 1396 |
-
- **Follow-up Questions**: Specific questions to gather additional information.
|
| 1397 |
-
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1398 |
IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
|
| 1399 |
"""
|
| 1400 |
|
|
@@ -1404,6 +1451,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1404 |
# Get response from doctor agent
|
| 1405 |
response = doctor_agent(msgs)
|
| 1406 |
|
|
|
|
|
|
|
|
|
|
| 1407 |
# Extract and process sources
|
| 1408 |
explanation = None
|
| 1409 |
evidence = None
|
|
@@ -1417,7 +1467,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1417 |
parsed_response = parse_doctor_response(linked_response)
|
| 1418 |
|
| 1419 |
# Get the main response
|
| 1420 |
-
main_response =
|
| 1421 |
|
| 1422 |
# Extract reasoning for display
|
| 1423 |
reasoning = parsed_response.get("reasoning", [])
|
|
@@ -1431,7 +1481,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1431 |
questions = parsed_response.get("follow_up_questions", [])
|
| 1432 |
if questions:
|
| 1433 |
if isinstance(questions, list):
|
| 1434 |
-
# Format as a numbered list
|
| 1435 |
follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
|
| 1436 |
else:
|
| 1437 |
follow_up_questions = questions
|
|
@@ -1441,7 +1491,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1441 |
else:
|
| 1442 |
# If RAG is disabled, just parse the response without source processing
|
| 1443 |
parsed_response = parse_doctor_response(response)
|
| 1444 |
-
main_response =
|
| 1445 |
|
| 1446 |
# Extract reasoning
|
| 1447 |
reasoning = parsed_response.get("reasoning", [])
|
|
@@ -1455,7 +1505,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1455 |
questions = parsed_response.get("follow_up_questions", [])
|
| 1456 |
if questions:
|
| 1457 |
if isinstance(questions, list):
|
| 1458 |
-
# Format as a numbered list
|
| 1459 |
follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
|
| 1460 |
else:
|
| 1461 |
follow_up_questions = questions
|
|
|
|
| 1192 |
# Function to parse doctor agent responses
|
| 1193 |
def parse_doctor_response(response_text):
|
| 1194 |
"""Parse the doctor agent's response into structured components"""
|
| 1195 |
+
# First, remove "Direct Answer:" prefix that might appear at the beginning of the response
|
| 1196 |
+
response_text = re.sub(r'^Direct Answer:\s*', '', response_text)
|
| 1197 |
+
|
| 1198 |
# Initialize structure
|
| 1199 |
parsed = {
|
| 1200 |
"main_response": response_text,
|
|
|
|
| 1223 |
if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
|
| 1224 |
# Split on any bullet point marker
|
| 1225 |
bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
|
| 1226 |
+
# Remove any empty items and ensure first item is properly formatted
|
| 1227 |
+
questions = [item.strip() for item in bullet_items if item.strip()]
|
| 1228 |
+
# The first item might not start with a bullet point
|
| 1229 |
+
parsed["follow_up_questions"] = questions
|
| 1230 |
elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
|
| 1231 |
# Split on numbered items
|
| 1232 |
numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
|
|
|
|
| 1242 |
reasoning_text = reasoning_match.group(1).strip()
|
| 1243 |
# Split into bullet points if present
|
| 1244 |
if '\n-' in reasoning_text:
|
| 1245 |
+
# Split by newline + dash, but ensure we don't lose any content
|
| 1246 |
+
reasoning_points = []
|
| 1247 |
+
for item in reasoning_text.split('\n-'):
|
| 1248 |
+
if item.strip():
|
| 1249 |
+
# Clean up any dash at the beginning
|
| 1250 |
+
cleaned_item = item.lstrip('- ').strip()
|
| 1251 |
+
if cleaned_item:
|
| 1252 |
+
reasoning_points.append(cleaned_item)
|
| 1253 |
+
parsed["reasoning"] = reasoning_points
|
| 1254 |
else:
|
| 1255 |
parsed["reasoning"] = [reasoning_text]
|
| 1256 |
|
|
|
|
| 1264 |
else:
|
| 1265 |
parsed["sources"] = [sources_text]
|
| 1266 |
|
| 1267 |
+
# Clean up the main response - remove URLs, PMIDs and DOIs from the text if they're already in the sources section
|
| 1268 |
+
if parsed["sources"]:
|
| 1269 |
+
# Remove URL lines
|
| 1270 |
+
main_response_lines = []
|
| 1271 |
+
skip_lines = 0
|
| 1272 |
+
for line in parsed["main_response"].split('\n'):
|
| 1273 |
+
if skip_lines > 0:
|
| 1274 |
+
skip_lines -= 1
|
| 1275 |
+
continue
|
| 1276 |
+
|
| 1277 |
+
# Skip lines with just URLs
|
| 1278 |
+
if re.match(r'^URL:\s*https?://', line.strip()):
|
| 1279 |
+
skip_lines = 0
|
| 1280 |
+
continue
|
| 1281 |
+
|
| 1282 |
+
# Skip lines with PMIDs or DOIs being displayed alone
|
| 1283 |
+
if re.match(r'^(PMID|DOI):', line.strip()):
|
| 1284 |
+
skip_lines = 0
|
| 1285 |
+
continue
|
| 1286 |
+
|
| 1287 |
+
main_response_lines.append(line)
|
| 1288 |
+
|
| 1289 |
+
parsed["main_response"] = '\n'.join(main_response_lines)
|
| 1290 |
+
|
| 1291 |
# Extract citations in the text (format: [source_id])
|
| 1292 |
citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
|
| 1293 |
for citation in citation_matches:
|
|
|
|
| 1386 |
output_instructions = """
|
| 1387 |
Please structure your response clearly.
|
| 1388 |
**Priority 1: Direct Answer First**
|
| 1389 |
+
Begin by providing your best assessment based on the available information without using "Direct Answer:" as a heading. Just start your response directly with the answer. If the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
|
| 1390 |
|
| 1391 |
**Priority 2: Follow-up Questions**
|
| 1392 |
After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
|
| 1393 |
|
| 1394 |
**Main Response Structure:**
|
| 1395 |
+
1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
|
| 1396 |
2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
|
| 1397 |
3. Recommendations for a treatment plan or next steps.
|
| 1398 |
4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
|
|
|
|
| 1400 |
• [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
|
| 1401 |
|
| 1402 |
Use no more than 3 sources and no fewer than 2 sources.
|
| 1403 |
+
|
| 1404 |
**After your main response, ALWAYS include these sections:**
|
| 1405 |
+
- **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
|
| 1406 |
+
- **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first point is properly formatted.
|
| 1407 |
- **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
|
| 1408 |
- PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1409 |
URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
|
| 1410 |
- DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
| 1411 |
URL: https://doi.org/10.xxxx/yyyy
|
| 1412 |
+
|
| 1413 |
+
**IMPORTANT FORMATTING NOTES:**
|
| 1414 |
+
1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
|
| 1415 |
+
2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1416 |
+
3. Number the follow-up questions starting from 1, not from any other number.
|
| 1417 |
+
4. For the reasoning section, make sure the first bullet point is properly formatted.
|
| 1418 |
+
|
| 1419 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1420 |
"""
|
| 1421 |
else:
|
|
|
|
| 1423 |
output_instructions = """
|
| 1424 |
Please structure your response clearly.
|
| 1425 |
**Priority 1: Direct Answer First**
|
| 1426 |
+
Begin by providing your best assessment based on the available information without using "Direct Answer:" as a heading. Just start your response directly with the answer. If the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
|
| 1427 |
|
| 1428 |
**Priority 2: Follow-up Questions**
|
| 1429 |
After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
|
| 1430 |
|
| 1431 |
**Main Response Structure:**
|
| 1432 |
+
1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
|
| 1433 |
2. If appropriate, a clear diagnosis or differential diagnosis.
|
| 1434 |
3. Recommendations for a treatment plan or next steps.
|
| 1435 |
|
| 1436 |
**After your main response, ALWAYS include these sections:**
|
| 1437 |
+
- **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
|
| 1438 |
+
- **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first bullet point is properly formatted.
|
| 1439 |
+
|
| 1440 |
+
**IMPORTANT FORMATTING NOTES:**
|
| 1441 |
+
1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1442 |
+
2. Number the follow-up questions starting from 1, not from any other number.
|
| 1443 |
+
3. For the reasoning section, make sure the first bullet point is properly formatted.
|
| 1444 |
+
|
| 1445 |
IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
|
| 1446 |
"""
|
| 1447 |
|
|
|
|
| 1451 |
# Get response from doctor agent
|
| 1452 |
response = doctor_agent(msgs)
|
| 1453 |
|
| 1454 |
+
# Remove "Direct Answer:" prefix if it appears
|
| 1455 |
+
response = re.sub(r'^Direct Answer:\s*', '', response)
|
| 1456 |
+
|
| 1457 |
# Extract and process sources
|
| 1458 |
explanation = None
|
| 1459 |
evidence = None
|
|
|
|
| 1467 |
parsed_response = parse_doctor_response(linked_response)
|
| 1468 |
|
| 1469 |
# Get the main response
|
| 1470 |
+
main_response = parsed_response["main_response"]
|
| 1471 |
|
| 1472 |
# Extract reasoning for display
|
| 1473 |
reasoning = parsed_response.get("reasoning", [])
|
|
|
|
| 1481 |
questions = parsed_response.get("follow_up_questions", [])
|
| 1482 |
if questions:
|
| 1483 |
if isinstance(questions, list):
|
| 1484 |
+
# Format as a numbered list starting with 1
|
| 1485 |
follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
|
| 1486 |
else:
|
| 1487 |
follow_up_questions = questions
|
|
|
|
| 1491 |
else:
|
| 1492 |
# If RAG is disabled, just parse the response without source processing
|
| 1493 |
parsed_response = parse_doctor_response(response)
|
| 1494 |
+
main_response = parsed_response["main_response"]
|
| 1495 |
|
| 1496 |
# Extract reasoning
|
| 1497 |
reasoning = parsed_response.get("reasoning", [])
|
|
|
|
| 1505 |
questions = parsed_response.get("follow_up_questions", [])
|
| 1506 |
if questions:
|
| 1507 |
if isinstance(questions, list):
|
| 1508 |
+
# Format as a numbered list starting with 1
|
| 1509 |
follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
|
| 1510 |
else:
|
| 1511 |
follow_up_questions = questions
|