Dhruv-Ty committed on
Commit
7d8679d
·
verified ·
1 Parent(s): 1cf15d0

formatting fixes

Browse files
Files changed (1) hide show
  1. src/model.py +69 -19
src/model.py CHANGED
@@ -1192,6 +1192,9 @@ def fetch_medical_evidence(query, max_results=3):
1192
  # Function to parse doctor agent responses
1193
  def parse_doctor_response(response_text):
1194
  """Parse the doctor agent's response into structured components"""
 
 
 
1195
  # Initialize structure
1196
  parsed = {
1197
  "main_response": response_text,
@@ -1220,8 +1223,10 @@ def parse_doctor_response(response_text):
1220
  if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1221
  # Split on any bullet point marker
1222
  bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1223
- # Remove any empty items
1224
- parsed["follow_up_questions"] = [item.strip() for item in bullet_items if item.strip()]
 
 
1225
  elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1226
  # Split on numbered items
1227
  numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
@@ -1237,10 +1242,15 @@ def parse_doctor_response(response_text):
1237
  reasoning_text = reasoning_match.group(1).strip()
1238
  # Split into bullet points if present
1239
  if '\n-' in reasoning_text:
1240
- parsed["reasoning"] = [item.strip() for item in reasoning_text.split('\n-') if item.strip()]
1241
- # Clean up first item which might not have a dash
1242
- if parsed["reasoning"]:
1243
- parsed["reasoning"][0] = parsed["reasoning"][0].lstrip('- ')
 
 
 
 
 
1244
  else:
1245
  parsed["reasoning"] = [reasoning_text]
1246
 
@@ -1254,6 +1264,30 @@ def parse_doctor_response(response_text):
1254
  else:
1255
  parsed["sources"] = [sources_text]
1256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1257
  # Extract citations in the text (format: [source_id])
1258
  citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
1259
  for citation in citation_matches:
@@ -1352,13 +1386,13 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1352
  output_instructions = """
1353
  Please structure your response clearly.
1354
  **Priority 1: Direct Answer First**
1355
- Begin by providing your best assessment based on the available information. Even if the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
1356
 
1357
  **Priority 2: Follow-up Questions**
1358
  After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
1359
 
1360
  **Main Response Structure:**
1361
- 1. A direct answer to the patient's concerns.
1362
  2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
1363
  3. Recommendations for a treatment plan or next steps.
1364
  4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
@@ -1366,15 +1400,22 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1366
  • [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
1367
 
1368
  Use no more than 3 sources and no fewer than 2 sources.
1369
-
1370
  **After your main response, ALWAYS include these sections:**
1371
- - **Follow-up Questions**: Specific questions to gather additional information.
1372
- - **Reasoning**: Bullet points detailing your clinical reasoning.
1373
  - **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
1374
  - PMID: 12345678 - Author et al. (Year). Title. Journal.
1375
  URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
1376
  - DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
1377
  URL: https://doi.org/10.xxxx/yyyy
 
 
 
 
 
 
 
1378
  IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
1379
  """
1380
  else:
@@ -1382,19 +1423,25 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1382
  output_instructions = """
1383
  Please structure your response clearly.
1384
  **Priority 1: Direct Answer First**
1385
- Begin by providing your best assessment based on the available information. Even if the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
1386
 
1387
  **Priority 2: Follow-up Questions**
1388
  After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
1389
 
1390
  **Main Response Structure:**
1391
- 1. A direct answer to the patient's concerns.
1392
  2. If appropriate, a clear diagnosis or differential diagnosis.
1393
  3. Recommendations for a treatment plan or next steps.
1394
 
1395
  **After your main response, ALWAYS include these sections:**
1396
- - **Follow-up Questions**: Specific questions to gather additional information.
1397
- - **Reasoning**: Bullet points detailing your clinical reasoning.
 
 
 
 
 
 
1398
  IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
1399
  """
1400
 
@@ -1404,6 +1451,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1404
  # Get response from doctor agent
1405
  response = doctor_agent(msgs)
1406
 
 
 
 
1407
  # Extract and process sources
1408
  explanation = None
1409
  evidence = None
@@ -1417,7 +1467,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1417
  parsed_response = parse_doctor_response(linked_response)
1418
 
1419
  # Get the main response
1420
- main_response = linked_response
1421
 
1422
  # Extract reasoning for display
1423
  reasoning = parsed_response.get("reasoning", [])
@@ -1431,7 +1481,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1431
  questions = parsed_response.get("follow_up_questions", [])
1432
  if questions:
1433
  if isinstance(questions, list):
1434
- # Format as a numbered list
1435
  follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
1436
  else:
1437
  follow_up_questions = questions
@@ -1441,7 +1491,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1441
  else:
1442
  # If RAG is disabled, just parse the response without source processing
1443
  parsed_response = parse_doctor_response(response)
1444
- main_response = response
1445
 
1446
  # Extract reasoning
1447
  reasoning = parsed_response.get("reasoning", [])
@@ -1455,7 +1505,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1455
  questions = parsed_response.get("follow_up_questions", [])
1456
  if questions:
1457
  if isinstance(questions, list):
1458
- # Format as a numbered list
1459
  follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
1460
  else:
1461
  follow_up_questions = questions
 
1192
  # Function to parse doctor agent responses
1193
  def parse_doctor_response(response_text):
1194
  """Parse the doctor agent's response into structured components"""
1195
+ # First, remove "Direct Answer:" prefix that might appear at the beginning of the response
1196
+ response_text = re.sub(r'^Direct Answer:\s*', '', response_text)
1197
+
1198
  # Initialize structure
1199
  parsed = {
1200
  "main_response": response_text,
 
1223
  if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1224
  # Split on any bullet point marker
1225
  bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1226
+ # Remove any empty items and ensure first item is properly formatted
1227
+ questions = [item.strip() for item in bullet_items if item.strip()]
1228
+ # The first item might not start with a bullet point
1229
+ parsed["follow_up_questions"] = questions
1230
  elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1231
  # Split on numbered items
1232
  numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
 
1242
  reasoning_text = reasoning_match.group(1).strip()
1243
  # Split into bullet points if present
1244
  if '\n-' in reasoning_text:
1245
+ # Split by newline + dash, but ensure we don't lose any content
1246
+ reasoning_points = []
1247
+ for item in reasoning_text.split('\n-'):
1248
+ if item.strip():
1249
+ # Clean up any dash at the beginning
1250
+ cleaned_item = item.lstrip('- ').strip()
1251
+ if cleaned_item:
1252
+ reasoning_points.append(cleaned_item)
1253
+ parsed["reasoning"] = reasoning_points
1254
  else:
1255
  parsed["reasoning"] = [reasoning_text]
1256
 
 
1264
  else:
1265
  parsed["sources"] = [sources_text]
1266
 
1267
+ # Clean up the main response - remove URLs, PMIDs and DOIs from the text if they're already in the sources section
1268
+ if parsed["sources"]:
1269
+ # Remove URL lines
1270
+ main_response_lines = []
1271
+ skip_lines = 0
1272
+ for line in parsed["main_response"].split('\n'):
1273
+ if skip_lines > 0:
1274
+ skip_lines -= 1
1275
+ continue
1276
+
1277
+ # Skip lines with just URLs
1278
+ if re.match(r'^URL:\s*https?://', line.strip()):
1279
+ skip_lines = 0
1280
+ continue
1281
+
1282
+ # Skip lines with PMIDs or DOIs being displayed alone
1283
+ if re.match(r'^(PMID|DOI):', line.strip()):
1284
+ skip_lines = 0
1285
+ continue
1286
+
1287
+ main_response_lines.append(line)
1288
+
1289
+ parsed["main_response"] = '\n'.join(main_response_lines)
1290
+
1291
  # Extract citations in the text (format: [source_id])
1292
  citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
1293
  for citation in citation_matches:
 
1386
  output_instructions = """
1387
  Please structure your response clearly.
1388
  **Priority 1: Direct Answer First**
1389
+ Begin by providing your best assessment based on the available information without using "Direct Answer:" as a heading. Just start your response directly with the answer. If the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
1390
 
1391
  **Priority 2: Follow-up Questions**
1392
  After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
1393
 
1394
  **Main Response Structure:**
1395
+ 1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
1396
  2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
1397
  3. Recommendations for a treatment plan or next steps.
1398
  4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
 
1400
  • [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
1401
 
1402
  Use no more than 3 sources and no fewer than 2 sources.
1403
+
1404
  **After your main response, ALWAYS include these sections:**
1405
+ - **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
1406
+ - **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first point is properly formatted.
1407
  - **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
1408
  - PMID: 12345678 - Author et al. (Year). Title. Journal.
1409
  URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
1410
  - DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
1411
  URL: https://doi.org/10.xxxx/yyyy
1412
+
1413
+ **IMPORTANT FORMATTING NOTES:**
1414
+ 1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
1415
+ 2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1416
+ 3. Number the follow-up questions starting from 1, not from any other number.
1417
+ 4. For the reasoning section, make sure the first bullet point is properly formatted.
1418
+
1419
  IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
1420
  """
1421
  else:
 
1423
  output_instructions = """
1424
  Please structure your response clearly.
1425
  **Priority 1: Direct Answer First**
1426
+ Begin by providing your best assessment based on the available information without using "Direct Answer:" as a heading. Just start your response directly with the answer. If the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
1427
 
1428
  **Priority 2: Follow-up Questions**
1429
  After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
1430
 
1431
  **Main Response Structure:**
1432
+ 1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
1433
  2. If appropriate, a clear diagnosis or differential diagnosis.
1434
  3. Recommendations for a treatment plan or next steps.
1435
 
1436
  **After your main response, ALWAYS include these sections:**
1437
+ - **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
1438
+ - **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first bullet point is properly formatted.
1439
+
1440
+ **IMPORTANT FORMATTING NOTES:**
1441
+ 1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1442
+ 2. Number the follow-up questions starting from 1, not from any other number.
1443
+ 3. For the reasoning section, make sure the first bullet point is properly formatted.
1444
+
1445
  IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
1446
  """
1447
 
 
1451
  # Get response from doctor agent
1452
  response = doctor_agent(msgs)
1453
 
1454
+ # Remove "Direct Answer:" prefix if it appears
1455
+ response = re.sub(r'^Direct Answer:\s*', '', response)
1456
+
1457
  # Extract and process sources
1458
  explanation = None
1459
  evidence = None
 
1467
  parsed_response = parse_doctor_response(linked_response)
1468
 
1469
  # Get the main response
1470
+ main_response = parsed_response["main_response"]
1471
 
1472
  # Extract reasoning for display
1473
  reasoning = parsed_response.get("reasoning", [])
 
1481
  questions = parsed_response.get("follow_up_questions", [])
1482
  if questions:
1483
  if isinstance(questions, list):
1484
+ # Format as a numbered list starting with 1
1485
  follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
1486
  else:
1487
  follow_up_questions = questions
 
1491
  else:
1492
  # If RAG is disabled, just parse the response without source processing
1493
  parsed_response = parse_doctor_response(response)
1494
+ main_response = parsed_response["main_response"]
1495
 
1496
  # Extract reasoning
1497
  reasoning = parsed_response.get("reasoning", [])
 
1505
  questions = parsed_response.get("follow_up_questions", [])
1506
  if questions:
1507
  if isinstance(questions, list):
1508
+ # Format as a numbered list starting with 1
1509
  follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
1510
  else:
1511
  follow_up_questions = questions