Dhruv-Ty committed on
Commit
748fd1d
·
verified ·
1 Parent(s): 5db0d9d
Files changed (1) hide show
  1. src/model.py +110 -151
src/model.py CHANGED
@@ -1215,115 +1215,74 @@ def parse_doctor_response(response_text):
1215
  if treatment_match:
1216
  parsed["treatment"] = treatment_match.group(2).strip()
1217
 
1218
- # Special case handling for "Additional Questions" followed by "Show Reasoning" format
1219
- if "Additional Questions" in response_text and "Show Reasoning" in response_text:
1220
- # Extract all content between "Additional Questions" and "Show Reasoning"
1221
- additional_questions_match = re.search(r'Additional Questions\s*\n\s*\n(.*?)(?=\nShow Reasoning)', response_text, re.DOTALL)
1222
- if additional_questions_match:
1223
- questions_text = additional_questions_match.group(1).strip()
1224
- # If it has numbered questions, parse those
1225
- if re.search(r'^\d+\.', questions_text):
1226
- # It's a numbered list - split by the numbers
1227
- questions = []
1228
- for q in re.split(r'\d+\.\s+', questions_text):
1229
- if q.strip():
1230
- questions.append(q.strip())
1231
- parsed["follow_up_questions"] = questions
1232
- else:
1233
- # Try to split by question marks
1234
- questions = []
1235
- for q in re.split(r'\?\s+', questions_text):
1236
- if q.strip():
1237
- questions.append(q.strip() + "?")
1238
- parsed["follow_up_questions"] = questions
1239
-
1240
- # Extract all content after "Show Reasoning"
1241
- show_reasoning_match = re.search(r'Show Reasoning\s*\n\s*\n(.*?)(?:\n\n|\Z)', response_text, re.DOTALL)
1242
- if show_reasoning_match:
1243
- reasoning_text = show_reasoning_match.group(1).strip()
1244
- # If it has bullet points, parse those
1245
- if reasoning_text.startswith('-') or '\n-' in reasoning_text:
1246
- reasoning_points = []
1247
- for line in reasoning_text.split('\n'):
1248
- if line.strip().startswith('-'):
1249
- # Remove the bullet point
1250
- reasoning_points.append(line.strip()[1:].strip())
1251
- elif line.strip():
1252
- reasoning_points.append(line.strip())
1253
- parsed["reasoning"] = reasoning_points
1254
- else:
1255
- # Just split by sentences
1256
- sentences = re.split(r'(?<=[.!])\s+', reasoning_text)
1257
- parsed["reasoning"] = [s.strip() for s in sentences if s.strip()]
1258
- else:
1259
- # Try standard format for follow-up questions
1260
- follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
1261
- if follow_up_match:
1262
- follow_up_text = follow_up_match.group(1).strip()
1263
- # Remove any leading markdown formatting (like ** for bold)
1264
- follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
1265
 
1266
- # Check if questions are formatted as a list
1267
- if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1268
- # Split on any bullet point marker
1269
- bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1270
- # Remove any empty items and ensure first item is properly formatted
1271
- questions = []
1272
- for item in bullet_items:
1273
- if item.strip():
1274
- # Remove any markdown formatting from each item
1275
- cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1276
- questions.append(cleaned_item)
1277
- parsed["follow_up_questions"] = questions
1278
- elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1279
- # Split on numbered items
1280
- numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
1281
- # Clean each item and remove any empty ones
1282
- questions = []
1283
- for item in numbered_items:
1284
- if item.strip():
1285
- # Remove any markdown formatting
1286
- cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1287
- questions.append(cleaned_item)
1288
- parsed["follow_up_questions"] = questions
1289
- else:
1290
- # Just use the raw text if no clear list format is detected
1291
- cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
1292
- parsed["follow_up_questions"] = [cleaned_text]
1293
-
1294
- # Try standard format for reasoning
1295
- reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
1296
- if reasoning_match:
1297
- reasoning_text = reasoning_match.group(1).strip()
1298
- # Remove any leading markdown formatting (like ** for bold)
1299
- reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
1300
 
1301
- # Split into bullet points if present
1302
- if '\n-' in reasoning_text:
1303
- # Split by newline + dash, but ensure we don't lose any content
1304
- reasoning_points = []
1305
- lines = reasoning_text.split('\n-')
1306
-
1307
- # Process the first item which might not have a dash prefix
1308
- if lines and lines[0].strip():
1309
- # Clean up any leading/trailing asterisks
1310
- first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
1311
- if first_item:
1312
- reasoning_points.append(first_item)
1313
-
1314
- # Process the rest of the items
1315
- for i in range(1, len(lines)):
1316
- if lines[i].strip():
1317
- # Clean up leading/trailing asterisks and dashes
1318
- cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
1319
- if cleaned_item:
1320
- reasoning_points.append(cleaned_item)
1321
-
1322
- parsed["reasoning"] = reasoning_points
1323
- else:
1324
- # If there are no bullet points, still clean up any markdown
1325
- cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
1326
- parsed["reasoning"] = [cleaned_text]
1327
 
1328
  # Extract sources/references
1329
  sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
@@ -1473,12 +1432,10 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1473
  Use no more than 3 sources and no fewer than 2 sources.
1474
 
1475
  **After your main response, ALWAYS include these sections:**
1476
- - **Follow-up Questions**: List your questions as plain text without any numbering or bullet points.
1477
- DO NOT include numbers like "1." or bullets like "•" or "-" before your questions.
1478
- Each question should be on a new line.
1479
- - **Reasoning**: List your reasoning as plain text without any bullet points.
1480
- DO NOT include bullets like "•" or "-" before your points.
1481
- Each point should be on a new line.
1482
  - **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
1483
  - PMID: 12345678 - Author et al. (Year). Title. Journal.
1484
  URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
@@ -1487,9 +1444,10 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1487
 
1488
  **IMPORTANT FORMATTING NOTES:**
1489
  1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
1490
- 2. DO NOT add any numbering to your follow-up questions or bullet points to your reasoning - these will be added automatically by the system.
1491
- 3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
1492
- 4. Make sure all text is clean, with no markdown formatting, no numbers, and no bullet points.
 
1493
 
1494
  IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
1495
  """
@@ -1509,17 +1467,16 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1509
  3. Recommendations for a treatment plan or next steps.
1510
 
1511
  **After your main response, ALWAYS include these sections:**
1512
- - **Follow-up Questions**: List your questions as plain text without any numbering or bullet points.
1513
- DO NOT include numbers like "1." or bullets like "•" or "-" before your questions.
1514
- Each question should be on a new line.
1515
- - **Reasoning**: List your reasoning as plain text without any bullet points.
1516
- DO NOT include bullets like "•" or "-" before your points.
1517
- Each point should be on a new line.
1518
 
1519
  **IMPORTANT FORMATTING NOTES:**
1520
- 1. DO NOT add any numbering to your follow-up questions or bullet points to your reasoning - these will be added automatically by the system.
1521
- 2. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
1522
- 3. Make sure all text is clean, with no markdown formatting, no numbers, and no bullet points.
 
1523
 
1524
  IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
1525
  """
@@ -1535,10 +1492,6 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1535
 
1536
  # Remove any markdown formatting (** for bold) that might appear at the beginning of lines
1537
  response = re.sub(r'\n\s*\*\*\s*', '\n', response)
1538
-
1539
- # Remove any numbering or bullets the model might have included
1540
- response = re.sub(r'\n\s*\d+\.\s+', '\n', response) # Remove numbered lists
1541
- response = re.sub(r'\n\s*[-•*]\s+', '\n', response) # Remove bullet points
1542
 
1543
  # Extract and process sources
1544
  explanation = None
@@ -1557,31 +1510,47 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1557
 
1558
  # Extract reasoning for display
1559
  reasoning = parsed_response.get("reasoning", [])
1560
-
1561
  if reasoning:
1562
  if isinstance(reasoning, list):
1563
- # Join with bullet points if it's a list
1564
- explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
1565
  else:
1566
- # It's a single string - format as bullet points
1567
- explanation = f"- {reasoning.strip()}"
1568
 
1569
  # Extract follow-up questions
1570
  questions = parsed_response.get("follow_up_questions", [])
1571
-
1572
  if questions:
1573
  if isinstance(questions, list):
1574
- # Format as a numbered list
1575
- follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()])
1576
  else:
1577
- # It's a single string - format as a single question
1578
- follow_up_questions = f"1. {questions.strip()}"
 
 
1579
  else:
1580
  # If RAG is disabled, just parse the response without source processing
1581
  parsed_response = parse_doctor_response(response)
1582
  main_response = parsed_response["main_response"]
1583
 
1584
- # Extract reasoning reasoning = parsed_response.get("reasoning", []) if reasoning: if isinstance(reasoning, list): # Join with bullet points if it's a list explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()]) else: # It's a single string - format as bullet points explanation = f"- {reasoning.strip()}" # Extract follow-up questions questions = parsed_response.get("follow_up_questions", []) if questions: if isinstance(questions, list): # Format as a numbered list follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()]) else: # It's a single string - format as a single question follow_up_questions = f"1. {questions.strip()}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1585
 
1586
  # Return four values: main response, explanation, follow-up questions, and evidence
1587
  return main_response, explanation, follow_up_questions, evidence_snippets
@@ -1624,11 +1593,6 @@ def run_consultation(use_rag=True):
1624
  print("=" * 30)
1625
  # Ensure explanation is not empty before printing, or print a default message
1626
  if explanation and explanation.strip() and explanation.strip() != "="*50:
1627
- # Format explanation as bullet points if it's not already
1628
- if isinstance(explanation, list):
1629
- explanation = "\n".join([f"- {point}" for point in explanation])
1630
- elif not explanation.startswith("-"):
1631
- explanation = "\n".join([f"- {point}" for point in explanation.split("\n")])
1632
  print(explanation)
1633
  else:
1634
  print("No detailed explanation or sources were generated for this response.")
@@ -1638,17 +1602,12 @@ def run_consultation(use_rag=True):
1638
  print("\n" + "=" * 30)
1639
  print("FOLLOW-UP QUESTIONS")
1640
  print("=" * 30)
1641
- # Format questions as numbered list if it's not already
1642
- if isinstance(follow_up_questions, list):
1643
- follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions)])
1644
- elif not follow_up_questions.startswith("1."):
1645
- follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions.split("\n"))])
1646
  print(follow_up_questions)
1647
 
1648
  # Add Open Access Legend if evidence sources were found
1649
  if evidence:
1650
  print("\nLEGEND: 🔓 = Open Access (full text available)")
1651
-
1652
  # Check if we need to continue with follow-up or start a new case
1653
  next_action = input("\nFollow-up? (or 'next' for new case, 'exit' to end): ")
1654
 
 
1215
  if treatment_match:
1216
  parsed["treatment"] = treatment_match.group(2).strip()
1217
 
1218
+ # Try to extract follow-up questions
1219
+ follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
1220
+ if follow_up_match:
1221
+ follow_up_text = follow_up_match.group(1).strip()
1222
+ # Remove any leading markdown formatting (like ** for bold)
1223
+ follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
1224
+
1225
+ # Check if questions are formatted as a list
1226
+ if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1227
+ # Split on any bullet point marker
1228
+ bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1229
+ # Remove any empty items and ensure first item is properly formatted
1230
+ questions = []
1231
+ for item in bullet_items:
1232
+ if item.strip():
1233
+ # Remove any markdown formatting from each item
1234
+ cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1235
+ questions.append(cleaned_item)
1236
+ parsed["follow_up_questions"] = questions
1237
+ elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1238
+ # Split on numbered items
1239
+ numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
1240
+ # Clean each item and remove any empty ones
1241
+ questions = []
1242
+ for item in numbered_items:
1243
+ if item.strip():
1244
+ # Remove any markdown formatting
1245
+ cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1246
+ questions.append(cleaned_item)
1247
+ parsed["follow_up_questions"] = questions
1248
+ else:
1249
+ # Just use the raw text if no clear list format is detected
1250
+ cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
1251
+ parsed["follow_up_questions"] = [cleaned_text]
1252
+
1253
+ # Try to extract reasoning if present
1254
+ reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
1255
+ if reasoning_match:
1256
+ reasoning_text = reasoning_match.group(1).strip()
1257
+ # Remove any leading markdown formatting (like ** for bold)
1258
+ reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
1259
+
1260
+ # Split into bullet points if present
1261
+ if '\n-' in reasoning_text:
1262
+ # Split by newline + dash, but ensure we don't lose any content
1263
+ reasoning_points = []
1264
+ lines = reasoning_text.split('\n-')
1265
 
1266
+ # Process the first item which might not have a dash prefix
1267
+ if lines and lines[0].strip():
1268
+ # Clean up any leading/trailing asterisks
1269
+ first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
1270
+ if first_item:
1271
+ reasoning_points.append(first_item)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272
 
1273
+ # Process the rest of the items
1274
+ for i in range(1, len(lines)):
1275
+ if lines[i].strip():
1276
+ # Clean up leading/trailing asterisks and dashes
1277
+ cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
1278
+ if cleaned_item:
1279
+ reasoning_points.append(cleaned_item)
1280
+
1281
+ parsed["reasoning"] = reasoning_points
1282
+ else:
1283
+ # If there are no bullet points, still clean up any markdown
1284
+ cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
1285
+ parsed["reasoning"] = [cleaned_text]
 
 
 
 
 
 
 
 
 
 
 
 
 
1286
 
1287
  # Extract sources/references
1288
  sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
 
1432
  Use no more than 3 sources and no fewer than 2 sources.
1433
 
1434
  **After your main response, ALWAYS include these sections:**
1435
+ - **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
1436
+ Do NOT start the first question with asterisks (**). Format each question properly with just a number.
1437
+ - **Reasoning**: Bullet points detailing your clinical reasoning.
1438
+ Do NOT start the first point with asterisks (**). Format each bullet point properly.
 
 
1439
  - **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
1440
  - PMID: 12345678 - Author et al. (Year). Title. Journal.
1441
  URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
 
1444
 
1445
  **IMPORTANT FORMATTING NOTES:**
1446
  1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
1447
+ 2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1448
+ 3. Number the follow-up questions starting from 1, not from any other number.
1449
+ 4. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
1450
+ 5. Make sure all bullet points and numbered items are clean, with no markdown formatting.
1451
 
1452
  IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
1453
  """
 
1467
  3. Recommendations for a treatment plan or next steps.
1468
 
1469
  **After your main response, ALWAYS include these sections:**
1470
+ - **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
1471
+ Do NOT start the first question with asterisks (**). Format each question properly with just a number.
1472
+ - **Reasoning**: Bullet points detailing your clinical reasoning.
1473
+ Do NOT start the first bullet point with asterisks (**). Format each point properly.
 
 
1474
 
1475
  **IMPORTANT FORMATTING NOTES:**
1476
+ 1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1477
+ 2. Number the follow-up questions starting from 1, not from any other number.
1478
+ 3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
1479
+ 4. Make sure all bullet points and numbered items are clean, with no markdown formatting.
1480
 
1481
  IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
1482
  """
 
1492
 
1493
  # Remove any markdown formatting (** for bold) that might appear at the beginning of lines
1494
  response = re.sub(r'\n\s*\*\*\s*', '\n', response)
 
 
 
 
1495
 
1496
  # Extract and process sources
1497
  explanation = None
 
1510
 
1511
  # Extract reasoning for display
1512
  reasoning = parsed_response.get("reasoning", [])
 
1513
  if reasoning:
1514
  if isinstance(reasoning, list):
1515
+ explanation = "\n".join([f"- {r}" for r in reasoning])
 
1516
  else:
1517
+ explanation = reasoning
 
1518
 
1519
  # Extract follow-up questions
1520
  questions = parsed_response.get("follow_up_questions", [])
 
1521
  if questions:
1522
  if isinstance(questions, list):
1523
+ # Format as a numbered list starting with 1
1524
+ follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
1525
  else:
1526
+ follow_up_questions = questions
1527
+
1528
+ # Debug: Print follow-up questions
1529
+ print(f"Follow-up questions generated: {follow_up_questions}")
1530
  else:
1531
  # If RAG is disabled, just parse the response without source processing
1532
  parsed_response = parse_doctor_response(response)
1533
  main_response = parsed_response["main_response"]
1534
 
1535
+ # Extract reasoning
1536
+ reasoning = parsed_response.get("reasoning", [])
1537
+ if reasoning:
1538
+ if isinstance(reasoning, list):
1539
+ explanation = "\n".join([f"- {r}" for r in reasoning])
1540
+ else:
1541
+ explanation = reasoning
1542
+
1543
+ # Extract follow-up questions
1544
+ questions = parsed_response.get("follow_up_questions", [])
1545
+ if questions:
1546
+ if isinstance(questions, list):
1547
+ # Format as a numbered list starting with 1
1548
+ follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
1549
+ else:
1550
+ follow_up_questions = questions
1551
+
1552
+ # Debug: Print follow-up questions
1553
+ print(f"Follow-up questions generated: {follow_up_questions}")
1554
 
1555
  # Return four values: main response, explanation, follow-up questions, and evidence
1556
  return main_response, explanation, follow_up_questions, evidence_snippets
 
1593
  print("=" * 30)
1594
  # Ensure explanation is not empty before printing, or print a default message
1595
  if explanation and explanation.strip() and explanation.strip() != "="*50:
 
 
 
 
 
1596
  print(explanation)
1597
  else:
1598
  print("No detailed explanation or sources were generated for this response.")
 
1602
  print("\n" + "=" * 30)
1603
  print("FOLLOW-UP QUESTIONS")
1604
  print("=" * 30)
 
 
 
 
 
1605
  print(follow_up_questions)
1606
 
1607
  # Add Open Access Legend if evidence sources were found
1608
  if evidence:
1609
  print("\nLEGEND: 🔓 = Open Access (full text available)")
1610
+
1611
  # Check if we need to continue with follow-up or start a new case
1612
  next_action = input("\nFollow-up? (or 'next' for new case, 'exit' to end): ")
1613