Dhruv-Ty committed on
Commit
5db0d9d
·
verified ·
1 Parent(s): d284c4b

Update src/model.py

Browse files
Files changed (1) hide show
  1. src/model.py +128 -185
src/model.py CHANGED
@@ -1215,74 +1215,115 @@ def parse_doctor_response(response_text):
1215
  if treatment_match:
1216
  parsed["treatment"] = treatment_match.group(2).strip()
1217
 
1218
- # Try to extract follow-up questions
1219
- follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
1220
- if follow_up_match:
1221
- follow_up_text = follow_up_match.group(1).strip()
1222
- # Remove any leading markdown formatting (like ** for bold)
1223
- follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
1224
-
1225
- # Check if questions are formatted as a list
1226
- if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1227
- # Split on any bullet point marker
1228
- bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1229
- # Remove any empty items and ensure first item is properly formatted
1230
- questions = []
1231
- for item in bullet_items:
1232
- if item.strip():
1233
- # Remove any markdown formatting from each item
1234
- cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1235
- questions.append(cleaned_item)
1236
- parsed["follow_up_questions"] = questions
1237
- elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1238
- # Split on numbered items
1239
- numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
1240
- # Clean each item and remove any empty ones
1241
- questions = []
1242
- for item in numbered_items:
1243
- if item.strip():
1244
- # Remove any markdown formatting
1245
- cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1246
- questions.append(cleaned_item)
1247
- parsed["follow_up_questions"] = questions
1248
- else:
1249
- # Just use the raw text if no clear list format is detected
1250
- cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
1251
- parsed["follow_up_questions"] = [cleaned_text]
1252
-
1253
- # Try to extract reasoning if present
1254
- reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
1255
- if reasoning_match:
1256
- reasoning_text = reasoning_match.group(1).strip()
1257
- # Remove any leading markdown formatting (like ** for bold)
1258
- reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
1259
-
1260
- # Split into bullet points if present
1261
- if '\n-' in reasoning_text:
1262
- # Split by newline + dash, but ensure we don't lose any content
1263
- reasoning_points = []
1264
- lines = reasoning_text.split('\n-')
1265
 
1266
- # Process the first item which might not have a dash prefix
1267
- if lines and lines[0].strip():
1268
- # Clean up any leading/trailing asterisks
1269
- first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
1270
- if first_item:
1271
- reasoning_points.append(first_item)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272
 
1273
- # Process the rest of the items
1274
- for i in range(1, len(lines)):
1275
- if lines[i].strip():
1276
- # Clean up leading/trailing asterisks and dashes
1277
- cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
1278
- if cleaned_item:
1279
- reasoning_points.append(cleaned_item)
1280
-
1281
- parsed["reasoning"] = reasoning_points
1282
- else:
1283
- # If there are no bullet points, still clean up any markdown
1284
- cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
1285
- parsed["reasoning"] = [cleaned_text]
 
 
 
 
 
 
 
 
 
 
 
 
 
1286
 
1287
  # Extract sources/references
1288
  sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
@@ -1516,89 +1557,31 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1516
 
1517
  # Extract reasoning for display
1518
  reasoning = parsed_response.get("reasoning", [])
 
1519
  if reasoning:
1520
  if isinstance(reasoning, list):
1521
- # Add bullet points to reasoning items
1522
  explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
1523
  else:
1524
- explanation = reasoning
 
1525
 
1526
  # Extract follow-up questions
1527
  questions = parsed_response.get("follow_up_questions", [])
 
1528
  if questions:
1529
  if isinstance(questions, list):
1530
- # Format as a numbered list starting with 1
1531
- # Remove any existing numbers or bullets first
1532
- clean_questions = []
1533
- for q in questions:
1534
- if q.strip():
1535
- # Remove any existing numbering or bullets
1536
- clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
1537
- clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
1538
- clean_questions.append(clean_q)
1539
-
1540
- # Add proper numbering
1541
- follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
1542
  else:
1543
- # If it's a string, split by newlines and format each line as a question
1544
- question_lines = questions.split('\n')
1545
- clean_questions = []
1546
- for q in question_lines:
1547
- if q.strip():
1548
- # Remove any existing numbering or bullets
1549
- clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
1550
- clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
1551
- clean_questions.append(clean_q)
1552
-
1553
- follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
1554
-
1555
- # Debug: Print follow-up questions
1556
- print(f"Follow-up questions generated: {follow_up_questions}")
1557
  else:
1558
  # If RAG is disabled, just parse the response without source processing
1559
  parsed_response = parse_doctor_response(response)
1560
  main_response = parsed_response["main_response"]
1561
 
1562
- # Extract reasoning
1563
- reasoning = parsed_response.get("reasoning", [])
1564
- if reasoning:
1565
- if isinstance(reasoning, list):
1566
- # Add bullet points to reasoning items
1567
- explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
1568
- else:
1569
- # If it's a string, split by newlines and format each line as a bullet point
1570
- reasoning_lines = reasoning.split('\n')
1571
- explanation = "\n".join([f"- {r.strip()}" for r in reasoning_lines if r.strip()])
1572
-
1573
- # Extract follow-up questions
1574
- questions = parsed_response.get("follow_up_questions", [])
1575
- if questions:
1576
- if isinstance(questions, list):
1577
- # Clean each question and add proper numbering
1578
- clean_questions = []
1579
- for q in questions:
1580
- if q.strip():
1581
- # Remove any existing numbering or bullets
1582
- clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
1583
- clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
1584
- clean_questions.append(clean_q)
1585
-
1586
- follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
1587
- else:
1588
- # If it's a string, split by newlines and format each line as a question
1589
- question_lines = questions.split('\n')
1590
- clean_questions = []
1591
- for q in question_lines:
1592
- if q.strip():
1593
- # Remove any existing numbering or bullets
1594
- clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
1595
- clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
1596
- clean_questions.append(clean_q)
1597
-
1598
- follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
1599
-
1600
- # Debug: Print follow-up questions
1601
- print(f"Follow-up questions generated: {follow_up_questions}")
1602
 
1603
  # Return four values: main response, explanation, follow-up questions, and evidence
1604
  return main_response, explanation, follow_up_questions, evidence_snippets
@@ -1641,26 +1624,11 @@ def run_consultation(use_rag=True):
1641
  print("=" * 30)
1642
  # Ensure explanation is not empty before printing, or print a default message
1643
  if explanation and explanation.strip() and explanation.strip() != "="*50:
1644
- # Format explanation for display - ensure bullet points
1645
- if not explanation.startswith("-"):
1646
- # Split the paragraph by periods to create separate points
1647
- formatted_explanation = []
1648
-
1649
- # Better sentence splitting with regex
1650
- sentences = re.split(r'(?<=[.!])\s+', explanation)
1651
- for point in sentences:
1652
- if point.strip() and len(point.strip()) > 10: # Avoid very short fragments
1653
- formatted_explanation.append(f"- {point.strip()}" + ("." if not point.strip().endswith((".", "!", "?")) else ""))
1654
-
1655
- # If we didn't get good splitting, try a simpler approach
1656
- if len(formatted_explanation) <= 1 and len(explanation) > 50:
1657
- formatted_explanation = []
1658
- lines = explanation.split(". ")
1659
- for line in lines:
1660
- if line.strip():
1661
- formatted_explanation.append(f"- {line.strip()}.")
1662
-
1663
- explanation = "\n".join(formatted_explanation)
1664
  print(explanation)
1665
  else:
1666
  print("No detailed explanation or sources were generated for this response.")
@@ -1670,38 +1638,13 @@ def run_consultation(use_rag=True):
1670
  print("\n" + "=" * 30)
1671
  print("FOLLOW-UP QUESTIONS")
1672
  print("=" * 30)
1673
- # Format questions for display - ensure numbering
1674
- if not follow_up_questions.startswith("1."):
1675
- # Split paragraph by question marks to separate questions
1676
- formatted_questions = []
1677
- i = 1
1678
- # Modified regex to handle different question formats
1679
- for q in re.split(r'(?<=[?])\s+', follow_up_questions):
1680
- if q.strip():
1681
- if not q.strip().endswith("?"):
1682
- # If this doesn't end with a question mark, it might be multiple questions
1683
- subquestions = re.split(r'(?<=\?)\s+', q)
1684
- for sq in subquestions:
1685
- if sq.strip():
1686
- formatted_questions.append(f"{i}. {sq.strip()}" + ("?" if not sq.strip().endswith("?") else ""))
1687
- i += 1
1688
- else:
1689
- formatted_questions.append(f"{i}. {q.strip()}")
1690
- i += 1
1691
-
1692
- # If we couldn't split properly, try another approach - just add line breaks after each sentence
1693
- if len(formatted_questions) <= 1 and len(follow_up_questions) > 50:
1694
- formatted_questions = []
1695
- i = 1
1696
- sentences = re.split(r'[.?]\s+', follow_up_questions)
1697
- for s in sentences:
1698
- if s.strip() and len(s.strip()) > 10: # Avoid very short fragments
1699
- formatted_questions.append(f"{i}. {s.strip()}?")
1700
- i += 1
1701
-
1702
- follow_up_questions = "\n".join(formatted_questions)
1703
  print(follow_up_questions)
1704
-
1705
  # Add Open Access Legend if evidence sources were found
1706
  if evidence:
1707
  print("\nLEGEND: 🔓 = Open Access (full text available)")
 
1215
  if treatment_match:
1216
  parsed["treatment"] = treatment_match.group(2).strip()
1217
 
1218
+ # Special case handling for "Additional Questions" followed by "Show Reasoning" format
1219
+ if "Additional Questions" in response_text and "Show Reasoning" in response_text:
1220
+ # Extract all content between "Additional Questions" and "Show Reasoning"
1221
+ additional_questions_match = re.search(r'Additional Questions\s*\n\s*\n(.*?)(?=\nShow Reasoning)', response_text, re.DOTALL)
1222
+ if additional_questions_match:
1223
+ questions_text = additional_questions_match.group(1).strip()
1224
+ # If it has numbered questions, parse those
1225
+ if re.search(r'^\d+\.', questions_text):
1226
+ # It's a numbered list - split by the numbers
1227
+ questions = []
1228
+ for q in re.split(r'\d+\.\s+', questions_text):
1229
+ if q.strip():
1230
+ questions.append(q.strip())
1231
+ parsed["follow_up_questions"] = questions
1232
+ else:
1233
+ # Try to split by question marks
1234
+ questions = []
1235
+ for q in re.split(r'\?\s+', questions_text):
1236
+ if q.strip():
1237
+ questions.append(q.strip() + "?")
1238
+ parsed["follow_up_questions"] = questions
1239
+
1240
+ # Extract all content after "Show Reasoning"
1241
+ show_reasoning_match = re.search(r'Show Reasoning\s*\n\s*\n(.*?)(?:\n\n|\Z)', response_text, re.DOTALL)
1242
+ if show_reasoning_match:
1243
+ reasoning_text = show_reasoning_match.group(1).strip()
1244
+ # If it has bullet points, parse those
1245
+ if reasoning_text.startswith('-') or '\n-' in reasoning_text:
1246
+ reasoning_points = []
1247
+ for line in reasoning_text.split('\n'):
1248
+ if line.strip().startswith('-'):
1249
+ # Remove the bullet point
1250
+ reasoning_points.append(line.strip()[1:].strip())
1251
+ elif line.strip():
1252
+ reasoning_points.append(line.strip())
1253
+ parsed["reasoning"] = reasoning_points
1254
+ else:
1255
+ # Just split by sentences
1256
+ sentences = re.split(r'(?<=[.!])\s+', reasoning_text)
1257
+ parsed["reasoning"] = [s.strip() for s in sentences if s.strip()]
1258
+ else:
1259
+ # Try standard format for follow-up questions
1260
+ follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
1261
+ if follow_up_match:
1262
+ follow_up_text = follow_up_match.group(1).strip()
1263
+ # Remove any leading markdown formatting (like ** for bold)
1264
+ follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
1265
 
1266
+ # Check if questions are formatted as a list
1267
+ if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1268
+ # Split on any bullet point marker
1269
+ bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1270
+ # Remove any empty items and ensure first item is properly formatted
1271
+ questions = []
1272
+ for item in bullet_items:
1273
+ if item.strip():
1274
+ # Remove any markdown formatting from each item
1275
+ cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1276
+ questions.append(cleaned_item)
1277
+ parsed["follow_up_questions"] = questions
1278
+ elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1279
+ # Split on numbered items
1280
+ numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
1281
+ # Clean each item and remove any empty ones
1282
+ questions = []
1283
+ for item in numbered_items:
1284
+ if item.strip():
1285
+ # Remove any markdown formatting
1286
+ cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1287
+ questions.append(cleaned_item)
1288
+ parsed["follow_up_questions"] = questions
1289
+ else:
1290
+ # Just use the raw text if no clear list format is detected
1291
+ cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
1292
+ parsed["follow_up_questions"] = [cleaned_text]
1293
+
1294
+ # Try standard format for reasoning
1295
+ reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
1296
+ if reasoning_match:
1297
+ reasoning_text = reasoning_match.group(1).strip()
1298
+ # Remove any leading markdown formatting (like ** for bold)
1299
+ reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
1300
 
1301
+ # Split into bullet points if present
1302
+ if '\n-' in reasoning_text:
1303
+ # Split by newline + dash, but ensure we don't lose any content
1304
+ reasoning_points = []
1305
+ lines = reasoning_text.split('\n-')
1306
+
1307
+ # Process the first item which might not have a dash prefix
1308
+ if lines and lines[0].strip():
1309
+ # Clean up any leading/trailing asterisks
1310
+ first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
1311
+ if first_item:
1312
+ reasoning_points.append(first_item)
1313
+
1314
+ # Process the rest of the items
1315
+ for i in range(1, len(lines)):
1316
+ if lines[i].strip():
1317
+ # Clean up leading/trailing asterisks and dashes
1318
+ cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
1319
+ if cleaned_item:
1320
+ reasoning_points.append(cleaned_item)
1321
+
1322
+ parsed["reasoning"] = reasoning_points
1323
+ else:
1324
+ # If there are no bullet points, still clean up any markdown
1325
+ cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
1326
+ parsed["reasoning"] = [cleaned_text]
1327
 
1328
  # Extract sources/references
1329
  sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
 
1557
 
1558
  # Extract reasoning for display
1559
  reasoning = parsed_response.get("reasoning", [])
1560
+
1561
  if reasoning:
1562
  if isinstance(reasoning, list):
1563
+ # Join with bullet points if it's a list
1564
  explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
1565
  else:
1566
+ # It's a single string - format as bullet points
1567
+ explanation = f"- {reasoning.strip()}"
1568
 
1569
  # Extract follow-up questions
1570
  questions = parsed_response.get("follow_up_questions", [])
1571
+
1572
  if questions:
1573
  if isinstance(questions, list):
1574
+ # Format as a numbered list
1575
+ follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()])
 
 
 
 
 
 
 
 
 
 
1576
  else:
1577
+ # It's a single string - format as a single question
1578
+ follow_up_questions = f"1. {questions.strip()}"
 
 
 
 
 
 
 
 
 
 
 
 
1579
  else:
1580
  # If RAG is disabled, just parse the response without source processing
1581
  parsed_response = parse_doctor_response(response)
1582
  main_response = parsed_response["main_response"]
1583
 
1584
+ # Extract reasoning reasoning = parsed_response.get("reasoning", []) if reasoning: if isinstance(reasoning, list): # Join with bullet points if it's a list explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()]) else: # It's a single string - format as bullet points explanation = f"- {reasoning.strip()}" # Extract follow-up questions questions = parsed_response.get("follow_up_questions", []) if questions: if isinstance(questions, list): # Format as a numbered list follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()]) else: # It's a single string - format as a single question follow_up_questions = f"1. {questions.strip()}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1585
 
1586
  # Return four values: main response, explanation, follow-up questions, and evidence
1587
  return main_response, explanation, follow_up_questions, evidence_snippets
 
1624
  print("=" * 30)
1625
  # Ensure explanation is not empty before printing, or print a default message
1626
  if explanation and explanation.strip() and explanation.strip() != "="*50:
1627
+ # Format explanation as bullet points if it's not already
1628
+ if isinstance(explanation, list):
1629
+ explanation = "\n".join([f"- {point}" for point in explanation])
1630
+ elif not explanation.startswith("-"):
1631
+ explanation = "\n".join([f"- {point}" for point in explanation.split("\n")])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1632
  print(explanation)
1633
  else:
1634
  print("No detailed explanation or sources were generated for this response.")
 
1638
  print("\n" + "=" * 30)
1639
  print("FOLLOW-UP QUESTIONS")
1640
  print("=" * 30)
1641
+ # Format questions as numbered list if it's not already
1642
+ if isinstance(follow_up_questions, list):
1643
+ follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions)])
1644
+ elif not follow_up_questions.startswith("1."):
1645
+ follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions.split("\n"))])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1646
  print(follow_up_questions)
1647
+
1648
  # Add Open Access Legend if evidence sources were found
1649
  if evidence:
1650
  print("\nLEGEND: 🔓 = Open Access (full text available)")