Dhruv-Ty committed on
Commit
1689af9
·
verified ·
1 Parent(s): 7d8679d

** formatting fix

Browse files
Files changed (1) hide show
  1. src/model.py +53 -14
src/model.py CHANGED
@@ -1219,40 +1219,70 @@ def parse_doctor_response(response_text):
1219
  follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
1220
  if follow_up_match:
1221
  follow_up_text = follow_up_match.group(1).strip()
 
 
 
1222
  # Check if questions are formatted as a list
1223
  if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1224
  # Split on any bullet point marker
1225
  bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1226
  # Remove any empty items and ensure first item is properly formatted
1227
- questions = [item.strip() for item in bullet_items if item.strip()]
1228
- # The first item might not start with a bullet point
 
 
 
 
1229
  parsed["follow_up_questions"] = questions
1230
  elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1231
  # Split on numbered items
1232
  numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
1233
- # Remove any empty items
1234
- parsed["follow_up_questions"] = [item.strip() for item in numbered_items if item.strip()]
 
 
 
 
 
 
1235
  else:
1236
  # Just use the raw text if no clear list format is detected
1237
- parsed["follow_up_questions"] = [follow_up_text]
 
1238
 
1239
  # Try to extract reasoning if present
1240
  reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
1241
  if reasoning_match:
1242
  reasoning_text = reasoning_match.group(1).strip()
 
 
 
1243
  # Split into bullet points if present
1244
  if '\n-' in reasoning_text:
1245
  # Split by newline + dash, but ensure we don't lose any content
1246
  reasoning_points = []
1247
- for item in reasoning_text.split('\n-'):
1248
- if item.strip():
1249
- # Clean up any dash at the beginning
1250
- cleaned_item = item.lstrip('- ').strip()
 
 
 
 
 
 
 
 
 
 
1251
  if cleaned_item:
1252
  reasoning_points.append(cleaned_item)
 
1253
  parsed["reasoning"] = reasoning_points
1254
  else:
1255
- parsed["reasoning"] = [reasoning_text]
 
 
1256
 
1257
  # Extract sources/references
1258
  sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
@@ -1403,7 +1433,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1403
 
1404
  **After your main response, ALWAYS include these sections:**
1405
  - **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
1406
- - **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first point is properly formatted.
 
 
1407
  - **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
1408
  - PMID: 12345678 - Author et al. (Year). Title. Journal.
1409
  URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
@@ -1414,7 +1446,8 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1414
  1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
1415
  2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1416
  3. Number the follow-up questions starting from 1, not from any other number.
1417
- 4. For the reasoning section, make sure the first bullet point is properly formatted.
 
1418
 
1419
  IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
1420
  """
@@ -1435,12 +1468,15 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1435
 
1436
  **After your main response, ALWAYS include these sections:**
1437
  - **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
1438
- - **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first bullet point is properly formatted.
 
 
1439
 
1440
  **IMPORTANT FORMATTING NOTES:**
1441
  1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1442
  2. Number the follow-up questions starting from 1, not from any other number.
1443
- 3. For the reasoning section, make sure the first bullet point is properly formatted.
 
1444
 
1445
  IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
1446
  """
@@ -1453,6 +1489,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
1453
 
1454
  # Remove "Direct Answer:" prefix if it appears
1455
  response = re.sub(r'^Direct Answer:\s*', '', response)
 
 
 
1456
 
1457
  # Extract and process sources
1458
  explanation = None
 
1219
  follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
1220
  if follow_up_match:
1221
  follow_up_text = follow_up_match.group(1).strip()
1222
+ # Remove any leading markdown formatting (like ** for bold)
1223
+ follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
1224
+
1225
  # Check if questions are formatted as a list
1226
  if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
1227
  # Split on any bullet point marker
1228
  bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
1229
  # Remove any empty items and ensure first item is properly formatted
1230
+ questions = []
1231
+ for item in bullet_items:
1232
+ if item.strip():
1233
+ # Remove any markdown formatting from each item
1234
+ cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1235
+ questions.append(cleaned_item)
1236
  parsed["follow_up_questions"] = questions
1237
  elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
1238
  # Split on numbered items
1239
  numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
1240
+ # Clean each item and remove any empty ones
1241
+ questions = []
1242
+ for item in numbered_items:
1243
+ if item.strip():
1244
+ # Remove any markdown formatting
1245
+ cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
1246
+ questions.append(cleaned_item)
1247
+ parsed["follow_up_questions"] = questions
1248
  else:
1249
  # Just use the raw text if no clear list format is detected
1250
+ cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
1251
+ parsed["follow_up_questions"] = [cleaned_text]
1252
 
1253
  # Try to extract reasoning if present
1254
  reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
1255
  if reasoning_match:
1256
  reasoning_text = reasoning_match.group(1).strip()
1257
+ # Remove any leading markdown formatting (like ** for bold)
1258
+ reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
1259
+
1260
  # Split into bullet points if present
1261
  if '\n-' in reasoning_text:
1262
  # Split by newline + dash, but ensure we don't lose any content
1263
  reasoning_points = []
1264
+ lines = reasoning_text.split('\n-')
1265
+
1266
+ # Process the first item which might not have a dash prefix
1267
+ if lines and lines[0].strip():
1268
+ # Clean up any leading/trailing asterisks
1269
+ first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
1270
+ if first_item:
1271
+ reasoning_points.append(first_item)
1272
+
1273
+ # Process the rest of the items
1274
+ for i in range(1, len(lines)):
1275
+ if lines[i].strip():
1276
+ # Clean up leading/trailing asterisks and dashes
1277
+ cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
1278
  if cleaned_item:
1279
  reasoning_points.append(cleaned_item)
1280
+
1281
  parsed["reasoning"] = reasoning_points
1282
  else:
1283
+ # If there are no bullet points, still clean up any markdown
1284
+ cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
1285
+ parsed["reasoning"] = [cleaned_text]
1286
 
1287
  # Extract sources/references
1288
  sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
 
1433
 
1434
  **After your main response, ALWAYS include these sections:**
1435
  - **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
1436
+ Do NOT start the first question with asterisks (**). Format each question properly with just a number.
1437
+ - **Reasoning**: Bullet points detailing your clinical reasoning.
1438
+ Do NOT start the first point with asterisks (**). Format each bullet point properly.
1439
  - **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
1440
  - PMID: 12345678 - Author et al. (Year). Title. Journal.
1441
  URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
 
1446
  1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
1447
  2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1448
  3. Number the follow-up questions starting from 1, not from any other number.
1449
+ 4. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
1450
+ 5. Make sure all bullet points and numbered items are clean, with no markdown formatting.
1451
 
1452
  IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
1453
  """
 
1468
 
1469
  **After your main response, ALWAYS include these sections:**
1470
  - **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
1471
+ Do NOT start the first question with asterisks (**). Format each question properly with just a number.
1472
+ - **Reasoning**: Bullet points detailing your clinical reasoning.
1473
+ Do NOT start the first bullet point with asterisks (**). Format each point properly.
1474
 
1475
  **IMPORTANT FORMATTING NOTES:**
1476
  1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
1477
  2. Number the follow-up questions starting from 1, not from any other number.
1478
+ 3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
1479
+ 4. Make sure all bullet points and numbered items are clean, with no markdown formatting.
1480
 
1481
  IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
1482
  """
 
1489
 
1490
  # Remove "Direct Answer:" prefix if it appears
1491
  response = re.sub(r'^Direct Answer:\s*', '', response)
1492
+
1493
+ # Remove any markdown formatting (** for bold) that might appear at the beginning of lines
1494
+ response = re.sub(r'\n\s*\*\*\s*', '\n', response)
1495
 
1496
  # Extract and process sources
1497
  explanation = None