** formatting fix
Browse files- src/model.py +53 -14
src/model.py
CHANGED
|
@@ -1219,40 +1219,70 @@ def parse_doctor_response(response_text):
|
|
| 1219 |
follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
|
| 1220 |
if follow_up_match:
|
| 1221 |
follow_up_text = follow_up_match.group(1).strip()
|
|
|
|
|
|
|
|
|
|
| 1222 |
# Check if questions are formatted as a list
|
| 1223 |
if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
|
| 1224 |
# Split on any bullet point marker
|
| 1225 |
bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
|
| 1226 |
# Remove any empty items and ensure first item is properly formatted
|
| 1227 |
-
questions = [
|
| 1228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1229 |
parsed["follow_up_questions"] = questions
|
| 1230 |
elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
|
| 1231 |
# Split on numbered items
|
| 1232 |
numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
|
| 1233 |
-
#
|
| 1234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1235 |
else:
|
| 1236 |
# Just use the raw text if no clear list format is detected
|
| 1237 |
-
|
|
|
|
| 1238 |
|
| 1239 |
# Try to extract reasoning if present
|
| 1240 |
reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
|
| 1241 |
if reasoning_match:
|
| 1242 |
reasoning_text = reasoning_match.group(1).strip()
|
|
|
|
|
|
|
|
|
|
| 1243 |
# Split into bullet points if present
|
| 1244 |
if '\n-' in reasoning_text:
|
| 1245 |
# Split by newline + dash, but ensure we don't lose any content
|
| 1246 |
reasoning_points = []
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1251 |
if cleaned_item:
|
| 1252 |
reasoning_points.append(cleaned_item)
|
|
|
|
| 1253 |
parsed["reasoning"] = reasoning_points
|
| 1254 |
else:
|
| 1255 |
-
|
|
|
|
|
|
|
| 1256 |
|
| 1257 |
# Extract sources/references
|
| 1258 |
sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
|
|
@@ -1403,7 +1433,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1403 |
|
| 1404 |
**After your main response, ALWAYS include these sections:**
|
| 1405 |
- **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
|
| 1406 |
-
|
|
|
|
|
|
|
| 1407 |
- **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
|
| 1408 |
- PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1409 |
URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
|
|
@@ -1414,7 +1446,8 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1414 |
1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
|
| 1415 |
2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1416 |
3. Number the follow-up questions starting from 1, not from any other number.
|
| 1417 |
-
4.
|
|
|
|
| 1418 |
|
| 1419 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1420 |
"""
|
|
@@ -1435,12 +1468,15 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1435 |
|
| 1436 |
**After your main response, ALWAYS include these sections:**
|
| 1437 |
- **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
|
| 1438 |
-
|
|
|
|
|
|
|
| 1439 |
|
| 1440 |
**IMPORTANT FORMATTING NOTES:**
|
| 1441 |
1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1442 |
2. Number the follow-up questions starting from 1, not from any other number.
|
| 1443 |
-
3.
|
|
|
|
| 1444 |
|
| 1445 |
IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
|
| 1446 |
"""
|
|
@@ -1453,6 +1489,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1453 |
|
| 1454 |
# Remove "Direct Answer:" prefix if it appears
|
| 1455 |
response = re.sub(r'^Direct Answer:\s*', '', response)
|
|
|
|
|
|
|
|
|
|
| 1456 |
|
| 1457 |
# Extract and process sources
|
| 1458 |
explanation = None
|
|
|
|
| 1219 |
follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
|
| 1220 |
if follow_up_match:
|
| 1221 |
follow_up_text = follow_up_match.group(1).strip()
|
| 1222 |
+
# Remove any leading markdown formatting (like ** for bold)
|
| 1223 |
+
follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
|
| 1224 |
+
|
| 1225 |
# Check if questions are formatted as a list
|
| 1226 |
if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
|
| 1227 |
# Split on any bullet point marker
|
| 1228 |
bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
|
| 1229 |
# Remove any empty items and ensure first item is properly formatted
|
| 1230 |
+
questions = []
|
| 1231 |
+
for item in bullet_items:
|
| 1232 |
+
if item.strip():
|
| 1233 |
+
# Remove any markdown formatting from each item
|
| 1234 |
+
cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
|
| 1235 |
+
questions.append(cleaned_item)
|
| 1236 |
parsed["follow_up_questions"] = questions
|
| 1237 |
elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
|
| 1238 |
# Split on numbered items
|
| 1239 |
numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
|
| 1240 |
+
# Clean each item and remove any empty ones
|
| 1241 |
+
questions = []
|
| 1242 |
+
for item in numbered_items:
|
| 1243 |
+
if item.strip():
|
| 1244 |
+
# Remove any markdown formatting
|
| 1245 |
+
cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
|
| 1246 |
+
questions.append(cleaned_item)
|
| 1247 |
+
parsed["follow_up_questions"] = questions
|
| 1248 |
else:
|
| 1249 |
# Just use the raw text if no clear list format is detected
|
| 1250 |
+
cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
|
| 1251 |
+
parsed["follow_up_questions"] = [cleaned_text]
|
| 1252 |
|
| 1253 |
# Try to extract reasoning if present
|
| 1254 |
reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
|
| 1255 |
if reasoning_match:
|
| 1256 |
reasoning_text = reasoning_match.group(1).strip()
|
| 1257 |
+
# Remove any leading markdown formatting (like ** for bold)
|
| 1258 |
+
reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
|
| 1259 |
+
|
| 1260 |
# Split into bullet points if present
|
| 1261 |
if '\n-' in reasoning_text:
|
| 1262 |
# Split by newline + dash, but ensure we don't lose any content
|
| 1263 |
reasoning_points = []
|
| 1264 |
+
lines = reasoning_text.split('\n-')
|
| 1265 |
+
|
| 1266 |
+
# Process the first item which might not have a dash prefix
|
| 1267 |
+
if lines and lines[0].strip():
|
| 1268 |
+
# Clean up any leading/trailing asterisks
|
| 1269 |
+
first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
|
| 1270 |
+
if first_item:
|
| 1271 |
+
reasoning_points.append(first_item)
|
| 1272 |
+
|
| 1273 |
+
# Process the rest of the items
|
| 1274 |
+
for i in range(1, len(lines)):
|
| 1275 |
+
if lines[i].strip():
|
| 1276 |
+
# Clean up leading/trailing asterisks and dashes
|
| 1277 |
+
cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
|
| 1278 |
if cleaned_item:
|
| 1279 |
reasoning_points.append(cleaned_item)
|
| 1280 |
+
|
| 1281 |
parsed["reasoning"] = reasoning_points
|
| 1282 |
else:
|
| 1283 |
+
# If there are no bullet points, still clean up any markdown
|
| 1284 |
+
cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
|
| 1285 |
+
parsed["reasoning"] = [cleaned_text]
|
| 1286 |
|
| 1287 |
# Extract sources/references
|
| 1288 |
sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
|
|
|
|
| 1433 |
|
| 1434 |
**After your main response, ALWAYS include these sections:**
|
| 1435 |
- **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
|
| 1436 |
+
Do NOT start the first question with asterisks (**). Format each question properly with just a number.
|
| 1437 |
+
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
| 1438 |
+
Do NOT start the first point with asterisks (**). Format each bullet point properly.
|
| 1439 |
- **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
|
| 1440 |
- PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1441 |
URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
|
|
|
|
| 1446 |
1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
|
| 1447 |
2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1448 |
3. Number the follow-up questions starting from 1, not from any other number.
|
| 1449 |
+
4. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
|
| 1450 |
+
5. Make sure all bullet points and numbered items are clean, with no markdown formatting.
|
| 1451 |
|
| 1452 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1453 |
"""
|
|
|
|
| 1468 |
|
| 1469 |
**After your main response, ALWAYS include these sections:**
|
| 1470 |
- **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
|
| 1471 |
+
Do NOT start the first question with asterisks (**). Format each question properly with just a number.
|
| 1472 |
+
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
| 1473 |
+
Do NOT start the first bullet point with asterisks (**). Format each point properly.
|
| 1474 |
|
| 1475 |
**IMPORTANT FORMATTING NOTES:**
|
| 1476 |
1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1477 |
2. Number the follow-up questions starting from 1, not from any other number.
|
| 1478 |
+
3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
|
| 1479 |
+
4. Make sure all bullet points and numbered items are clean, with no markdown formatting.
|
| 1480 |
|
| 1481 |
IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
|
| 1482 |
"""
|
|
|
|
| 1489 |
|
| 1490 |
# Remove "Direct Answer:" prefix if it appears
|
| 1491 |
response = re.sub(r'^Direct Answer:\s*', '', response)
|
| 1492 |
+
|
| 1493 |
+
# Remove any markdown formatting (** for bold) that might appear at the beginning of lines
|
| 1494 |
+
response = re.sub(r'\n\s*\*\*\s*', '\n', response)
|
| 1495 |
|
| 1496 |
# Extract and process sources
|
| 1497 |
explanation = None
|