Update src/model.py
Browse files: src/model.py (+105 -81)
src/model.py
CHANGED
|
@@ -147,73 +147,70 @@ def extract_and_link_sources(text, evidence_snippets):
|
|
| 147 |
for snippet in evidence_snippets:
|
| 148 |
if source_id_match == snippet["id"]:
|
| 149 |
source_map[source_id_match] = {
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
"citation": snippet["citation"],
|
| 154 |
"pmid": snippet.get("pmid", ""),
|
| 155 |
"doi": snippet.get("doi", "")
|
| 156 |
-
|
| 157 |
break
|
| 158 |
|
| 159 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
linked_text = text
|
| 161 |
-
for
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
if
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
# Create a replacement with title and URL
|
| 181 |
-
short_title = source_data['title'][:60] + "..." if len(source_data['title']) > 60 else source_data['title']
|
| 182 |
-
replacement = f"[{short_title}]({source_data['url']})"
|
| 183 |
-
|
| 184 |
-
linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
|
| 185 |
-
|
| 186 |
-
# Replace other citation formats
|
| 187 |
-
for source_id_key, source_data in source_map.items():
|
| 188 |
-
if not (source_id_key.startswith("PMID:") or source_id_key.startswith("DOI:")):
|
| 189 |
-
safe_id = re.escape(source_id_key)
|
| 190 |
-
pattern = f"\\[{safe_id}\\]"
|
| 191 |
-
replacement = f"[{source_data['title']}]({source_data['url']})"
|
| 192 |
-
linked_text = re.sub(pattern, replacement, linked_text)
|
| 193 |
-
|
| 194 |
-
# Handle generic [source_id] placeholder
|
| 195 |
-
if "source_id" in source_matches:
|
| 196 |
-
# Use the first snippet available if we have any
|
| 197 |
-
if evidence_snippets and "source_id" not in source_map:
|
| 198 |
-
snippet = evidence_snippets[0] # Use the first snippet
|
| 199 |
-
if snippet.get("url") and snippet.get("title"):
|
| 200 |
-
source_map["source_id"] = {
|
| 201 |
-
"id": snippet["id"],
|
| 202 |
-
"title": snippet["title"].strip(),
|
| 203 |
-
"url": snippet["url"],
|
| 204 |
-
"citation": snippet["citation"],
|
| 205 |
-
"pmid": snippet.get("pmid", ""),
|
| 206 |
-
"doi": snippet.get("doi", "")
|
| 207 |
-
}
|
| 208 |
-
replacement = f"[{snippet['title']}]({snippet['url']})"
|
| 209 |
linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
# Final fallback for any remaining placeholders
|
| 212 |
-
linked_text = re.sub(r'\[source_id\]', "[
|
| 213 |
-
linked_text = re.sub(r'\[PMID:(\d+)\]', r'[
|
| 214 |
-
linked_text = re.sub(r'\[DOI:(10\.\d+\/[^\]]+)\]', r'[
|
| 215 |
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# Implement PubMed API integration for medical evidence retrieval
|
| 219 |
def fetch_from_pubmed_api(query, max_results=3, api_key=None):
|
|
@@ -1290,7 +1287,20 @@ def parse_doctor_response(response_text):
|
|
| 1290 |
sources_text = sources_match.group(2).strip()
|
| 1291 |
# Split into individual sources
|
| 1292 |
if '\n' in sources_text:
|
| 1293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1294 |
else:
|
| 1295 |
parsed["sources"] = [sources_text]
|
| 1296 |
|
|
@@ -1318,11 +1328,19 @@ def parse_doctor_response(response_text):
|
|
| 1318 |
|
| 1319 |
parsed["main_response"] = '\n'.join(main_response_lines)
|
| 1320 |
|
| 1321 |
-
# Extract citations in the text
|
| 1322 |
-
|
| 1323 |
-
|
|
|
|
|
|
|
| 1324 |
if citation not in parsed["sources"]:
|
| 1325 |
parsed["sources"].append(citation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1326 |
|
| 1327 |
return parsed
|
| 1328 |
|
|
@@ -1386,16 +1404,19 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1386 |
evidence_text += """CITATION INSTRUCTIONS:
|
| 1387 |
1. IMPORTANT: Provide a direct answer first before asking follow-up questions. Even with limited information, give your best assessment.
|
| 1388 |
2. You MUST cite 2-3 different sources in your response. Use no more than 3 sources and no fewer than 2 sources.
|
| 1389 |
-
3. When citing information from these articles, use
|
| 1390 |
-
|
| 1391 |
-
|
|
|
|
|
|
|
| 1392 |
|
| 1393 |
-
|
| 1394 |
-
|
| 1395 |
4. Focus on specific details from the abstracts - extract actual findings, statistics, or recommendations.
|
| 1396 |
5. When multiple sources support a claim, cite all of them for stronger evidence.
|
| 1397 |
-
|
| 1398 |
-
|
|
|
|
| 1399 |
7. If the abstracts have conflicting information, acknowledge this and present both perspectives with citations.
|
| 1400 |
8. Use the most recent sources when available, especially for treatment recommendations.
|
| 1401 |
9. If full text is available (marked as "Open Access" or "Full Text Available"), prioritize information from those sources as they contain more complete data.
|
|
@@ -1421,13 +1442,17 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1421 |
**Priority 2: Follow-up Questions**
|
| 1422 |
After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
|
| 1423 |
|
|
|
|
|
|
|
| 1424 |
**Main Response Structure:**
|
| 1425 |
1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
|
| 1426 |
2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
|
| 1427 |
3. Recommendations for a treatment plan or next steps.
|
| 1428 |
-
4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using
|
| 1429 |
-
|
| 1430 |
-
|
|
|
|
|
|
|
| 1431 |
|
| 1432 |
Use no more than 3 sources and no fewer than 2 sources.
|
| 1433 |
|
|
@@ -1436,18 +1461,17 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1436 |
Do NOT start the first question with asterisks (**). Format each question properly with just a number.
|
| 1437 |
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
| 1438 |
Do NOT start the first point with asterisks (**). Format each bullet point properly.
|
| 1439 |
-
- **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
|
| 1440 |
-
|
| 1441 |
-
|
| 1442 |
-
- DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
| 1443 |
-
URL: https://doi.org/10.xxxx/yyyy
|
| 1444 |
|
| 1445 |
**IMPORTANT FORMATTING NOTES:**
|
| 1446 |
-
1.
|
| 1447 |
-
2.
|
| 1448 |
-
3.
|
| 1449 |
-
4.
|
| 1450 |
-
5.
|
|
|
|
| 1451 |
|
| 1452 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1453 |
"""
|
|
|
|
| 147 |
for snippet in evidence_snippets:
|
| 148 |
if source_id_match == snippet["id"]:
|
| 149 |
source_map[source_id_match] = {
|
| 150 |
+
"id": snippet["id"],
|
| 151 |
+
"title": snippet["title"].strip(),
|
| 152 |
+
"url": snippet["url"],
|
| 153 |
"citation": snippet["citation"],
|
| 154 |
"pmid": snippet.get("pmid", ""),
|
| 155 |
"doi": snippet.get("doi", "")
|
| 156 |
+
}
|
| 157 |
break
|
| 158 |
|
| 159 |
+
# Create a numbered citation system
|
| 160 |
+
numbered_sources = {}
|
| 161 |
+
citation_number = 1
|
| 162 |
+
citation_map = {} # Maps original citation keys to numbers
|
| 163 |
+
|
| 164 |
+
# First create a numbering system for all sources
|
| 165 |
+
for key in source_map.keys():
|
| 166 |
+
citation_map[key] = citation_number
|
| 167 |
+
numbered_sources[citation_number] = source_map[key]
|
| 168 |
+
citation_number += 1
|
| 169 |
+
|
| 170 |
+
# Replace citations with numbered format
|
| 171 |
linked_text = text
|
| 172 |
+
for source_key, number in citation_map.items():
|
| 173 |
+
source_data = source_map[source_key]
|
| 174 |
+
safe_key = re.escape(source_key)
|
| 175 |
+
pattern = f"\\[{safe_key}\\]"
|
| 176 |
+
|
| 177 |
+
# Create a colored, clickable numbered reference
|
| 178 |
+
colored_ref = f"<span style='color:#3366cc;'>[{number}]</span>"
|
| 179 |
+
replacement = f"<a href='{source_data['url']}' target='_blank'>{colored_ref}</a>"
|
| 180 |
+
|
| 181 |
+
# Replace all instances of this citation with the numbered format
|
| 182 |
+
linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
|
| 183 |
+
|
| 184 |
+
# Handle any remaining source placeholders
|
| 185 |
+
if "source_id" in source_matches and "source_id" not in citation_map:
|
| 186 |
+
if evidence_snippets:
|
| 187 |
+
snippet = evidence_snippets[0]
|
| 188 |
+
next_number = len(numbered_sources) + 1
|
| 189 |
+
colored_ref = f"<span style='color:#3366cc;'>[{next_number}]</span>"
|
| 190 |
+
replacement = f"<a href='{snippet['url']}' target='_blank'>{colored_ref}</a>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
|
| 192 |
+
|
| 193 |
+
# Add to numbered sources
|
| 194 |
+
numbered_sources[next_number] = {
|
| 195 |
+
"id": snippet["id"],
|
| 196 |
+
"title": snippet["title"].strip(),
|
| 197 |
+
"url": snippet["url"],
|
| 198 |
+
"citation": snippet["citation"],
|
| 199 |
+
"pmid": snippet.get("pmid", ""),
|
| 200 |
+
"doi": snippet.get("doi", "")
|
| 201 |
+
}
|
| 202 |
|
| 203 |
# Final fallback for any remaining placeholders
|
| 204 |
+
linked_text = re.sub(r'\[source_id\]', "<span style='color:#999999;'>[?]</span>", linked_text)
|
| 205 |
+
linked_text = re.sub(r'\[PMID:(\d+)\]', r"<span style='color:#999999;'>[?]</span>", linked_text)
|
| 206 |
+
linked_text = re.sub(r'\[DOI:(10\.\d+\/[^\]]+)\]', r"<span style='color:#999999;'>[?]</span>", linked_text)
|
| 207 |
|
| 208 |
+
# Now update source_map to use the numbered format for the sources section
|
| 209 |
+
numbered_source_map = {}
|
| 210 |
+
for number, data in numbered_sources.items():
|
| 211 |
+
numbered_source_map[str(number)] = data
|
| 212 |
+
|
| 213 |
+
return linked_text, numbered_source_map
|
| 214 |
|
| 215 |
# Implement PubMed API integration for medical evidence retrieval
|
| 216 |
def fetch_from_pubmed_api(query, max_results=3, api_key=None):
|
|
|
|
| 1287 |
sources_text = sources_match.group(2).strip()
|
| 1288 |
# Split into individual sources
|
| 1289 |
if '\n' in sources_text:
|
| 1290 |
+
# Parse each line as a potential source
|
| 1291 |
+
source_lines = [item.strip() for item in sources_text.split('\n') if item.strip()]
|
| 1292 |
+
|
| 1293 |
+
# Process source lines to adapt to new numbered format
|
| 1294 |
+
formatted_sources = []
|
| 1295 |
+
for line in source_lines:
|
| 1296 |
+
# Look for numbered source pattern: [1] or 1. or similar
|
| 1297 |
+
if re.match(r'^\d+[\.\)]|^\[\d+\]', line):
|
| 1298 |
+
formatted_sources.append(line)
|
| 1299 |
+
else:
|
| 1300 |
+
# If no number detected, just add the source
|
| 1301 |
+
formatted_sources.append(line)
|
| 1302 |
+
|
| 1303 |
+
parsed["sources"] = formatted_sources
|
| 1304 |
else:
|
| 1305 |
parsed["sources"] = [sources_text]
|
| 1306 |
|
|
|
|
| 1328 |
|
| 1329 |
parsed["main_response"] = '\n'.join(main_response_lines)
|
| 1330 |
|
| 1331 |
+
# Extract citations in the text - both numbered [1] and PMID/DOI formats
|
| 1332 |
+
# Standard citation formats
|
| 1333 |
+
pmid_doi_citation_matches = re.findall(r'\[(PMID|DOI):([\w\d:\.\/]+)\]', response_text)
|
| 1334 |
+
for match in pmid_doi_citation_matches:
|
| 1335 |
+
citation = f"{match[0]}:{match[1]}"
|
| 1336 |
if citation not in parsed["sources"]:
|
| 1337 |
parsed["sources"].append(citation)
|
| 1338 |
+
|
| 1339 |
+
# Numbered citations like [1]
|
| 1340 |
+
numbered_citation_matches = re.findall(r'\[(\d+)\]', response_text)
|
| 1341 |
+
for num in numbered_citation_matches:
|
| 1342 |
+
if num not in parsed["sources"]:
|
| 1343 |
+
parsed["sources"].append(num)
|
| 1344 |
|
| 1345 |
return parsed
|
| 1346 |
|
|
|
|
| 1404 |
evidence_text += """CITATION INSTRUCTIONS:
|
| 1405 |
1. IMPORTANT: Provide a direct answer first before asking follow-up questions. Even with limited information, give your best assessment.
|
| 1406 |
2. You MUST cite 2-3 different sources in your response. Use no more than 3 sources and no fewer than 2 sources.
|
| 1407 |
+
3. When citing information from these articles, use NUMBERED references [1], [2], [3], etc.
|
| 1408 |
+
|
| 1409 |
+
Example: "Recent studies have shown improved outcomes with early intervention [1]."
|
| 1410 |
+
Example: "Current guidelines recommend a multidisciplinary approach [2]."
|
| 1411 |
+
Example: "This approach is supported by multiple studies [1][3]."
|
| 1412 |
|
| 1413 |
+
DO NOT use formats like [PMID:123456] or [DOI:10.xxxx/yyyy] in the main text.
|
| 1414 |
+
|
| 1415 |
4. Focus on specific details from the abstracts - extract actual findings, statistics, or recommendations.
|
| 1416 |
5. When multiple sources support a claim, cite all of them for stronger evidence.
|
| 1417 |
+
6. Include full citations in your Sources section with the format:
|
| 1418 |
+
[1] PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1419 |
+
[2] DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
| 1420 |
7. If the abstracts have conflicting information, acknowledge this and present both perspectives with citations.
|
| 1421 |
8. Use the most recent sources when available, especially for treatment recommendations.
|
| 1422 |
9. If full text is available (marked as "Open Access" or "Full Text Available"), prioritize information from those sources as they contain more complete data.
|
|
|
|
| 1442 |
**Priority 2: Follow-up Questions**
|
| 1443 |
After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
|
| 1444 |
|
| 1445 |
+
**IMPORTANT CITATION FORMAT CHANGE: Use numbered references [1], [2], [3] instead of PMID/DOI format**
|
| 1446 |
+
|
| 1447 |
**Main Response Structure:**
|
| 1448 |
1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
|
| 1449 |
2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
|
| 1450 |
3. Recommendations for a treatment plan or next steps.
|
| 1451 |
+
4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using numbered references [1], [2], [3].
|
| 1452 |
+
Each time you use information from a source, cite it with [1], [2], etc.
|
| 1453 |
+
|
| 1454 |
+
Example: "Recent studies have shown improved outcomes with early intervention [1]."
|
| 1455 |
+
Example: "This approach is supported by multiple studies [1][2]."
|
| 1456 |
|
| 1457 |
Use no more than 3 sources and no fewer than 2 sources.
|
| 1458 |
|
|
|
|
| 1461 |
Do NOT start the first question with asterisks (**). Format each question properly with just a number.
|
| 1462 |
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
| 1463 |
Do NOT start the first point with asterisks (**). Format each bullet point properly.
|
| 1464 |
+
- **Sources**: A numbered list of all references cited in your main response (2-3 sources), formatted as:
|
| 1465 |
+
[1] PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1466 |
+
[2] DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
|
|
|
|
|
|
| 1467 |
|
| 1468 |
**IMPORTANT FORMATTING NOTES:**
|
| 1469 |
+
1. Use numbered citations [1], [2], [3] instead of [PMID:12345678] or [DOI:10.xxxx/yyyy] in the main text.
|
| 1470 |
+
2. In the Sources section, include the full citation details with the PMID or DOI.
|
| 1471 |
+
3. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
|
| 1472 |
+
4. Number the follow-up questions starting from 1, not from any other number.
|
| 1473 |
+
5. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
|
| 1474 |
+
6. Make sure all bullet points and numbered items are clean, with no markdown formatting.
|
| 1475 |
|
| 1476 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1477 |
"""
|