Update src/model.py
Browse files - src/model.py +43 -57
src/model.py
CHANGED
|
@@ -117,7 +117,7 @@ def extract_and_link_sources(text, evidence_snippets):
|
|
| 117 |
|
| 118 |
# Process DOI citations
|
| 119 |
for doi in doi_matches:
|
| 120 |
-
for snippet in evidence_snippets:
|
| 121 |
# Check if this is a direct DOI match
|
| 122 |
if 'doi' in snippet and snippet['doi'] == doi:
|
| 123 |
source_map[f"DOI:{doi}"] = {
|
|
@@ -131,13 +131,13 @@ def extract_and_link_sources(text, evidence_snippets):
|
|
| 131 |
# Also check the ID field which might contain DOI
|
| 132 |
elif snippet.get("id") == f"DOI:{doi}":
|
| 133 |
source_map[f"DOI:{doi}"] = {
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
"citation": snippet["citation"],
|
| 138 |
"doi": doi
|
| 139 |
-
|
| 140 |
-
|
| 141 |
|
| 142 |
# Process other citation formats for backward compatibility
|
| 143 |
for source_id_match in source_matches:
|
|
@@ -145,15 +145,15 @@ def extract_and_link_sources(text, evidence_snippets):
|
|
| 145 |
for snippet in evidence_snippets:
|
| 146 |
if source_id_match == snippet["id"]:
|
| 147 |
source_map[source_id_match] = {
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
"citation": snippet["citation"],
|
| 152 |
"pmid": snippet.get("pmid", ""),
|
| 153 |
"doi": snippet.get("doi", "")
|
| 154 |
-
|
| 155 |
break
|
| 156 |
-
|
| 157 |
# Replace PMID citations with links
|
| 158 |
linked_text = text
|
| 159 |
for pmid_key in [f"PMID:{pmid}" for pmid in pmid_matches]:
|
|
@@ -184,10 +184,10 @@ def extract_and_link_sources(text, evidence_snippets):
|
|
| 184 |
# Replace other citation formats
|
| 185 |
for source_id_key, source_data in source_map.items():
|
| 186 |
if not (source_id_key.startswith("PMID:") or source_id_key.startswith("DOI:")):
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
|
| 192 |
# Handle generic [source_id] placeholder
|
| 193 |
if "source_id" in source_matches:
|
|
@@ -204,7 +204,7 @@ def extract_and_link_sources(text, evidence_snippets):
|
|
| 204 |
"doi": snippet.get("doi", "")
|
| 205 |
}
|
| 206 |
replacement = f"[{snippet['title']}]({snippet['url']})"
|
| 207 |
-
|
| 208 |
|
| 209 |
# Final fallback for any remaining placeholders
|
| 210 |
linked_text = re.sub(r'\[source_id\]', "[Medical Reference]", linked_text)
|
|
@@ -1323,55 +1323,41 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1323 |
# Add instructions for structured output
|
| 1324 |
if use_rag:
|
| 1325 |
output_instructions = """
|
| 1326 |
-
Please structure your response
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
|
| 1330 |
-
|
| 1331 |
-
|
| 1332 |
-
|
|
|
|
| 1333 |
4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
|
| 1334 |
• [PMID:123456] format for PubMed articles
|
| 1335 |
• [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
|
| 1336 |
|
| 1337 |
Use no more than 3 sources and no fewer than 2 sources.
|
| 1338 |
-
|
| 1339 |
-
**
|
| 1340 |
-
|
| 1341 |
-
|
| 1342 |
-
|
| 1343 |
-
|
| 1344 |
-
|
| 1345 |
-
Bullet points detailing your clinical reasoning.
|
| 1346 |
-
|
| 1347 |
-
**Sources**
|
| 1348 |
-
List all references cited in your main response (2-3 sources), formatted as:
|
| 1349 |
-
- PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1350 |
-
URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
|
| 1351 |
-
- DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
| 1352 |
-
URL: https://doi.org/10.xxxx/yyyy
|
| 1353 |
-
|
| 1354 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1355 |
"""
|
| 1356 |
else:
|
| 1357 |
# Different instructions when RAG is disabled - no mention of sources or citations
|
| 1358 |
output_instructions = """
|
| 1359 |
-
Please structure your response
|
| 1360 |
-
|
| 1361 |
-
|
| 1362 |
-
|
| 1363 |
-
|
| 1364 |
-
|
| 1365 |
-
|
| 1366 |
-
|
| 1367 |
-
**
|
| 1368 |
-
|
| 1369 |
-
List 2-4 specific questions that would help you provide a more precise assessment.
|
| 1370 |
-
Format these as a numbered list.
|
| 1371 |
-
|
| 1372 |
-
**Reasoning**
|
| 1373 |
-
Bullet points detailing your clinical reasoning.
|
| 1374 |
-
|
| 1375 |
IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
|
| 1376 |
"""
|
| 1377 |
|
|
@@ -1400,13 +1386,13 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1400 |
if reasoning:
|
| 1401 |
if isinstance(reasoning, list):
|
| 1402 |
explanation = "\n".join([f"- {r}" for r in reasoning])
|
| 1403 |
-
|
| 1404 |
explanation = reasoning
|
| 1405 |
else:
|
| 1406 |
# If RAG is disabled, just parse the response without source processing
|
| 1407 |
parsed_response = parse_doctor_response(response)
|
| 1408 |
main_response = response
|
| 1409 |
-
|
| 1410 |
# Extract reasoning
|
| 1411 |
reasoning = parsed_response.get("reasoning", [])
|
| 1412 |
if reasoning:
|
|
|
|
| 117 |
|
| 118 |
# Process DOI citations
|
| 119 |
for doi in doi_matches:
|
| 120 |
+
for snippet in evidence_snippets:
|
| 121 |
# Check if this is a direct DOI match
|
| 122 |
if 'doi' in snippet and snippet['doi'] == doi:
|
| 123 |
source_map[f"DOI:{doi}"] = {
|
|
|
|
| 131 |
# Also check the ID field which might contain DOI
|
| 132 |
elif snippet.get("id") == f"DOI:{doi}":
|
| 133 |
source_map[f"DOI:{doi}"] = {
|
| 134 |
+
"id": snippet["id"],
|
| 135 |
+
"title": snippet["title"].strip(),
|
| 136 |
+
"url": snippet["url"],
|
| 137 |
"citation": snippet["citation"],
|
| 138 |
"doi": doi
|
| 139 |
+
}
|
| 140 |
+
break
|
| 141 |
|
| 142 |
# Process other citation formats for backward compatibility
|
| 143 |
for source_id_match in source_matches:
|
|
|
|
| 145 |
for snippet in evidence_snippets:
|
| 146 |
if source_id_match == snippet["id"]:
|
| 147 |
source_map[source_id_match] = {
|
| 148 |
+
"id": snippet["id"],
|
| 149 |
+
"title": snippet["title"].strip(),
|
| 150 |
+
"url": snippet["url"],
|
| 151 |
"citation": snippet["citation"],
|
| 152 |
"pmid": snippet.get("pmid", ""),
|
| 153 |
"doi": snippet.get("doi", "")
|
| 154 |
+
}
|
| 155 |
break
|
| 156 |
+
|
| 157 |
# Replace PMID citations with links
|
| 158 |
linked_text = text
|
| 159 |
for pmid_key in [f"PMID:{pmid}" for pmid in pmid_matches]:
|
|
|
|
| 184 |
# Replace other citation formats
|
| 185 |
for source_id_key, source_data in source_map.items():
|
| 186 |
if not (source_id_key.startswith("PMID:") or source_id_key.startswith("DOI:")):
|
| 187 |
+
safe_id = re.escape(source_id_key)
|
| 188 |
+
pattern = f"\\[{safe_id}\\]"
|
| 189 |
+
replacement = f"[{source_data['title']}]({source_data['url']})"
|
| 190 |
+
linked_text = re.sub(pattern, replacement, linked_text)
|
| 191 |
|
| 192 |
# Handle generic [source_id] placeholder
|
| 193 |
if "source_id" in source_matches:
|
|
|
|
| 204 |
"doi": snippet.get("doi", "")
|
| 205 |
}
|
| 206 |
replacement = f"[{snippet['title']}]({snippet['url']})"
|
| 207 |
+
linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
|
| 208 |
|
| 209 |
# Final fallback for any remaining placeholders
|
| 210 |
linked_text = re.sub(r'\[source_id\]', "[Medical Reference]", linked_text)
|
|
|
|
| 1323 |
# Add instructions for structured output
|
| 1324 |
if use_rag:
|
| 1325 |
output_instructions = """
|
| 1326 |
+
Please structure your response clearly.
|
| 1327 |
+
**Priority 1: Ask Clarifying Questions**
|
| 1328 |
+
If the user's query lacks detail for a proper assessment (e.g., age, specific symptoms, medical history, duration, severity), your HIGHEST priority is to ask these questions first. Do not provide a diagnosis or plan until sufficient information is gathered.
|
| 1329 |
+
**Priority 2: Main Response (After Clarification)**
|
| 1330 |
+
Once sufficient information is available (either initially or after asking questions), provide:
|
| 1331 |
+
1. A direct answer to the patient's concerns.
|
| 1332 |
+
2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
|
| 1333 |
+
3. Recommendations for a treatment plan or next steps.
|
| 1334 |
4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
|
| 1335 |
• [PMID:123456] format for PubMed articles
|
| 1336 |
• [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
|
| 1337 |
|
| 1338 |
Use no more than 3 sources and no fewer than 2 sources.
|
| 1339 |
+
**After your main response, ALWAYS include these sections:**
|
| 1340 |
+
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
| 1341 |
+
- **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
|
| 1342 |
+
- PMID: 12345678 - Author et al. (Year). Title. Journal.
|
| 1343 |
+
URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
|
| 1344 |
+
- DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
|
| 1345 |
+
URL: https://doi.org/10.xxxx/yyyy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1346 |
IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
|
| 1347 |
"""
|
| 1348 |
else:
|
| 1349 |
# Different instructions when RAG is disabled - no mention of sources or citations
|
| 1350 |
output_instructions = """
|
| 1351 |
+
Please structure your response clearly.
|
| 1352 |
+
**Priority 1: Ask Clarifying Questions**
|
| 1353 |
+
If the user's query lacks detail for a proper assessment (e.g., age, specific symptoms, medical history, duration, severity), your HIGHEST priority is to ask these questions first. Do not provide a diagnosis or plan until sufficient information is gathered.
|
| 1354 |
+
**Priority 2: Main Response (After Clarification)**
|
| 1355 |
+
Once sufficient information is available (either initially or after asking questions), provide:
|
| 1356 |
+
1. A direct answer to the patient's concerns.
|
| 1357 |
+
2. If appropriate, a clear diagnosis or differential diagnosis.
|
| 1358 |
+
3. Recommendations for a treatment plan or next steps.
|
| 1359 |
+
**After your main response, ALWAYS include this section:**
|
| 1360 |
+
- **Reasoning**: Bullet points detailing your clinical reasoning.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1361 |
IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
|
| 1362 |
"""
|
| 1363 |
|
|
|
|
| 1386 |
if reasoning:
|
| 1387 |
if isinstance(reasoning, list):
|
| 1388 |
explanation = "\n".join([f"- {r}" for r in reasoning])
|
| 1389 |
+
else:
|
| 1390 |
explanation = reasoning
|
| 1391 |
else:
|
| 1392 |
# If RAG is disabled, just parse the response without source processing
|
| 1393 |
parsed_response = parse_doctor_response(response)
|
| 1394 |
main_response = response
|
| 1395 |
+
|
| 1396 |
# Extract reasoning
|
| 1397 |
reasoning = parsed_response.get("reasoning", [])
|
| 1398 |
if reasoning:
|