Update src/model.py
Browse files- src/model.py +167 -64
src/model.py
CHANGED
|
@@ -692,7 +692,7 @@ def fetch_from_core_api(query, max_results=2, api_key=None):
|
|
| 692 |
return []
|
| 693 |
|
| 694 |
# Enhanced PubMed search function
|
| 695 |
-
def enhanced_search_pubmed(query, retmax=
|
| 696 |
"""
|
| 697 |
Enhanced PubMed search using E-utilities API with improved parsing and error handling.
|
| 698 |
|
|
@@ -857,7 +857,7 @@ def enhanced_search_pubmed(query, retmax=2, api_key=None):
|
|
| 857 |
return []
|
| 858 |
|
| 859 |
# Europe PMC search function
|
| 860 |
-
def search_europe_pmc(query, max_results=
|
| 861 |
"""
|
| 862 |
Search Europe PMC for biomedical articles, with a focus on retrieving full text when available.
|
| 863 |
Europe PMC provides more open access content than standard PubMed.
|
|
@@ -865,6 +865,8 @@ def search_europe_pmc(query, max_results=2):
|
|
| 865 |
Args:
|
| 866 |
query (str): Search query string
|
| 867 |
max_results (int): Maximum number of results to return
|
|
|
|
|
|
|
| 868 |
|
| 869 |
Returns:
|
| 870 |
list: List of article dictionaries with title, abstract, PMID, URL, and full text URL
|
|
@@ -878,21 +880,29 @@ def search_europe_pmc(query, max_results=2):
|
|
| 878 |
# Europe PMC API base URL
|
| 879 |
base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
|
| 880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 881 |
# Search parameters - specifically looking for open access when possible
|
| 882 |
search_params = {
|
| 883 |
-
"query": f"({
|
| 884 |
"format": "json",
|
| 885 |
"pageSize": max_results,
|
| 886 |
"resultType": "core" # Get core metadata
|
| 887 |
}
|
| 888 |
|
| 889 |
-
print(f"Searching Europe PMC with query: {
|
| 890 |
response = requests.get(base_url, params=search_params)
|
| 891 |
|
| 892 |
if response.status_code != 200:
|
| 893 |
print(f"Europe PMC search error: {response.status_code}")
|
| 894 |
# Try again without open access restriction if no results
|
| 895 |
-
search_params["query"] =
|
| 896 |
response = requests.get(base_url, params=search_params)
|
| 897 |
if response.status_code != 200:
|
| 898 |
return []
|
|
@@ -903,6 +913,10 @@ def search_europe_pmc(query, max_results=2):
|
|
| 903 |
hit_count = data.get("hitCount", 0)
|
| 904 |
if hit_count == 0:
|
| 905 |
print("No Europe PMC results found")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
return []
|
| 907 |
|
| 908 |
# Process results
|
|
@@ -1007,8 +1021,9 @@ def fetch_medical_evidence(query, max_results=3):
|
|
| 1007 |
"""
|
| 1008 |
Fetch medical evidence using a multi-source approach:
|
| 1009 |
1. Search with extracted medical terms in PubMed
|
| 1010 |
-
2. Search with
|
| 1011 |
-
3. Search
|
|
|
|
| 1012 |
|
| 1013 |
This provides better coverage and relevance from multiple sources.
|
| 1014 |
|
|
@@ -1024,42 +1039,51 @@ def fetch_medical_evidence(query, max_results=3):
|
|
| 1024 |
|
| 1025 |
# Step 1: Extract medical terms from the query
|
| 1026 |
medical_terms = extract_medical_terms(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
|
| 1028 |
# Only use extracted terms if we found any
|
| 1029 |
-
if
|
| 1030 |
-
# Join terms with commas for
|
| 1031 |
terms_query = ", ".join(medical_terms)
|
| 1032 |
print(f"Searching PubMed with extracted terms: {terms_query}")
|
| 1033 |
|
| 1034 |
-
# Search with extracted terms
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
|
|
|
|
|
|
|
|
|
| 1039 |
|
| 1040 |
-
#
|
| 1041 |
-
# Increase from 2 to 3 results from this search
|
| 1042 |
print(f"Searching PubMed with full query")
|
| 1043 |
-
|
| 1044 |
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
europepmc_results = search_europe_pmc(query, max_results=2)
|
| 1048 |
|
| 1049 |
-
# Step
|
| 1050 |
all_results = []
|
| 1051 |
seen_pmids = set()
|
| 1052 |
seen_dois = set()
|
| 1053 |
|
| 1054 |
# Process results in order of preference:
|
| 1055 |
-
# 1. Terms search from PubMed
|
| 1056 |
-
# 2. Europe PMC
|
| 1057 |
-
# 3. Full query
|
|
|
|
| 1058 |
|
| 1059 |
# Add results from terms search first (often more relevant)
|
| 1060 |
-
for result in
|
| 1061 |
-
pmid = result
|
| 1062 |
-
if pmid not in seen_pmids and len(all_results) < max_results:
|
| 1063 |
seen_pmids.add(pmid)
|
| 1064 |
# Format for compatibility with existing code
|
| 1065 |
all_results.append({
|
|
@@ -1068,23 +1092,19 @@ def fetch_medical_evidence(query, max_results=3):
|
|
| 1068 |
"text": result["abstract"],
|
| 1069 |
"citation": result["citation"],
|
| 1070 |
"url": result["url"],
|
| 1071 |
-
"source_type": "PubMed" + (" (Full Text Available)" if result
|
| 1072 |
-
"is_open_access": result
|
| 1073 |
"pmid": pmid # Keep the original PMID for direct access
|
| 1074 |
})
|
| 1075 |
|
| 1076 |
-
# Add Europe PMC results
|
| 1077 |
-
for result in
|
| 1078 |
# Some Europe PMC articles may not have a PMID, use DOI as fallback
|
| 1079 |
pmid = result.get("pmid")
|
| 1080 |
doi = result.get("doi")
|
| 1081 |
|
| 1082 |
-
# Skip if we've already seen this article via PMID
|
| 1083 |
-
if pmid and pmid in seen_pmids:
|
| 1084 |
-
continue
|
| 1085 |
-
|
| 1086 |
-
# Skip if we've already seen this article via DOI
|
| 1087 |
-
if doi and doi in seen_dois:
|
| 1088 |
continue
|
| 1089 |
|
| 1090 |
# Skip if we've reached our max
|
|
@@ -1097,12 +1117,12 @@ def fetch_medical_evidence(query, max_results=3):
|
|
| 1097 |
if doi:
|
| 1098 |
seen_dois.add(doi)
|
| 1099 |
|
| 1100 |
-
#
|
| 1101 |
-
|
| 1102 |
|
| 1103 |
# Add to results
|
| 1104 |
all_results.append({
|
| 1105 |
-
"id":
|
| 1106 |
"title": result["title"],
|
| 1107 |
"text": result["abstract"],
|
| 1108 |
"citation": result["citation"],
|
|
@@ -1110,33 +1130,62 @@ def fetch_medical_evidence(query, max_results=3):
|
|
| 1110 |
"source_type": result["source_type"],
|
| 1111 |
"is_open_access": result["is_open_access"],
|
| 1112 |
"pmid": pmid, # May be None
|
| 1113 |
-
"doi": doi #
|
| 1114 |
})
|
| 1115 |
|
| 1116 |
-
#
|
| 1117 |
-
for result in
|
| 1118 |
-
pmid = result
|
| 1119 |
-
if pmid not in seen_pmids and len(all_results) < max_results:
|
| 1120 |
seen_pmids.add(pmid)
|
| 1121 |
-
# Format for compatibility with existing code
|
| 1122 |
all_results.append({
|
| 1123 |
"id": f"PMID:{pmid}",
|
| 1124 |
"title": result["title"],
|
| 1125 |
"text": result["abstract"],
|
| 1126 |
"citation": result["citation"],
|
| 1127 |
"url": result["url"],
|
| 1128 |
-
"source_type": "PubMed" + (" (Full Text Available)" if result
|
| 1129 |
-
"is_open_access": result
|
| 1130 |
-
"pmid": pmid
|
| 1131 |
})
|
| 1132 |
|
| 1133 |
-
#
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1138 |
|
| 1139 |
-
|
|
|
|
| 1140 |
|
| 1141 |
# Function to parse doctor agent responses
|
| 1142 |
def parse_doctor_response(response_text):
|
|
@@ -1223,13 +1272,21 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1223 |
|
| 1224 |
# Format evidence for the model
|
| 1225 |
if evidence_snippets:
|
| 1226 |
-
evidence_text = "MEDICAL EVIDENCE FROM
|
| 1227 |
|
| 1228 |
for i, snippet in enumerate(evidence_snippets):
|
| 1229 |
-
# Format the evidence with clear PMID for citation
|
| 1230 |
pmid = snippet.get("pmid", "")
|
|
|
|
|
|
|
| 1231 |
evidence_text += f"--- ARTICLE {i+1} ---\n"
|
| 1232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1233 |
evidence_text += f"Title: {snippet['title']}\n"
|
| 1234 |
evidence_text += f"Source: {snippet['source_type']}\n"
|
| 1235 |
evidence_text += f"Content: {snippet['text']}\n"
|
|
@@ -1258,7 +1315,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
|
|
| 1258 |
msgs.append({"role": "system", "content": evidence_text})
|
| 1259 |
else:
|
| 1260 |
# If no evidence was found, inform the model
|
| 1261 |
-
no_evidence_msg = ("Note: No specific medical evidence was found in PubMed
|
| 1262 |
"Please rely on your general medical knowledge and be sure to recommend "
|
| 1263 |
"appropriate diagnostic steps and medical consultation.")
|
| 1264 |
msgs.append({"role": "system", "content": no_evidence_msg})
|
|
@@ -1355,7 +1412,7 @@ def run_consultation(use_rag=True):
|
|
| 1355 |
print("Type 'exit' to end or 'next' for a new case.\n")
|
| 1356 |
|
| 1357 |
if use_rag:
|
| 1358 |
-
print("Using medical evidence from: PubMed, PMC,
|
| 1359 |
print("Sources marked with 🔓 provide full text access\n")
|
| 1360 |
|
| 1361 |
consultation_id = str(uuid.uuid4())[:8]
|
|
@@ -1506,8 +1563,8 @@ SEARCH_PUBMED_SCHEMA = {
|
|
| 1506 |
},
|
| 1507 |
"retmax": {
|
| 1508 |
"type": "integer",
|
| 1509 |
-
"description": "Maximum number of results to return (default:
|
| 1510 |
-
"default":
|
| 1511 |
},
|
| 1512 |
"api_key": {
|
| 1513 |
"type": "string",
|
|
@@ -1530,6 +1587,52 @@ EXAMPLE_FUNCTION_CALL = {
|
|
| 1530 |
"name": "search_pubmed",
|
| 1531 |
"arguments": {
|
| 1532 |
"query": "headaches, fatigue, dizziness",
|
| 1533 |
-
"retmax":
|
| 1534 |
}
|
| 1535 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 692 |
return []
|
| 693 |
|
| 694 |
# Enhanced PubMed search function
|
| 695 |
+
def enhanced_search_pubmed(query, retmax=3, api_key=None):
|
| 696 |
"""
|
| 697 |
Enhanced PubMed search using E-utilities API with improved parsing and error handling.
|
| 698 |
|
|
|
|
| 857 |
return []
|
| 858 |
|
| 859 |
# Europe PMC search function
|
| 860 |
+
def search_europe_pmc(query, max_results=3, use_extracted_terms=False, extracted_terms=None):
|
| 861 |
"""
|
| 862 |
Search Europe PMC for biomedical articles, with a focus on retrieving full text when available.
|
| 863 |
Europe PMC provides more open access content than standard PubMed.
|
|
|
|
| 865 |
Args:
|
| 866 |
query (str): Search query string
|
| 867 |
max_results (int): Maximum number of results to return
|
| 868 |
+
use_extracted_terms (bool): Whether to use the extracted medical terms
|
| 869 |
+
extracted_terms (list): List of extracted medical terms from the query
|
| 870 |
|
| 871 |
Returns:
|
| 872 |
list: List of article dictionaries with title, abstract, PMID, URL, and full text URL
|
|
|
|
| 880 |
# Europe PMC API base URL
|
| 881 |
base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
|
| 882 |
|
| 883 |
+
# Construct search query based on parameters
|
| 884 |
+
search_query = query
|
| 885 |
+
if use_extracted_terms and extracted_terms and len(extracted_terms) > 0:
|
| 886 |
+
# Join terms with AND for better search
|
| 887 |
+
terms_query = " AND ".join(extracted_terms)
|
| 888 |
+
search_query = terms_query
|
| 889 |
+
print(f"Searching Europe PMC with extracted terms: {terms_query}")
|
| 890 |
+
|
| 891 |
# Search parameters - specifically looking for open access when possible
|
| 892 |
search_params = {
|
| 893 |
+
"query": f"({search_query}) AND OPEN_ACCESS:y", # Prioritize open access
|
| 894 |
"format": "json",
|
| 895 |
"pageSize": max_results,
|
| 896 |
"resultType": "core" # Get core metadata
|
| 897 |
}
|
| 898 |
|
| 899 |
+
print(f"Searching Europe PMC with query: {search_query}")
|
| 900 |
response = requests.get(base_url, params=search_params)
|
| 901 |
|
| 902 |
if response.status_code != 200:
|
| 903 |
print(f"Europe PMC search error: {response.status_code}")
|
| 904 |
# Try again without open access restriction if no results
|
| 905 |
+
search_params["query"] = search_query
|
| 906 |
response = requests.get(base_url, params=search_params)
|
| 907 |
if response.status_code != 200:
|
| 908 |
return []
|
|
|
|
| 913 |
hit_count = data.get("hitCount", 0)
|
| 914 |
if hit_count == 0:
|
| 915 |
print("No Europe PMC results found")
|
| 916 |
+
# If we used extracted terms and got no results, try with the original query
|
| 917 |
+
if use_extracted_terms and extracted_terms:
|
| 918 |
+
print("Retrying Europe PMC search with original query")
|
| 919 |
+
return search_europe_pmc(query, max_results, False, None)
|
| 920 |
return []
|
| 921 |
|
| 922 |
# Process results
|
|
|
|
| 1021 |
"""
|
| 1022 |
Fetch medical evidence using a multi-source approach:
|
| 1023 |
1. Search with extracted medical terms in PubMed
|
| 1024 |
+
2. Search with extracted medical terms in Europe PMC
|
| 1025 |
+
3. Search with the original query in PubMed
|
| 1026 |
+
4. Search with the original query in Europe PMC
|
| 1027 |
|
| 1028 |
This provides better coverage and relevance from multiple sources.
|
| 1029 |
|
|
|
|
| 1039 |
|
| 1040 |
# Step 1: Extract medical terms from the query
|
| 1041 |
medical_terms = extract_medical_terms(query)
|
| 1042 |
+
has_medical_terms = len(medical_terms) > 0
|
| 1043 |
+
|
| 1044 |
+
# Initialize results containers
|
| 1045 |
+
terms_pubmed_results = []
|
| 1046 |
+
full_pubmed_results = []
|
| 1047 |
+
terms_europepmc_results = []
|
| 1048 |
+
full_europepmc_results = []
|
| 1049 |
|
| 1050 |
# Only use extracted terms if we found any
|
| 1051 |
+
if has_medical_terms:
|
| 1052 |
+
# Join terms with commas for PubMed
|
| 1053 |
terms_query = ", ".join(medical_terms)
|
| 1054 |
print(f"Searching PubMed with extracted terms: {terms_query}")
|
| 1055 |
|
| 1056 |
+
# Search PubMed with extracted terms
|
| 1057 |
+
terms_pubmed_results = enhanced_search_pubmed(terms_query, retmax=2, api_key=pubmed_api_key)
|
| 1058 |
+
|
| 1059 |
+
# Search Europe PMC with extracted terms
|
| 1060 |
+
print(f"Searching Europe PMC with extracted terms")
|
| 1061 |
+
terms_europepmc_results = search_europe_pmc(query, max_results=2,
|
| 1062 |
+
use_extracted_terms=True,
|
| 1063 |
+
extracted_terms=medical_terms)
|
| 1064 |
|
| 1065 |
+
# Search with the full original query in both sources
|
|
|
|
| 1066 |
print(f"Searching PubMed with full query")
|
| 1067 |
+
full_pubmed_results = enhanced_search_pubmed(query, retmax=2, api_key=pubmed_api_key)
|
| 1068 |
|
| 1069 |
+
print(f"Searching Europe PMC with full query")
|
| 1070 |
+
full_europepmc_results = search_europe_pmc(query, max_results=2)
|
|
|
|
| 1071 |
|
| 1072 |
+
# Step 3: Combine results, ensuring no duplicates by PMID or DOI
|
| 1073 |
all_results = []
|
| 1074 |
seen_pmids = set()
|
| 1075 |
seen_dois = set()
|
| 1076 |
|
| 1077 |
# Process results in order of preference:
|
| 1078 |
+
# 1. Terms search from PubMed (if available)
|
| 1079 |
+
# 2. Terms search from Europe PMC (if available)
|
| 1080 |
+
# 3. Full query from PubMed
|
| 1081 |
+
# 4. Full query from Europe PMC
|
| 1082 |
|
| 1083 |
# Add results from terms search first (often more relevant)
|
| 1084 |
+
for result in terms_pubmed_results:
|
| 1085 |
+
pmid = result.get("pmid")
|
| 1086 |
+
if pmid and pmid not in seen_pmids and len(all_results) < max_results:
|
| 1087 |
seen_pmids.add(pmid)
|
| 1088 |
# Format for compatibility with existing code
|
| 1089 |
all_results.append({
|
|
|
|
| 1092 |
"text": result["abstract"],
|
| 1093 |
"citation": result["citation"],
|
| 1094 |
"url": result["url"],
|
| 1095 |
+
"source_type": "PubMed" + (" (Full Text Available)" if result.get("has_full_text") else ""),
|
| 1096 |
+
"is_open_access": result.get("has_full_text", False),
|
| 1097 |
"pmid": pmid # Keep the original PMID for direct access
|
| 1098 |
})
|
| 1099 |
|
| 1100 |
+
# Add Europe PMC terms results
|
| 1101 |
+
for result in terms_europepmc_results:
|
| 1102 |
# Some Europe PMC articles may not have a PMID, use DOI as fallback
|
| 1103 |
pmid = result.get("pmid")
|
| 1104 |
doi = result.get("doi")
|
| 1105 |
|
| 1106 |
+
# Skip if we've already seen this article via PMID or DOI
|
| 1107 |
+
if (pmid and pmid in seen_pmids) or (doi and doi in seen_dois):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1108 |
continue
|
| 1109 |
|
| 1110 |
# Skip if we've reached our max
|
|
|
|
| 1117 |
if doi:
|
| 1118 |
seen_dois.add(doi)
|
| 1119 |
|
| 1120 |
+
# Determine ID format (prefer PMID if available, fall back to DOI)
|
| 1121 |
+
article_id = f"PMID:{pmid}" if pmid else (f"DOI:{doi}" if doi else str(uuid.uuid4())[:8])
|
| 1122 |
|
| 1123 |
# Add to results
|
| 1124 |
all_results.append({
|
| 1125 |
+
"id": article_id,
|
| 1126 |
"title": result["title"],
|
| 1127 |
"text": result["abstract"],
|
| 1128 |
"citation": result["citation"],
|
|
|
|
| 1130 |
"source_type": result["source_type"],
|
| 1131 |
"is_open_access": result["is_open_access"],
|
| 1132 |
"pmid": pmid, # May be None
|
| 1133 |
+
"doi": doi # May be None
|
| 1134 |
})
|
| 1135 |
|
| 1136 |
+
# Add full query PubMed results if we still need more
|
| 1137 |
+
for result in full_pubmed_results:
|
| 1138 |
+
pmid = result.get("pmid")
|
| 1139 |
+
if pmid and pmid not in seen_pmids and len(all_results) < max_results:
|
| 1140 |
seen_pmids.add(pmid)
|
|
|
|
| 1141 |
all_results.append({
|
| 1142 |
"id": f"PMID:{pmid}",
|
| 1143 |
"title": result["title"],
|
| 1144 |
"text": result["abstract"],
|
| 1145 |
"citation": result["citation"],
|
| 1146 |
"url": result["url"],
|
| 1147 |
+
"source_type": "PubMed" + (" (Full Text Available)" if result.get("has_full_text") else ""),
|
| 1148 |
+
"is_open_access": result.get("has_full_text", False),
|
| 1149 |
+
"pmid": pmid
|
| 1150 |
})
|
| 1151 |
|
| 1152 |
+
# Add full query Europe PMC results if we still need more
|
| 1153 |
+
for result in full_europepmc_results:
|
| 1154 |
+
pmid = result.get("pmid")
|
| 1155 |
+
doi = result.get("doi")
|
| 1156 |
+
|
| 1157 |
+
# Skip if we've already seen this article via PMID or DOI
|
| 1158 |
+
if (pmid and pmid in seen_pmids) or (doi and doi in seen_dois):
|
| 1159 |
+
continue
|
| 1160 |
+
|
| 1161 |
+
# Skip if we've reached our max
|
| 1162 |
+
if len(all_results) >= max_results:
|
| 1163 |
+
break
|
| 1164 |
+
|
| 1165 |
+
# Add to seen IDs
|
| 1166 |
+
if pmid:
|
| 1167 |
+
seen_pmids.add(pmid)
|
| 1168 |
+
if doi:
|
| 1169 |
+
seen_dois.add(doi)
|
| 1170 |
+
|
| 1171 |
+
# Determine ID format (prefer PMID if available, fall back to DOI)
|
| 1172 |
+
article_id = f"PMID:{pmid}" if pmid else (f"DOI:{doi}" if doi else str(uuid.uuid4())[:8])
|
| 1173 |
+
|
| 1174 |
+
# Add to results
|
| 1175 |
+
all_results.append({
|
| 1176 |
+
"id": article_id,
|
| 1177 |
+
"title": result["title"],
|
| 1178 |
+
"text": result["abstract"],
|
| 1179 |
+
"citation": result["citation"],
|
| 1180 |
+
"url": result["url"],
|
| 1181 |
+
"source_type": result["source_type"],
|
| 1182 |
+
"is_open_access": result["is_open_access"],
|
| 1183 |
+
"pmid": pmid, # May be None
|
| 1184 |
+
"doi": doi # May be None
|
| 1185 |
+
})
|
| 1186 |
|
| 1187 |
+
# Ensure we have exactly max_results results (or fewer if not enough found)
|
| 1188 |
+
return all_results[:max_results]
|
| 1189 |
|
| 1190 |
# Function to parse doctor agent responses
|
| 1191 |
def parse_doctor_response(response_text):
|
|
|
|
| 1272 |
|
| 1273 |
# Format evidence for the model
|
| 1274 |
if evidence_snippets:
|
| 1275 |
+
evidence_text = "MEDICAL EVIDENCE FROM MULTIPLE SOURCES:\n\n"
|
| 1276 |
|
| 1277 |
for i, snippet in enumerate(evidence_snippets):
|
| 1278 |
+
# Format the evidence with clear PMID or DOI for citation
|
| 1279 |
pmid = snippet.get("pmid", "")
|
| 1280 |
+
doi = snippet.get("doi", "")
|
| 1281 |
+
|
| 1282 |
evidence_text += f"--- ARTICLE {i+1} ---\n"
|
| 1283 |
+
|
| 1284 |
+
# Include the appropriate identifiers
|
| 1285 |
+
if pmid:
|
| 1286 |
+
evidence_text += f"PMID: {pmid}\n"
|
| 1287 |
+
if doi:
|
| 1288 |
+
evidence_text += f"DOI: {doi}\n"
|
| 1289 |
+
|
| 1290 |
evidence_text += f"Title: {snippet['title']}\n"
|
| 1291 |
evidence_text += f"Source: {snippet['source_type']}\n"
|
| 1292 |
evidence_text += f"Content: {snippet['text']}\n"
|
|
|
|
| 1315 |
msgs.append({"role": "system", "content": evidence_text})
|
| 1316 |
else:
|
| 1317 |
# If no evidence was found, inform the model
|
| 1318 |
+
no_evidence_msg = ("Note: No specific medical evidence was found for this query in PubMed or Europe PMC. "
|
| 1319 |
"Please rely on your general medical knowledge and be sure to recommend "
|
| 1320 |
"appropriate diagnostic steps and medical consultation.")
|
| 1321 |
msgs.append({"role": "system", "content": no_evidence_msg})
|
|
|
|
| 1412 |
print("Type 'exit' to end or 'next' for a new case.\n")
|
| 1413 |
|
| 1414 |
if use_rag:
|
| 1415 |
+
print("Using medical evidence from: PubMed, Europe PMC, and other medical databases")
|
| 1416 |
print("Sources marked with 🔓 provide full text access\n")
|
| 1417 |
|
| 1418 |
consultation_id = str(uuid.uuid4())[:8]
|
|
|
|
| 1563 |
},
|
| 1564 |
"retmax": {
|
| 1565 |
"type": "integer",
|
| 1566 |
+
"description": "Maximum number of results to return (default: 3)",
|
| 1567 |
+
"default": 3
|
| 1568 |
},
|
| 1569 |
"api_key": {
|
| 1570 |
"type": "string",
|
|
|
|
| 1587 |
"name": "search_pubmed",
|
| 1588 |
"arguments": {
|
| 1589 |
"query": "headaches, fatigue, dizziness",
|
| 1590 |
+
"retmax": 3
|
| 1591 |
}
|
| 1592 |
+
}
|
| 1593 |
+
|
| 1594 |
+
# Function to enhance medical queries using LLM
|
| 1595 |
+
def enhance_medical_query(original_query):
    """
    Uses LLM to enhance a medical query for better search results.
    This function is prepared for future use but is not currently enabled.

    The rewrite is best-effort: whenever no usable enhanced query can be
    produced (blank input, an error while calling the model, or an empty
    completion) the original query is returned unchanged, so the caller
    always receives something it can search with.

    Args:
        original_query (str): The original user query

    Returns:
        str: An enhanced query optimized for medical search, or
            ``original_query`` unchanged when enhancement is not possible
    """
    # Nothing to optimize: skip the API round-trip for blank or non-text input.
    if not isinstance(original_query, str) or not original_query.strip():
        return original_query

    # System prompt for query enhancement
    system_prompt = (
        "You are a medical search query optimizer.\n"
        "Your job is to take a user's medical question and rewrite it to be more effective for searching\n"
        "medical databases like PubMed and Europe PMC.\n"
        "\n"
        "Guidelines:\n"
        "1. Extract key medical terms, conditions, symptoms, and treatments\n"
        "2. Use proper medical terminology where possible\n"
        "3. Structure the query for optimal search performance\n"
        "4. Return ONLY the enhanced query without explanation\n"
        "5. Keep the query concise but comprehensive\n"
    )

    try:
        # Call OpenAI to enhance the query
        # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
        # interface - confirm the installed openai version before enabling.
        enhanced_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Using a smaller model for speed and cost efficiency
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Optimize this medical query for database search: {original_query}"},
            ],
            temperature=0.3,
            max_tokens=100,
        )
        enhanced_query = enhanced_response.choices[0].message['content'].strip()
    except Exception as e:
        # Deliberately broad: any failure here (network, auth, malformed
        # response) must degrade to the original query, not break the search.
        print(f"Error enhancing query: {str(e)}")
        return original_query

    # A blank completion would make the downstream searches run on an empty
    # string, so treat it like a failure and keep the user's wording.
    if not enhanced_query:
        print("Enhanced query was empty; using original query")
        return original_query

    print(f"Enhanced query: {enhanced_query}")
    return enhanced_query
|