Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Community

Dhruv-Ty committed on May 20, 2025

Commit

312d099

verified ·

1 Parent(s): f7fc61a

Update src/model.py

Browse files

Files changed (1) hide show

src/model.py +167 -64

src/model.py CHANGED Viewed

@@ -692,7 +692,7 @@ def fetch_from_core_api(query, max_results=2, api_key=None):
         return []
 # Enhanced PubMed search function
-def enhanced_search_pubmed(query, retmax=2, api_key=None):
     """
     Enhanced PubMed search using E-utilities API with improved parsing and error handling.
@@ -857,7 +857,7 @@ def enhanced_search_pubmed(query, retmax=2, api_key=None):
         return []
 # Europe PMC search function
-def search_europe_pmc(query, max_results=2):
     """
     Search Europe PMC for biomedical articles, with a focus on retrieving full text when available.
     Europe PMC provides more open access content than standard PubMed.
@@ -865,6 +865,8 @@ def search_europe_pmc(query, max_results=2):
     Args:
         query (str): Search query string
         max_results (int): Maximum number of results to return
     Returns:
         list: List of article dictionaries with title, abstract, PMID, URL, and full text URL
@@ -878,21 +880,29 @@ def search_europe_pmc(query, max_results=2):
         # Europe PMC API base URL
         base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
         # Search parameters - specifically looking for open access when possible
         search_params = {
-            "query": f"({query}) AND OPEN_ACCESS:y",  # Prioritize open access
             "format": "json",
             "pageSize": max_results,
             "resultType": "core"  # Get core metadata
         }
-        print(f"Searching Europe PMC with query: {query}")
         response = requests.get(base_url, params=search_params)
         if response.status_code != 200:
             print(f"Europe PMC search error: {response.status_code}")
             # Try again without the open access restriction if the request failed (non-200 status)
-            search_params["query"] = query
             response = requests.get(base_url, params=search_params)
             if response.status_code != 200:
                 return []
@@ -903,6 +913,10 @@ def search_europe_pmc(query, max_results=2):
         hit_count = data.get("hitCount", 0)
         if hit_count == 0:
             print("No Europe PMC results found")
             return []
         # Process results
@@ -1007,8 +1021,9 @@ def fetch_medical_evidence(query, max_results=3):
     """
     Fetch medical evidence using a multi-source approach:
     1. Search with extracted medical terms in PubMed
-    2. Search with the original query in PubMed
-    3. Search in Europe PMC for additional full-text articles
     This provides better coverage and relevance from multiple sources.
@@ -1024,42 +1039,51 @@ def fetch_medical_evidence(query, max_results=3):
     # Step 1: Extract medical terms from the query
     medical_terms = extract_medical_terms(query)
     # Only use extracted terms if we found any
-    if medical_terms:
-        # Join terms with commas for better search
         terms_query = ", ".join(medical_terms)
         print(f"Searching PubMed with extracted terms: {terms_query}")
-        # Search with extracted terms (Search A)
-        # Increase from 2 to 3 results from this search
-        terms_results = enhanced_search_pubmed(terms_query, retmax=2, api_key=pubmed_api_key)
-    else:
-        terms_results = []
-    # Step 2: Search with the full original query (Search B)
-    # Increase from 2 to 3 results from this search
     print(f"Searching PubMed with full query")
-    full_query_results = enhanced_search_pubmed(query, retmax=2, api_key=pubmed_api_key)
-    # Step 3: Search in Europe PMC for additional results with full text
-    print(f"Searching Europe PMC")
-    europepmc_results = search_europe_pmc(query, max_results=2)
-    # Step 4: Combine results, ensuring no duplicates by PMID
     all_results = []
     seen_pmids = set()
     seen_dois = set()
     # Process results in order of preference:
-    # 1. Terms search from PubMed
-    # 2. Europe PMC results (likely to have more full text)
-    # 3. Full query search from PubMed
     # Add results from terms search first (often more relevant)
-    for result in terms_results:
-        pmid = result["pmid"]
-        if pmid not in seen_pmids and len(all_results) < max_results:
             seen_pmids.add(pmid)
             # Format for compatibility with existing code
             all_results.append({
@@ -1068,23 +1092,19 @@ def fetch_medical_evidence(query, max_results=3):
                 "text": result["abstract"],
                 "citation": result["citation"],
                 "url": result["url"],
-                "source_type": "PubMed" + (" (Full Text Available)" if result["has_full_text"] else ""),
-                "is_open_access": result["has_full_text"],
                 "pmid": pmid  # Keep the original PMID for direct access
             })
-    # Add Europe PMC results next (prioritizing full text articles)
-    for result in europepmc_results:
         # Some Europe PMC articles may not have a PMID, use DOI as fallback
         pmid = result.get("pmid")
         doi = result.get("doi")
-        # Skip if we've already seen this article via PMID
-        if pmid and pmid in seen_pmids:
-            continue
-        # Skip if we've already seen this article via DOI
-        if doi and doi in seen_dois:
             continue
         # Skip if we've reached our max
@@ -1097,12 +1117,12 @@ def fetch_medical_evidence(query, max_results=3):
         if doi:
             seen_dois.add(doi)
-        # Create identifier
-        identifier = f"PMID:{pmid}" if pmid else f"DOI:{doi}"
         # Add to results
         all_results.append({
-            "id": identifier,
             "title": result["title"],
             "text": result["abstract"],
             "citation": result["citation"],
@@ -1110,33 +1130,62 @@ def fetch_medical_evidence(query, max_results=3):
             "source_type": result["source_type"],
             "is_open_access": result["is_open_access"],
             "pmid": pmid,  # May be None
-            "doi": doi     # Alternative identifier
         })
-    # Then add results from full query search
-    for result in full_query_results:
-        pmid = result["pmid"]
-        if pmid not in seen_pmids and len(all_results) < max_results:
             seen_pmids.add(pmid)
-            # Format for compatibility with existing code
             all_results.append({
                 "id": f"PMID:{pmid}",
                 "title": result["title"],
                 "text": result["abstract"],
                 "citation": result["citation"],
                 "url": result["url"],
-                "source_type": "PubMed" + (" (Full Text Available)" if result["has_full_text"] else ""),
-                "is_open_access": result["has_full_text"],
-                "pmid": pmid  # Keep the original PMID for direct access
             })
-    # Step 5: Ensure we have at least some results
-    if not all_results:
-        print("No relevant medical evidence found")
-    else:
-        print(f"Found {len(all_results)} relevant medical articles across all sources")
-    return all_results
 # Function to parse doctor agent responses
 def parse_doctor_response(response_text):
@@ -1223,13 +1272,21 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         # Format evidence for the model
         if evidence_snippets:
-            evidence_text = "MEDICAL EVIDENCE FROM PUBMED:\n\n"
             for i, snippet in enumerate(evidence_snippets):
-                # Format the evidence with clear PMID for citation
                 pmid = snippet.get("pmid", "")
                 evidence_text += f"--- ARTICLE {i+1} ---\n"
-                evidence_text += f"PMID: {pmid}\n"
                 evidence_text += f"Title: {snippet['title']}\n"
                 evidence_text += f"Source: {snippet['source_type']}\n"
                 evidence_text += f"Content: {snippet['text']}\n"
@@ -1258,7 +1315,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
             msgs.append({"role": "system", "content": evidence_text})
         else:
             # If no evidence was found, inform the model
-            no_evidence_msg = ("Note: No specific medical evidence was found in PubMed for this query. "
                               "Please rely on your general medical knowledge and be sure to recommend "
                               "appropriate diagnostic steps and medical consultation.")
             msgs.append({"role": "system", "content": no_evidence_msg})
@@ -1355,7 +1412,7 @@ def run_consultation(use_rag=True):
     print("Type 'exit' to end or 'next' for a new case.\n")
     if use_rag:
-        print("Using medical evidence from: PubMed, PMC, CORE, and WHO")
         print("Sources marked with 🔓 provide full text access\n")
     consultation_id = str(uuid.uuid4())[:8]
@@ -1506,8 +1563,8 @@ SEARCH_PUBMED_SCHEMA = {
             },
             "retmax": {
                 "type": "integer",
-                "description": "Maximum number of results to return (default: 2)",
-                "default": 2
             },
             "api_key": {
                 "type": "string",
@@ -1530,6 +1587,52 @@ EXAMPLE_FUNCTION_CALL = {
     "name": "search_pubmed",
     "arguments": {
         "query": "headaches, fatigue, dizziness",
-        "retmax": 2
     }
-}

         return []
 # Enhanced PubMed search function
+def enhanced_search_pubmed(query, retmax=3, api_key=None):
     """
     Enhanced PubMed search using E-utilities API with improved parsing and error handling.
         return []
 # Europe PMC search function
+def search_europe_pmc(query, max_results=3, use_extracted_terms=False, extracted_terms=None):
     """
     Search Europe PMC for biomedical articles, with a focus on retrieving full text when available.
     Europe PMC provides more open access content than standard PubMed.
     Args:
         query (str): Search query string
         max_results (int): Maximum number of results to return
+        use_extracted_terms (bool): Whether to use the extracted medical terms
+        extracted_terms (list): List of extracted medical terms from the query
     Returns:
         list: List of article dictionaries with title, abstract, PMID, URL, and full text URL
         # Europe PMC API base URL
         base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
+        # Construct search query based on parameters
+        search_query = query
+        if use_extracted_terms and extracted_terms and len(extracted_terms) > 0:
+            # Join terms with AND for better search
+            terms_query = " AND ".join(extracted_terms)
+            search_query = terms_query
+            print(f"Searching Europe PMC with extracted terms: {terms_query}")
         # Search parameters - specifically looking for open access when possible
         search_params = {
+            "query": f"({search_query}) AND OPEN_ACCESS:y",  # Prioritize open access
             "format": "json",
             "pageSize": max_results,
             "resultType": "core"  # Get core metadata
         }
+        print(f"Searching Europe PMC with query: {search_query}")
         response = requests.get(base_url, params=search_params)
         if response.status_code != 200:
             print(f"Europe PMC search error: {response.status_code}")
             # Try again without the open access restriction if the request failed (non-200 status)
+            search_params["query"] = search_query
             response = requests.get(base_url, params=search_params)
             if response.status_code != 200:
                 return []
         hit_count = data.get("hitCount", 0)
         if hit_count == 0:
             print("No Europe PMC results found")
+            # If we used extracted terms and got no results, try with the original query
+            if use_extracted_terms and extracted_terms:
+                print("Retrying Europe PMC search with original query")
+                return search_europe_pmc(query, max_results, False, None)
             return []
         # Process results
     """
     Fetch medical evidence using a multi-source approach:
     1. Search with extracted medical terms in PubMed
+    2. Search with extracted medical terms in Europe PMC
+    3. Search with the original query in PubMed
+    4. Search with the original query in Europe PMC
     This provides better coverage and relevance from multiple sources.
     # Step 1: Extract medical terms from the query
     medical_terms = extract_medical_terms(query)
+    has_medical_terms = len(medical_terms) > 0
+    # Initialize results containers
+    terms_pubmed_results = []
+    full_pubmed_results = []
+    terms_europepmc_results = []
+    full_europepmc_results = []
     # Only use extracted terms if we found any
+    if has_medical_terms:
+        # Join terms with commas for PubMed
         terms_query = ", ".join(medical_terms)
         print(f"Searching PubMed with extracted terms: {terms_query}")
+        # Search PubMed with extracted terms
+        terms_pubmed_results = enhanced_search_pubmed(terms_query, retmax=2, api_key=pubmed_api_key)
+        # Search Europe PMC with extracted terms
+        print(f"Searching Europe PMC with extracted terms")
+        terms_europepmc_results = search_europe_pmc(query, max_results=2,
+                                                    use_extracted_terms=True,
+                                                    extracted_terms=medical_terms)
+    # Step 2: Search with the full original query in both sources
     print(f"Searching PubMed with full query")
+    full_pubmed_results = enhanced_search_pubmed(query, retmax=2, api_key=pubmed_api_key)
+    print(f"Searching Europe PMC with full query")
+    full_europepmc_results = search_europe_pmc(query, max_results=2)
+    # Step 3: Combine results, ensuring no duplicates by PMID or DOI
     all_results = []
     seen_pmids = set()
     seen_dois = set()
     # Process results in order of preference:
+    # 1. Terms search from PubMed (if available)
+    # 2. Terms search from Europe PMC (if available)
+    # 3. Full query from PubMed
+    # 4. Full query from Europe PMC
     # Add results from terms search first (often more relevant)
+    for result in terms_pubmed_results:
+        pmid = result.get("pmid")
+        if pmid and pmid not in seen_pmids and len(all_results) < max_results:
             seen_pmids.add(pmid)
             # Format for compatibility with existing code
             all_results.append({
                 "text": result["abstract"],
                 "citation": result["citation"],
                 "url": result["url"],
+                "source_type": "PubMed" + (" (Full Text Available)" if result.get("has_full_text") else ""),
+                "is_open_access": result.get("has_full_text", False),
                 "pmid": pmid  # Keep the original PMID for direct access
             })
+    # Add Europe PMC terms results
+    for result in terms_europepmc_results:
         # Some Europe PMC articles may not have a PMID, use DOI as fallback
         pmid = result.get("pmid")
         doi = result.get("doi")
+        # Skip if we've already seen this article via PMID or DOI
+        if (pmid and pmid in seen_pmids) or (doi and doi in seen_dois):
             continue
         # Skip if we've reached our max
         if doi:
             seen_dois.add(doi)
+        # Determine ID format (prefer PMID if available, fall back to DOI)
+        article_id = f"PMID:{pmid}" if pmid else (f"DOI:{doi}" if doi else str(uuid.uuid4())[:8])
         # Add to results
         all_results.append({
+            "id": article_id,
             "title": result["title"],
             "text": result["abstract"],
             "citation": result["citation"],
             "source_type": result["source_type"],
             "is_open_access": result["is_open_access"],
             "pmid": pmid,  # May be None
+            "doi": doi     # May be None
         })
+    # Add full query PubMed results if we still need more
+    for result in full_pubmed_results:
+        pmid = result.get("pmid")
+        if pmid and pmid not in seen_pmids and len(all_results) < max_results:
             seen_pmids.add(pmid)
             all_results.append({
                 "id": f"PMID:{pmid}",
                 "title": result["title"],
                 "text": result["abstract"],
                 "citation": result["citation"],
                 "url": result["url"],
+                "source_type": "PubMed" + (" (Full Text Available)" if result.get("has_full_text") else ""),
+                "is_open_access": result.get("has_full_text", False),
+                "pmid": pmid
             })
+    # Add full query Europe PMC results if we still need more
+    for result in full_europepmc_results:
+        pmid = result.get("pmid")
+        doi = result.get("doi")
+        # Skip if we've already seen this article via PMID or DOI
+        if (pmid and pmid in seen_pmids) or (doi and doi in seen_dois):
+            continue
+        # Skip if we've reached our max
+        if len(all_results) >= max_results:
+            break
+        # Add to seen IDs
+        if pmid:
+            seen_pmids.add(pmid)
+        if doi:
+            seen_dois.add(doi)
+        # Determine ID format (prefer PMID if available, fall back to DOI)
+        article_id = f"PMID:{pmid}" if pmid else (f"DOI:{doi}" if doi else str(uuid.uuid4())[:8])
+        # Add to results
+        all_results.append({
+            "id": article_id,
+            "title": result["title"],
+            "text": result["abstract"],
+            "citation": result["citation"],
+            "url": result["url"],
+            "source_type": result["source_type"],
+            "is_open_access": result["is_open_access"],
+            "pmid": pmid,  # May be None
+            "doi": doi     # May be None
+        })
+    # Return at most max_results results (fewer if not enough were found)
+    return all_results[:max_results]
 # Function to parse doctor agent responses
 def parse_doctor_response(response_text):
         # Format evidence for the model
         if evidence_snippets:
+            evidence_text = "MEDICAL EVIDENCE FROM MULTIPLE SOURCES:\n\n"
             for i, snippet in enumerate(evidence_snippets):
+                # Format the evidence with clear PMID or DOI for citation
                 pmid = snippet.get("pmid", "")
+                doi = snippet.get("doi", "")
                 evidence_text += f"--- ARTICLE {i+1} ---\n"
+                # Include the appropriate identifiers
+                if pmid:
+                    evidence_text += f"PMID: {pmid}\n"
+                if doi:
+                    evidence_text += f"DOI: {doi}\n"
                 evidence_text += f"Title: {snippet['title']}\n"
                 evidence_text += f"Source: {snippet['source_type']}\n"
                 evidence_text += f"Content: {snippet['text']}\n"
             msgs.append({"role": "system", "content": evidence_text})
         else:
             # If no evidence was found, inform the model
+            no_evidence_msg = ("Note: No specific medical evidence was found for this query in PubMed or Europe PMC. "
                               "Please rely on your general medical knowledge and be sure to recommend "
                               "appropriate diagnostic steps and medical consultation.")
             msgs.append({"role": "system", "content": no_evidence_msg})
     print("Type 'exit' to end or 'next' for a new case.\n")
     if use_rag:
+        print("Using medical evidence from: PubMed, Europe PMC, and other medical databases")
         print("Sources marked with 🔓 provide full text access\n")
     consultation_id = str(uuid.uuid4())[:8]
             },
             "retmax": {
                 "type": "integer",
+                "description": "Maximum number of results to return (default: 3)",
+                "default": 3
             },
             "api_key": {
                 "type": "string",
     "name": "search_pubmed",
     "arguments": {
         "query": "headaches, fatigue, dizziness",
+        "retmax": 3
     }
+}
+# Function to enhance medical queries using LLM
+def enhance_medical_query(original_query):
+    """
+    Uses LLM to enhance a medical query for better search results.
+    This function is prepared for future use but is not currently enabled.
+    Args:
+        original_query (str): The original user query
+    Returns:
+        str: An enhanced query optimized for medical search
+    """
+    try:
+        # System prompt for query enhancement
+        system_prompt = """You are a medical search query optimizer.
+        Your job is to take a user's medical question and rewrite it to be more effective for searching
+        medical databases like PubMed and Europe PMC.
+        Guidelines:
+        1. Extract key medical terms, conditions, symptoms, and treatments
+        2. Use proper medical terminology where possible
+        3. Structure the query for optimal search performance
+        4. Return ONLY the enhanced query without explanation
+        5. Keep the query concise but comprehensive
+        """
+        # Call OpenAI to enhance the query
+        enhanced_response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",  # Using a smaller model for speed and cost efficiency
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"Optimize this medical query for database search: {original_query}"}
+            ],
+            temperature=0.3,
+            max_tokens=100
+        )
+        enhanced_query = enhanced_response.choices[0].message['content'].strip()
+        print(f"Enhanced query: {enhanced_query}")
+        return enhanced_query
+    except Exception as e:
+        print(f"Error enhancing query: {str(e)}")
+        # Fall back to original query if there's an error
+        return original_query