Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Files Community

Dhruv-Ty committed on May 19, 2025

Commit

0a6ef9c

verified ·

1 Parent(s): bbf8466

Update src/model.py

Browse files

Files changed (1) hide show

src/model.py +440 -187

src/model.py CHANGED Viewed

@@ -67,7 +67,7 @@ Remember that this is an ongoing consultation where continuity of care is import
 def extract_and_link_sources(text, evidence_snippets):
     """
     Replace [PMID:123456] citation placeholders with actual links to PubMed articles.
-    Also handles other citation formats for compatibility.
     Args:
         text (str): Text containing citations
@@ -78,16 +78,20 @@ def extract_and_link_sources(text, evidence_snippets):
     """
     # Look for [PMID:123456] format first (preferred)
     pmid_pattern = r'\[PMID:(\d+)\]'
     # Also look for older [source_id] format for compatibility
     source_pattern = r'\[([\w\d:_\-\.+]+)\]'
     # Find all PMID citations
     pmid_matches = re.findall(pmid_pattern, text)
     # Find all other citation formats
     source_matches = re.findall(source_pattern, text)
-    # Remove PMID matches from source matches to avoid duplicates
-    source_matches = [s for s in source_matches if not s.startswith('PMID:')]
     # Create source map
     source_map = {}
@@ -116,6 +120,30 @@ def extract_and_link_sources(text, evidence_snippets):
                 }
                 break
     # Process other citation formats for backward compatibility
     for source_id_match in source_matches:
         if source_id_match not in source_map and source_id_match != "source_id":
@@ -126,7 +154,8 @@ def extract_and_link_sources(text, evidence_snippets):
                         "title": snippet["title"].strip(),
                         "url": snippet["url"],
                         "citation": snippet["citation"],
-                        "pmid": snippet.get("pmid", "")
                     }
                     break
@@ -144,9 +173,22 @@ def extract_and_link_sources(text, evidence_snippets):
             linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
     # Replace other citation formats
     for source_id_key, source_data in source_map.items():
-        if not source_id_key.startswith("PMID:"):
             safe_id = re.escape(source_id_key)
             pattern = f"\\[{safe_id}\\]"
             replacement = f"[{source_data['title']}]({source_data['url']})"
@@ -163,7 +205,8 @@ def extract_and_link_sources(text, evidence_snippets):
                     "title": snippet["title"].strip(),
                     "url": snippet["url"],
                     "citation": snippet["citation"],
-                    "pmid": snippet.get("pmid", "")
                 }
                 replacement = f"[{snippet['title']}]({snippet['url']})"
                 linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
@@ -171,6 +214,7 @@ def extract_and_link_sources(text, evidence_snippets):
     # Final fallback for any remaining placeholders
     linked_text = re.sub(r'\[source_id\]', "[Medical Reference]", linked_text)
     linked_text = re.sub(r'\[PMID:(\d+)\]', r'[PubMed Article]', linked_text)
     return linked_text, source_map
@@ -652,21 +696,333 @@ def fetch_from_core_api(query, max_results=2, api_key=None):
     except Exception:
         return []
 # Enhanced RAG System with focused PubMed searches
 def fetch_medical_evidence(query, max_results=3):
     """
-    Fetch medical evidence using a dual-search approach:
-    1. Search with extracted medical terms
-    2. Search with the original query
-    This provides better coverage and relevance while focusing exclusively on PubMed.
     Args:
         query (str): The user's original query
         max_results (int): Maximum number of results to return (now set to 3)
     Returns:
-        list: Combined and deduplicated results from both searches
     """
     # Define API key if available
     pubmed_api_key = os.environ.get("PUBMED_API_KEY")
@@ -682,18 +1038,28 @@ def fetch_medical_evidence(query, max_results=3):
         # Search with extracted terms (Search A)
         # Increase from 2 to 3 results from this search
-        terms_results = enhanced_search_pubmed(terms_query, retmax=3, api_key=pubmed_api_key)
     else:
         terms_results = []
     # Step 2: Search with the full original query (Search B)
     # Increase from 2 to 3 results from this search
     print(f"Searching PubMed with full query")
-    full_query_results = enhanced_search_pubmed(query, retmax=3, api_key=pubmed_api_key)
-    # Step 3: Combine results, ensuring no duplicates by PMID
     all_results = []
     seen_pmids = set()
     # Add results from terms search first (often more relevant)
     for result in terms_results:
@@ -712,6 +1078,46 @@ def fetch_medical_evidence(query, max_results=3):
                 "pmid": pmid  # Keep the original PMID for direct access
             })
     # Then add results from full query search
     for result in full_query_results:
         pmid = result["pmid"]
@@ -729,11 +1135,11 @@ def fetch_medical_evidence(query, max_results=3):
                 "pmid": pmid  # Keep the original PMID for direct access
             })
-    # Step 4: Ensure we have at least some results
     if not all_results:
-        print("No relevant medical evidence found in PubMed")
     else:
-        print(f"Found {len(all_results)} relevant medical articles")
     return all_results
@@ -840,8 +1246,12 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
 1. IMPORTANT: You MUST cite 2-3 different sources in your response. Use no more than 3 sources and no fewer than 2 sources.
-2. When citing information from these articles, use the format [PMID:123456] where 123456 is the actual PubMed ID.
    Example: "Recent studies have shown improved outcomes with early intervention [PMID:34567890]."
 3. Focus on specific details from the abstracts - extract actual findings, statistics, or recommendations.
@@ -854,7 +1264,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
 7. Use the most recent sources when available, especially for treatment recommendations.
-8. If full text is available, prioritize information from those sources as they contain more complete data.
 """
             msgs.append({"role": "system", "content": evidence_text})
@@ -878,15 +1290,21 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         1. A direct answer to the patient's concerns.
         2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
         3. Recommendations for a treatment plan or next steps.
-        4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using the [PMID:123456] format for claims or information taken from the provided PubMed articles. Use no more than 3 sources and no fewer than 2 sources.
         **After your main response, ALWAYS include these sections:**
         -   **Reasoning**: Bullet points detailing your clinical reasoning.
-        -   **Sources**: A list of all PubMed references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
-        IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references or PMIDs.
         """
     else:
         # Different instructions when RAG is disabled - no mention of sources or citations
@@ -1097,171 +1515,6 @@ def extract_medical_terms(query, max_terms=5):
     result = list(medical_terms)[:max_terms]
     return result
-# Enhanced PubMed search function
-def enhanced_search_pubmed(query, retmax=2, api_key=None):
-    """
-    Enhanced PubMed search using E-utilities API with improved parsing and error handling.
-    Args:
-        query (str): Search query string
-        retmax (int): Maximum number of results to return
-        api_key (str, optional): NCBI API key for higher rate limits
-    Returns:
-        list: List of article dictionaries with title, abstract, PMID, URL
-    """
-    results = []
-    # Base URLs for PubMed E-utilities
-    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
-    # Rate limiting - sleep to avoid hitting rate limits
-    # NCBI allows 3 requests/second without API key, 10 with key
-    time.sleep(0.33 if api_key is None else 0.1)
-    try:
-        # Step 1: Use ESearch to get PMIDs
-        search_params = {
-            "db": "pubmed",
-            "term": query,
-            "retmax": retmax,
-            "retmode": "json",
-            "sort": "relevance"
-        }
-        if api_key:
-            search_params["api_key"] = api_key
-        search_response = requests.get(f"{base_url}esearch.fcgi", params=search_params)
-        if search_response.status_code != 200:
-            print(f"PubMed search error: {search_response.status_code}")
-            return []
-        search_data = search_response.json()
-        if "esearchresult" not in search_data or "idlist" not in search_data["esearchresult"]:
-            print("No results found or invalid response format")
-            return []
-        pmids = search_data["esearchresult"]["idlist"]
-        if not pmids:
-            print("No PMIDs found for the query")
-            return []
-        # Rate limiting before second request
-        time.sleep(0.33 if api_key is None else 0.1)
-        # Step 2: Use EFetch to get article details with abstracts
-        fetch_params = {
-            "db": "pubmed",
-            "id": ",".join(pmids),
-            "retmode": "xml",
-            "rettype": "abstract"
-        }
-        if api_key:
-            fetch_params["api_key"] = api_key
-        fetch_response = requests.get(f"{base_url}efetch.fcgi", params=fetch_params)
-        if fetch_response.status_code != 200:
-            print(f"PubMed fetch error: {fetch_response.status_code}")
-            return []
-        # Step 3: Parse XML response
-        root = ET.fromstring(fetch_response.text)
-        for article in root.findall(".//PubmedArticle"):
-            try:
-                # Extract PMID
-                pmid = article.findtext(".//PMID")
-                if not pmid:
-                    continue
-                # Extract title
-                title = article.findtext(".//ArticleTitle") or "No title available"
-                # Extract abstract sections with labels if available
-                abstract_sections = []
-                for abstract_text in article.findall(".//AbstractText"):
-                    label = abstract_text.get("Label", "")
-                    text = abstract_text.text or ""
-                    if label and text:
-                        abstract_sections.append(f"{label}: {text}")
-                    elif text:
-                        abstract_sections.append(text)
-                # If no structured abstract, try to get the plain abstract
-                if not abstract_sections:
-                    abstract_text = article.findtext(".//Abstract/AbstractText")
-                    if abstract_text:
-                        abstract_sections.append(abstract_text)
-                # Join all abstract sections
-                abstract = " ".join(abstract_sections) or "Abstract not available"
-                # Extract authors
-                authors = []
-                for author in article.findall(".//Author"):
-                    last_name = author.findtext(".//LastName") or ""
-                    initials = author.findtext(".//Initials") or ""
-                    if last_name and initials:
-                        authors.append(f"{last_name} {initials}")
-                # Format authors for citation
-                author_text = ""
-                if authors:
-                    if len(authors) == 1:
-                        author_text = authors[0]
-                    elif len(authors) == 2:
-                        author_text = f"{authors[0]} & {authors[1]}"
-                    else:
-                        author_text = f"{authors[0]} et al."
-                # Extract journal and publication year
-                journal = article.findtext(".//Journal/Title") or "Unknown Journal"
-                year = article.findtext(".//PubDate/Year") or ""
-                # Create direct URL to PubMed article
-                url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
-                # Create citation
-                citation = f"{author_text}{' ' if author_text else ''}({year}). {title}. {journal}. PMID: {pmid}"
-                # Check for full text availability
-                pmc_id = article.findtext(".//ArticleId[@IdType='pmc']")
-                has_full_text = bool(pmc_id)
-                full_text_url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/" if pmc_id else None
-                # Create result object
-                result = {
-                    "pmid": pmid,
-                    "title": title,
-                    "abstract": abstract,
-                    "authors": authors,
-                    "journal": journal,
-                    "year": year,
-                    "url": url,
-                    "full_text_url": full_text_url,
-                    "has_full_text": has_full_text,
-                    "citation": citation
-                }
-                results.append(result)
-            except Exception as e:
-                print(f"Error parsing article {pmid}: {str(e)}")
-                continue
-        return results
-    except Exception as e:
-        print(f"Error in PubMed search: {str(e)}")
-        return []
 # JSON schema for the search_pubmed function for API documentation
 SEARCH_PUBMED_SCHEMA = {
     "name": "search_pubmed",

 def extract_and_link_sources(text, evidence_snippets):
     """
     Replace [PMID:123456] citation placeholders with actual links to PubMed articles.
+    Also handles DOI citations and other citation formats for compatibility.
     Args:
         text (str): Text containing citations
     """
     # Look for [PMID:123456] format first (preferred)
     pmid_pattern = r'\[PMID:(\d+)\]'
+    # Look for [DOI:10.xxxx/yyyy] format for Europe PMC articles
+    doi_pattern = r'\[DOI:(10\.\d+\/[^\]]+)\]'
     # Also look for older [source_id] format for compatibility
     source_pattern = r'\[([\w\d:_\-\.+]+)\]'
     # Find all PMID citations
     pmid_matches = re.findall(pmid_pattern, text)
+    # Find all DOI citations
+    doi_matches = re.findall(doi_pattern, text)
     # Find all other citation formats
     source_matches = re.findall(source_pattern, text)
+    # Remove PMID and DOI matches from source matches to avoid duplicates
+    source_matches = [s for s in source_matches if not (s.startswith('PMID:') or s.startswith('DOI:'))]
     # Create source map
     source_map = {}
                 }
                 break
+    # Process DOI citations
+    for doi in doi_matches:
+        for snippet in evidence_snippets:
+            # Check if this is a direct DOI match
+            if 'doi' in snippet and snippet['doi'] == doi:
+                source_map[f"DOI:{doi}"] = {
+                    "id": snippet.get("id", f"DOI:{doi}"),
+                    "title": snippet["title"].strip(),
+                    "url": snippet["url"],
+                    "citation": snippet["citation"],
+                    "doi": doi
+                }
+                break
+            # Also check the ID field which might contain DOI
+            elif snippet.get("id") == f"DOI:{doi}":
+                source_map[f"DOI:{doi}"] = {
+                    "id": snippet["id"],
+                    "title": snippet["title"].strip(),
+                    "url": snippet["url"],
+                    "citation": snippet["citation"],
+                    "doi": doi
+                }
+                break
     # Process other citation formats for backward compatibility
     for source_id_match in source_matches:
         if source_id_match not in source_map and source_id_match != "source_id":
                         "title": snippet["title"].strip(),
                         "url": snippet["url"],
                         "citation": snippet["citation"],
+                        "pmid": snippet.get("pmid", ""),
+                        "doi": snippet.get("doi", "")
                     }
                     break
             linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
+    # Replace DOI citations with links
+    for doi_key in [f"DOI:{doi}" for doi in doi_matches]:
+        if doi_key in source_map:
+            source_data = source_map[doi_key]
+            safe_key = re.escape(doi_key)
+            pattern = f"\\[{safe_key}\\]"
+            # Create a replacement with title and URL
+            short_title = source_data['title'][:60] + "..." if len(source_data['title']) > 60 else source_data['title']
+            replacement = f"[{short_title}]({source_data['url']})"
+            linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
     # Replace other citation formats
     for source_id_key, source_data in source_map.items():
+        if not (source_id_key.startswith("PMID:") or source_id_key.startswith("DOI:")):
             safe_id = re.escape(source_id_key)
             pattern = f"\\[{safe_id}\\]"
             replacement = f"[{source_data['title']}]({source_data['url']})"
                     "title": snippet["title"].strip(),
                     "url": snippet["url"],
                     "citation": snippet["citation"],
+                    "pmid": snippet.get("pmid", ""),
+                    "doi": snippet.get("doi", "")
                 }
                 replacement = f"[{snippet['title']}]({snippet['url']})"
                 linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
     # Final fallback for any remaining placeholders
     linked_text = re.sub(r'\[source_id\]', "[Medical Reference]", linked_text)
     linked_text = re.sub(r'\[PMID:(\d+)\]', r'[PubMed Article]', linked_text)
+    linked_text = re.sub(r'\[DOI:(10\.\d+\/[^\]]+)\]', r'[Europe PMC Article]', linked_text)
     return linked_text, source_map
     except Exception:
         return []
+# Enhanced PubMed search function
+def enhanced_search_pubmed(query, retmax=2, api_key=None):
+    """
+    Enhanced PubMed search using E-utilities API with improved parsing and error handling.
+    Args:
+        query (str): Search query string
+        retmax (int): Maximum number of results to return
+        api_key (str, optional): NCBI API key for higher rate limits
+    Returns:
+        list: List of article dictionaries with title, abstract, PMID, URL
+    """
+    results = []
+    # Base URLs for PubMed E-utilities
+    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
+    # Rate limiting - sleep to avoid hitting rate limits
+    # NCBI allows 3 requests/second without API key, 10 with key
+    time.sleep(0.33 if api_key is None else 0.1)
+    try:
+        # Step 1: Use ESearch to get PMIDs
+        search_params = {
+            "db": "pubmed",
+            "term": query,
+            "retmax": retmax,
+            "retmode": "json",
+            "sort": "relevance"
+        }
+        if api_key:
+            search_params["api_key"] = api_key
+        search_response = requests.get(f"{base_url}esearch.fcgi", params=search_params)
+        if search_response.status_code != 200:
+            print(f"PubMed search error: {search_response.status_code}")
+            return []
+        search_data = search_response.json()
+        if "esearchresult" not in search_data or "idlist" not in search_data["esearchresult"]:
+            print("No results found or invalid response format")
+            return []
+        pmids = search_data["esearchresult"]["idlist"]
+        if not pmids:
+            print("No PMIDs found for the query")
+            return []
+        # Rate limiting before second request
+        time.sleep(0.33 if api_key is None else 0.1)
+        # Step 2: Use EFetch to get article details with abstracts
+        fetch_params = {
+            "db": "pubmed",
+            "id": ",".join(pmids),
+            "retmode": "xml",
+            "rettype": "abstract"
+        }
+        if api_key:
+            fetch_params["api_key"] = api_key
+        fetch_response = requests.get(f"{base_url}efetch.fcgi", params=fetch_params)
+        if fetch_response.status_code != 200:
+            print(f"PubMed fetch error: {fetch_response.status_code}")
+            return []
+        # Step 3: Parse XML response
+        root = ET.fromstring(fetch_response.text)
+        for article in root.findall(".//PubmedArticle"):
+            try:
+                # Extract PMID
+                pmid = article.findtext(".//PMID")
+                if not pmid:
+                    continue
+                # Extract title
+                title = article.findtext(".//ArticleTitle") or "No title available"
+                # Extract abstract sections with labels if available
+                abstract_sections = []
+                for abstract_text in article.findall(".//AbstractText"):
+                    label = abstract_text.get("Label", "")
+                    text = abstract_text.text or ""
+                    if label and text:
+                        abstract_sections.append(f"{label}: {text}")
+                    elif text:
+                        abstract_sections.append(text)
+                # If no structured abstract, try to get the plain abstract
+                if not abstract_sections:
+                    abstract_text = article.findtext(".//Abstract/AbstractText")
+                    if abstract_text:
+                        abstract_sections.append(abstract_text)
+                # Join all abstract sections
+                abstract = " ".join(abstract_sections) or "Abstract not available"
+                # Extract authors
+                authors = []
+                for author in article.findall(".//Author"):
+                    last_name = author.findtext(".//LastName") or ""
+                    initials = author.findtext(".//Initials") or ""
+                    if last_name and initials:
+                        authors.append(f"{last_name} {initials}")
+                # Format authors for citation
+                author_text = ""
+                if authors:
+                    if len(authors) == 1:
+                        author_text = authors[0]
+                    elif len(authors) == 2:
+                        author_text = f"{authors[0]} & {authors[1]}"
+                    else:
+                        author_text = f"{authors[0]} et al."
+                # Extract journal and publication year
+                journal = article.findtext(".//Journal/Title") or "Unknown Journal"
+                year = article.findtext(".//PubDate/Year") or ""
+                # Create direct URL to PubMed article
+                url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
+                # Create citation
+                citation = f"{author_text}{' ' if author_text else ''}({year}). {title}. {journal}. PMID: {pmid}"
+                # Check for full text availability
+                pmc_id = article.findtext(".//ArticleId[@IdType='pmc']")
+                has_full_text = bool(pmc_id)
+                full_text_url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/" if pmc_id else None
+                # Create result object
+                result = {
+                    "pmid": pmid,
+                    "title": title,
+                    "abstract": abstract,
+                    "authors": authors,
+                    "journal": journal,
+                    "year": year,
+                    "url": url,
+                    "full_text_url": full_text_url,
+                    "has_full_text": has_full_text,
+                    "citation": citation
+                }
+                results.append(result)
+            except Exception as e:
+                print(f"Error parsing article {pmid}: {str(e)}")
+                continue
+        return results
+    except Exception as e:
+        print(f"Error in PubMed search: {str(e)}")
+        return []
+# Europe PMC search function
+def search_europe_pmc(query, max_results=2):
+    """
+    Search Europe PMC for biomedical articles, with a focus on retrieving full text when available.
+    Europe PMC provides more open access content than standard PubMed.
+    Args:
+        query (str): Search query string
+        max_results (int): Maximum number of results to return
+    Returns:
+        list: List of article dictionaries with title, abstract, PMID, URL, and full text URL
+    """
+    results = []
+    # Rate limiting - Europe PMC allows 30 requests per minute per IP
+    time.sleep(2.0)  # Conservative rate limiting
+    try:
+        # Europe PMC API base URL
+        base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
+        # Search parameters - specifically looking for open access when possible
+        search_params = {
+            "query": f"({query}) AND OPEN_ACCESS:y",  # Prioritize open access
+            "format": "json",
+            "pageSize": max_results,
+            "resultType": "core"  # Get core metadata
+        }
+        print(f"Searching Europe PMC with query: {query}")
+        response = requests.get(base_url, params=search_params)
+        if response.status_code != 200:
+            print(f"Europe PMC search error: {response.status_code}")
+            # Try again without open access restriction if no results
+            search_params["query"] = query
+            response = requests.get(base_url, params=search_params)
+            if response.status_code != 200:
+                return []
+        data = response.json()
+        # Check if we have results
+        hit_count = data.get("hitCount", 0)
+        if hit_count == 0:
+            print("No Europe PMC results found")
+            return []
+        # Process results
+        articles = data.get("resultList", {}).get("result", [])
+        for article in articles:
+            try:
+                # Extract basic metadata
+                pmid = article.get("pmid")
+                doi = article.get("doi")
+                title = article.get("title", "No title available")
+                abstract = article.get("abstractText", "Abstract not available")
+                journal = article.get("journalTitle", "Unknown Journal")
+                pub_year = article.get("pubYear", "")
+                # Check if it's open access
+                is_open_access = article.get("isOpenAccess") == "Y"
+                # Get full text URL if available
+                full_text_url = None
+                full_text_urls = article.get("fullTextUrlList", {}).get("fullTextUrl", [])
+                for url_entry in full_text_urls:
+                    if url_entry.get("availability") == "Open access" or url_entry.get("documentStyle") == "pdf":
+                        full_text_url = url_entry.get("url")
+                        break
+                # If no specific full text URL found but we have a PMID, create Europe PMC link
+                if not full_text_url and pmid:
+                    full_text_url = f"https://europepmc.org/article/MED/{pmid}"
+                elif not full_text_url and doi:
+                    full_text_url = f"https://doi.org/{doi}"
+                # Get authors
+                author_list = article.get("authorList", {}).get("author", [])
+                authors = []
+                for author in author_list:
+                    last_name = author.get("lastName", "")
+                    initials = author.get("initials", "")
+                    if last_name:
+                        authors.append(f"{last_name} {initials}")
+                # Format author citation
+                author_text = ""
+                if authors:
+                    if len(authors) == 1:
+                        author_text = authors[0]
+                    elif len(authors) == 2:
+                        author_text = f"{authors[0]} & {authors[1]}"
+                    else:
+                        author_text = f"{authors[0]} et al."
+                # Create citation
+                citation = f"{author_text}{' ' if author_text else ''}({pub_year}). {title}. {journal}."
+                if pmid:
+                    citation += f" PMID: {pmid}"
+                if doi:
+                    citation += f" DOI: {doi}"
+                # Create a direct URL to access the article
+                url = full_text_url if full_text_url else (
+                    f"https://europepmc.org/article/MED/{pmid}" if pmid else (
+                        f"https://doi.org/{doi}" if doi else ""
+                    )
+                )
+                # Create source type with OA indicator
+                source_type = "Europe PMC" + (" (Open Access)" if is_open_access else "")
+                # Format for compatibility with existing code
+                result = {
+                    "pmid": pmid,  # May be None for some articles
+                    "doi": doi,    # Alternative identifier
+                    "title": title,
+                    "abstract": abstract,
+                    "authors": authors,
+                    "journal": journal,
+                    "year": pub_year,
+                    "url": url,
+                    "full_text_url": full_text_url,
+                    "has_full_text": is_open_access or full_text_url is not None,
+                    "citation": citation,
+                    "source_type": source_type,
+                    "is_open_access": is_open_access
+                }
+                results.append(result)
+            except Exception as e:
+                print(f"Error parsing Europe PMC article: {str(e)}")
+                continue
+        print(f"Found {len(results)} Europe PMC articles")
+        return results
+    except Exception as e:
+        print(f"Error in Europe PMC search: {str(e)}")
+        return []
 # Enhanced RAG system combining PubMed and Europe PMC searches
 def fetch_medical_evidence(query, max_results=3):
     """
+    Fetch medical evidence using a multi-source approach:
+    1. Search with extracted medical terms in PubMed
+    2. Search with the original query in PubMed
+    3. Search in Europe PMC for additional full-text articles
+    This provides better coverage and relevance from multiple sources.
     Args:
         query (str): The user's original query
         max_results (int): Maximum number of results to return (now set to 3)
     Returns:
+        list: Combined and deduplicated results from all searches
     """
     # Define API key if available
     pubmed_api_key = os.environ.get("PUBMED_API_KEY")
         # Search with extracted terms (Search A)
         # Request up to 2 results from this search
+        terms_results = enhanced_search_pubmed(terms_query, retmax=2, api_key=pubmed_api_key)
     else:
         terms_results = []
     # Step 2: Search with the full original query (Search B)
     # Request up to 2 results from this search
     print(f"Searching PubMed with full query")
+    full_query_results = enhanced_search_pubmed(query, retmax=2, api_key=pubmed_api_key)
+    # Step 3: Search in Europe PMC for additional results with full text
+    print(f"Searching Europe PMC")
+    europepmc_results = search_europe_pmc(query, max_results=2)
+    # Step 4: Combine results, ensuring no duplicates by PMID or DOI
     all_results = []
     seen_pmids = set()
+    seen_dois = set()
+    # Process results in order of preference:
+    # 1. Terms search from PubMed
+    # 2. Europe PMC results (likely to have more full text)
+    # 3. Full query search from PubMed
     # Add results from terms search first (often more relevant)
     for result in terms_results:
                 "pmid": pmid  # Keep the original PMID for direct access
             })
+    # Add Europe PMC results next (prioritizing full text articles)
+    for result in europepmc_results:
+        # Some Europe PMC articles may not have a PMID, use DOI as fallback
+        pmid = result.get("pmid")
+        doi = result.get("doi")
+        # Skip if we've already seen this article via PMID
+        if pmid and pmid in seen_pmids:
+            continue
+        # Skip if we've already seen this article via DOI
+        if doi and doi in seen_dois:
+            continue
+        # Skip if we've reached our max
+        if len(all_results) >= max_results:
+            break
+        # Add to seen IDs
+        if pmid:
+            seen_pmids.add(pmid)
+        if doi:
+            seen_dois.add(doi)
+        # Create identifier
+        identifier = f"PMID:{pmid}" if pmid else f"DOI:{doi}"
+        # Add to results
+        all_results.append({
+            "id": identifier,
+            "title": result["title"],
+            "text": result["abstract"],
+            "citation": result["citation"],
+            "url": result["url"],
+            "source_type": result["source_type"],
+            "is_open_access": result["is_open_access"],
+            "pmid": pmid,  # May be None
+            "doi": doi     # Alternative identifier
+        })
     # Then add results from full query search
     for result in full_query_results:
         pmid = result["pmid"]
                 "pmid": pmid  # Keep the original PMID for direct access
             })
+    # Step 5: Report whether any results were found
     if not all_results:
+        print("No relevant medical evidence found")
     else:
+        print(f"Found {len(all_results)} relevant medical articles across all sources")
     return all_results
 1. IMPORTANT: You MUST cite 2-3 different sources in your response. Use no more than 3 sources and no fewer than 2 sources.
+2. When citing information from these articles, use the following formats:
+   • For PubMed articles: [PMID:123456] where 123456 is the actual PubMed ID
+   • For Europe PMC articles without PMID: [DOI:10.xxxx/yyyy] where 10.xxxx/yyyy is the DOI
    Example: "Recent studies have shown improved outcomes with early intervention [PMID:34567890]."
+   Example: "Current guidelines recommend a multidisciplinary approach [DOI:10.1234/abcd]."
 3. Focus on specific details from the abstracts - extract actual findings, statistics, or recommendations.
 7. Use the most recent sources when available, especially for treatment recommendations.
+8. If full text is available (marked as "Open Access" or "Full Text Available"), prioritize information from those sources as they contain more complete data.
+9. Europe PMC sources often provide more complete full text access, so give them equal consideration to PubMed sources.
 """
             msgs.append({"role": "system", "content": evidence_text})
         1. A direct answer to the patient's concerns.
         2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
         3. Recommendations for a treatment plan or next steps.
+        4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
+           • [PMID:123456] format for PubMed articles
+           • [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
+           Use no more than 3 sources and no fewer than 2 sources.
         **After your main response, ALWAYS include these sections:**
         -   **Reasoning**: Bullet points detailing your clinical reasoning.
+        -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
+             - DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
+               URL: https://doi.org/10.xxxx/yyyy
+        IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
     else:
         # Different instructions when RAG is disabled - no mention of sources or citations
     result = list(medical_terms)[:max_terms]
     return result
 # JSON schema for the search_pubmed function for API documentation
 SEARCH_PUBMED_SCHEMA = {
     "name": "search_pubmed",