Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Files Community

Dhruv-Ty committed on May 21, 2025

Commit

cb23461

verified ·

1 Parent(s): 4c99acf

Update src/model.py

Browse files

Files changed (1) hide show

src/model.py +105 -81

src/model.py CHANGED Viewed

@@ -147,73 +147,70 @@ def extract_and_link_sources(text, evidence_snippets):
             for snippet in evidence_snippets:
                 if source_id_match == snippet["id"]:
                     source_map[source_id_match] = {
-                    "id": snippet["id"],
-                    "title": snippet["title"].strip(),
-                    "url": snippet["url"],
                         "citation": snippet["citation"],
                         "pmid": snippet.get("pmid", ""),
                         "doi": snippet.get("doi", "")
-                }
                     break
-    # Replace PMID citations with links
     linked_text = text
-    for pmid_key in [f"PMID:{pmid}" for pmid in pmid_matches]:
-        if pmid_key in source_map:
-            source_data = source_map[pmid_key]
-            safe_key = re.escape(pmid_key)
-            pattern = f"\\[{safe_key}\\]"
-            # Create a replacement with title and URL
-            short_title = source_data['title'][:60] + "..." if len(source_data['title']) > 60 else source_data['title']
-            replacement = f"[{short_title}]({source_data['url']})"
-            linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
-    # Replace DOI citations with links
-    for doi_key in [f"DOI:{doi}" for doi in doi_matches]:
-        if doi_key in source_map:
-            source_data = source_map[doi_key]
-            safe_key = re.escape(doi_key)
-            pattern = f"\\[{safe_key}\\]"
-            # Create a replacement with title and URL
-            short_title = source_data['title'][:60] + "..." if len(source_data['title']) > 60 else source_data['title']
-            replacement = f"[{short_title}]({source_data['url']})"
-            linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
-    # Replace other citation formats
-        for source_id_key, source_data in source_map.items():
-            if not (source_id_key.startswith("PMID:") or source_id_key.startswith("DOI:")):
-                safe_id = re.escape(source_id_key)
-                pattern = f"\\[{safe_id}\\]"
-                replacement = f"[{source_data['title']}]({source_data['url']})"
-                linked_text = re.sub(pattern, replacement, linked_text)
-    # Handle generic [source_id] placeholder
-    if "source_id" in source_matches:
-        # Use the first snippet available if we have any
-        if evidence_snippets and "source_id" not in source_map:
-            snippet = evidence_snippets[0]  # Use the first snippet
-            if snippet.get("url") and snippet.get("title"):
-                source_map["source_id"] = {
-                    "id": snippet["id"],
-                    "title": snippet["title"].strip(),
-                    "url": snippet["url"],
-                    "citation": snippet["citation"],
-                    "pmid": snippet.get("pmid", ""),
-                    "doi": snippet.get("doi", "")
-                }
-                replacement = f"[{snippet['title']}]({snippet['url']})"
             linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
     # Final fallback for any remaining placeholders
-    linked_text = re.sub(r'\[source_id\]', "[Medical Reference]", linked_text)
-    linked_text = re.sub(r'\[PMID:(\d+)\]', r'[PubMed Article]', linked_text)
-    linked_text = re.sub(r'\[DOI:(10\.\d+\/[^\]]+)\]', r'[Europe PMC Article]', linked_text)
-    return linked_text, source_map
 # Implement PubMed API integration for medical evidence retrieval
 def fetch_from_pubmed_api(query, max_results=3, api_key=None):
@@ -1290,7 +1287,20 @@ def parse_doctor_response(response_text):
         sources_text = sources_match.group(2).strip()
         # Split into individual sources
         if '\n' in sources_text:
-            parsed["sources"] = [item.strip() for item in sources_text.split('\n') if item.strip()]
         else:
             parsed["sources"] = [sources_text]
@@ -1318,11 +1328,19 @@ def parse_doctor_response(response_text):
         parsed["main_response"] = '\n'.join(main_response_lines)
-    # Extract citations in the text (format: [source_id])
-    citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
-    for citation in citation_matches:
         if citation not in parsed["sources"]:
             parsed["sources"].append(citation)
     return parsed
@@ -1386,16 +1404,19 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
             evidence_text += """CITATION INSTRUCTIONS:
 1. IMPORTANT: Provide a direct answer first before asking follow-up questions. Even with limited information, give your best assessment.
 2. You MUST cite 2-3 different sources in your response. Use no more than 3 sources and no fewer than 2 sources.
-3. When citing information from these articles, use the following formats:
-   • For PubMed articles: [PMID:123456] where 123456 is the actual PubMed ID
-   • For Europe PMC articles without PMID: [DOI:10.xxxx/yyyy] where 10.xxxx/yyyy is the DOI
-   Example: "Recent studies have shown improved outcomes with early intervention [PMID:34567890]."
-   Example: "Current guidelines recommend a multidisciplinary approach [DOI:10.1234/abcd]."
 4. Focus on specific details from the abstracts - extract actual findings, statistics, or recommendations.
 5. When multiple sources support a claim, cite all of them for stronger evidence.
-   Example: "This approach is supported by multiple studies [PMID:12345678][PMID:87654321]."
-6. Include full citations in your Sources section with clickable URLs.
 7. If the abstracts have conflicting information, acknowledge this and present both perspectives with citations.
 8. Use the most recent sources when available, especially for treatment recommendations.
 9. If full text is available (marked as "Open Access" or "Full Text Available"), prioritize information from those sources as they contain more complete data.
@@ -1421,13 +1442,17 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         **Priority 2: Follow-up Questions**
         After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
         **Main Response Structure:**
         1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
         2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
         3. Recommendations for a treatment plan or next steps.
-        4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
-           • [PMID:123456] format for PubMed articles
-           • [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
            Use no more than 3 sources and no fewer than 2 sources.
@@ -1436,18 +1461,17 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
             Do NOT start the first question with asterisks (**). Format each question properly with just a number.
         -   **Reasoning**: Bullet points detailing your clinical reasoning.
             Do NOT start the first point with asterisks (**). Format each bullet point properly.
-        -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
-             - PMID: 12345678 - Author et al. (Year). Title. Journal.
-               URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
-             - DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
-               URL: https://doi.org/10.xxxx/yyyy
         **IMPORTANT FORMATTING NOTES:**
-        1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
-        2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
-        3. Number the follow-up questions starting from 1, not from any other number.
-        4. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
-        5. Make sure all bullet points and numbered items are clean, with no markdown formatting.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """

             for snippet in evidence_snippets:
                 if source_id_match == snippet["id"]:
                     source_map[source_id_match] = {
+                        "id": snippet["id"],
+                        "title": snippet["title"].strip(),
+                        "url": snippet["url"],
                         "citation": snippet["citation"],
                         "pmid": snippet.get("pmid", ""),
                         "doi": snippet.get("doi", "")
+                    }
                     break
+    # Create a numbered citation system
+    numbered_sources = {}
+    citation_number = 1
+    citation_map = {}  # Maps original citation keys to numbers
+    # First create a numbering system for all sources
+    for key in source_map.keys():
+        citation_map[key] = citation_number
+        numbered_sources[citation_number] = source_map[key]
+        citation_number += 1
+    # Replace citations with numbered format
     linked_text = text
+    for source_key, number in citation_map.items():
+        source_data = source_map[source_key]
+        safe_key = re.escape(source_key)
+        pattern = f"\\[{safe_key}\\]"
+        # Create a colored, clickable numbered reference
+        colored_ref = f"<span style='color:#3366cc;'>[{number}]</span>"
+        replacement = f"<a href='{source_data['url']}' target='_blank'>{colored_ref}</a>"
+        # Replace all instances of this citation with the numbered format
+        linked_text = re.sub(f"\\[{safe_key}\\]", replacement, linked_text)
+    # Handle any remaining source placeholders
+    if "source_id" in source_matches and "source_id" not in citation_map:
+        if evidence_snippets:
+            snippet = evidence_snippets[0]
+            next_number = len(numbered_sources) + 1
+            colored_ref = f"<span style='color:#3366cc;'>[{next_number}]</span>"
+            replacement = f"<a href='{snippet['url']}' target='_blank'>{colored_ref}</a>"
             linked_text = re.sub(r'\[source_id\]', replacement, linked_text)
+            # Add to numbered sources
+            numbered_sources[next_number] = {
+                "id": snippet["id"],
+                "title": snippet["title"].strip(),
+                "url": snippet["url"],
+                "citation": snippet["citation"],
+                "pmid": snippet.get("pmid", ""),
+                "doi": snippet.get("doi", "")
+            }
     # Final fallback for any remaining placeholders
+    linked_text = re.sub(r'\[source_id\]', "<span style='color:#999999;'>[?]</span>", linked_text)
+    linked_text = re.sub(r'\[PMID:(\d+)\]', r"<span style='color:#999999;'>[?]</span>", linked_text)
+    linked_text = re.sub(r'\[DOI:(10\.\d+\/[^\]]+)\]', r"<span style='color:#999999;'>[?]</span>", linked_text)
+    # Now update source_map to use the numbered format for the sources section
+    numbered_source_map = {}
+    for number, data in numbered_sources.items():
+        numbered_source_map[str(number)] = data
+    return linked_text, numbered_source_map
 # Implement PubMed API integration for medical evidence retrieval
 def fetch_from_pubmed_api(query, max_results=3, api_key=None):
         sources_text = sources_match.group(2).strip()
         # Split into individual sources
         if '\n' in sources_text:
+            # Parse each line as a potential source
+            source_lines = [item.strip() for item in sources_text.split('\n') if item.strip()]
+            # Process source lines to adapt to new numbered format
+            formatted_sources = []
+            for line in source_lines:
+                # Look for numbered source pattern: [1] or 1. or similar
+                if re.match(r'^\d+[\.\)]|^\[\d+\]', line):
+                    formatted_sources.append(line)
+                else:
+                    # If no number detected, just add the source
+                    formatted_sources.append(line)
+            parsed["sources"] = formatted_sources
         else:
             parsed["sources"] = [sources_text]
         parsed["main_response"] = '\n'.join(main_response_lines)
+    # Extract citations in the text - both numbered [1] and PMID/DOI formats
+    # Standard citation formats
+    pmid_doi_citation_matches = re.findall(r'\[(PMID|DOI):([\w\d:\.\/]+)\]', response_text)
+    for match in pmid_doi_citation_matches:
+        citation = f"{match[0]}:{match[1]}"
         if citation not in parsed["sources"]:
             parsed["sources"].append(citation)
+    # Numbered citations like [1]
+    numbered_citation_matches = re.findall(r'\[(\d+)\]', response_text)
+    for num in numbered_citation_matches:
+        if num not in parsed["sources"]:
+            parsed["sources"].append(num)
     return parsed
             evidence_text += """CITATION INSTRUCTIONS:
 1. IMPORTANT: Provide a direct answer first before asking follow-up questions. Even with limited information, give your best assessment.
 2. You MUST cite 2-3 different sources in your response. Use no more than 3 sources and no fewer than 2 sources.
+3. When citing information from these articles, use NUMBERED references [1], [2], [3], etc.
+   Example: "Recent studies have shown improved outcomes with early intervention [1]."
+   Example: "Current guidelines recommend a multidisciplinary approach [2]."
+   Example: "This approach is supported by multiple studies [1][3]."
+   DO NOT use formats like [PMID:123456] or [DOI:10.xxxx/yyyy] in the main text.
 4. Focus on specific details from the abstracts - extract actual findings, statistics, or recommendations.
 5. When multiple sources support a claim, cite all of them for stronger evidence.
+6. Include full citations in your Sources section with the format:
+   [1] PMID: 12345678 - Author et al. (Year). Title. Journal.
+   [2] DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
 7. If the abstracts have conflicting information, acknowledge this and present both perspectives with citations.
 8. Use the most recent sources when available, especially for treatment recommendations.
 9. If full text is available (marked as "Open Access" or "Full Text Available"), prioritize information from those sources as they contain more complete data.
         **Priority 2: Follow-up Questions**
         After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
+        **IMPORTANT CITATION FORMAT CHANGE: Use numbered references [1], [2], [3] instead of PMID/DOI format**
         **Main Response Structure:**
         1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
         2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
         3. Recommendations for a treatment plan or next steps.
+        4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using numbered references [1], [2], [3].
+           Each time you use information from a source, cite it with [1], [2], etc.
+           Example: "Recent studies have shown improved outcomes with early intervention [1]."
+           Example: "This approach is supported by multiple studies [1][2]."
            Use no more than 3 sources and no fewer than 2 sources.
             Do NOT start the first question with asterisks (**). Format each question properly with just a number.
         -   **Reasoning**: Bullet points detailing your clinical reasoning.
             Do NOT start the first point with asterisks (**). Format each bullet point properly.
+        -   **Sources**: A numbered list of all references cited in your main response (2-3 sources), formatted as:
+             [1] PMID: 12345678 - Author et al. (Year). Title. Journal.
+             [2] DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
         **IMPORTANT FORMATTING NOTES:**
+        1. Use numbered citations [1], [2], [3] instead of [PMID:12345678] or [DOI:10.xxxx/yyyy] in the main text.
+        2. In the Sources section, include the full citation details with the PMID or DOI.
+        3. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
+        4. Number the follow-up questions starting from 1, not from any other number.
+        5. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
+        6. Make sure all bullet points and numbered items are clean, with no markdown formatting.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """