Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Community

Dhruv-Ty committed on May 21, 2025

Commit

7d8679d

verified ·

1 Parent(s): 1cf15d0

formatting fixes

Browse files

Files changed (1) hide show

src/model.py +69 -19

src/model.py CHANGED Viewed

@@ -1192,6 +1192,9 @@ def fetch_medical_evidence(query, max_results=3):
 # Function to parse doctor agent responses
 def parse_doctor_response(response_text):
     """Parse the doctor agent's response into structured components"""
     # Initialize structure
     parsed = {
         "main_response": response_text,
@@ -1220,8 +1223,10 @@ def parse_doctor_response(response_text):
         if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
             # Split on any bullet point marker
             bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
-            # Remove any empty items
-            parsed["follow_up_questions"] = [item.strip() for item in bullet_items if item.strip()]
         elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
             # Split on numbered items
             numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
@@ -1237,10 +1242,15 @@ def parse_doctor_response(response_text):
         reasoning_text = reasoning_match.group(1).strip()
         # Split into bullet points if present
         if '\n-' in reasoning_text:
-            parsed["reasoning"] = [item.strip() for item in reasoning_text.split('\n-') if item.strip()]
-            # Clean up first item which might not have a dash
-            if parsed["reasoning"]:
-                parsed["reasoning"][0] = parsed["reasoning"][0].lstrip('- ')
         else:
             parsed["reasoning"] = [reasoning_text]
@@ -1254,6 +1264,30 @@ def parse_doctor_response(response_text):
         else:
             parsed["sources"] = [sources_text]
     # Extract citations in the text (format: [source_id])
     citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
     for citation in citation_matches:
@@ -1352,13 +1386,13 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         output_instructions = """
         Please structure your response clearly.
         **Priority 1: Direct Answer First**
-        Begin by providing your best assessment based on the available information. Even if the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
         **Priority 2: Follow-up Questions**
         After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
         **Main Response Structure:**
-        1. A direct answer to the patient's concerns.
         2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
         3. Recommendations for a treatment plan or next steps.
         4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
@@ -1366,15 +1400,22 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
            • [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
            Use no more than 3 sources and no fewer than 2 sources.
         **After your main response, ALWAYS include these sections:**
-        -   **Follow-up Questions**: Specific questions to gather additional information.
-        -   **Reasoning**: Bullet points detailing your clinical reasoning.
         -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
              - DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
                URL: https://doi.org/10.xxxx/yyyy
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
     else:
@@ -1382,19 +1423,25 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         output_instructions = """
         Please structure your response clearly.
         **Priority 1: Direct Answer First**
-        Begin by providing your best assessment based on the available information. Even if the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
         **Priority 2: Follow-up Questions**
         After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
         **Main Response Structure:**
-        1. A direct answer to the patient's concerns.
         2. If appropriate, a clear diagnosis or differential diagnosis.
         3. Recommendations for a treatment plan or next steps.
         **After your main response, ALWAYS include these sections:**
-        -   **Follow-up Questions**: Specific questions to gather additional information.
-        -   **Reasoning**: Bullet points detailing your clinical reasoning.
         IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
         """
@@ -1404,6 +1451,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
     # Get response from doctor agent
     response = doctor_agent(msgs)
     # Extract and process sources
     explanation = None
     evidence = None
@@ -1417,7 +1467,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         parsed_response = parse_doctor_response(linked_response)
         # Get the main response
-        main_response = linked_response
         # Extract reasoning for display
         reasoning = parsed_response.get("reasoning", [])
@@ -1431,7 +1481,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
-                # Format as a numbered list
                 follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
             else:
                 follow_up_questions = questions
@@ -1441,7 +1491,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
     else:
         # If RAG is disabled, just parse the response without source processing
         parsed_response = parse_doctor_response(response)
-        main_response = response
         # Extract reasoning
         reasoning = parsed_response.get("reasoning", [])
@@ -1455,7 +1505,7 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
-                # Format as a numbered list
                 follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
             else:
                 follow_up_questions = questions

 # Function to parse doctor agent responses
 def parse_doctor_response(response_text):
     """Parse the doctor agent's response into structured components"""
+    # First, remove "Direct Answer:" prefix that might appear at the beginning of the response
+    response_text = re.sub(r'^Direct Answer:\s*', '', response_text)
     # Initialize structure
     parsed = {
         "main_response": response_text,
         if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
             # Split on any bullet point marker
             bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
+            # Remove any empty items and ensure first item is properly formatted
+            questions = [item.strip() for item in bullet_items if item.strip()]
+            # The first item might not start with a bullet point
+            parsed["follow_up_questions"] = questions
         elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
             # Split on numbered items
             numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
         reasoning_text = reasoning_match.group(1).strip()
         # Split into bullet points if present
         if '\n-' in reasoning_text:
+            # Split by newline + dash, but ensure we don't lose any content
+            reasoning_points = []
+            for item in reasoning_text.split('\n-'):
+                if item.strip():
+                    # Clean up any dash at the beginning
+                    cleaned_item = item.lstrip('- ').strip()
+                    if cleaned_item:
+                        reasoning_points.append(cleaned_item)
+            parsed["reasoning"] = reasoning_points
         else:
             parsed["reasoning"] = [reasoning_text]
         else:
             parsed["sources"] = [sources_text]
+    # Clean up the main response - remove URLs, PMIDs and DOIs from the text if they're already in the sources section
+    if parsed["sources"]:
+        # Remove URL lines
+        main_response_lines = []
+        skip_lines = 0
+        for line in parsed["main_response"].split('\n'):
+            if skip_lines > 0:
+                skip_lines -= 1
+                continue
+            # Skip lines with just URLs
+            if re.match(r'^URL:\s*https?://', line.strip()):
+                skip_lines = 0
+                continue
+            # Skip lines with PMIDs or DOIs being displayed alone
+            if re.match(r'^(PMID|DOI):', line.strip()):
+                skip_lines = 0
+                continue
+            main_response_lines.append(line)
+        parsed["main_response"] = '\n'.join(main_response_lines)
     # Extract citations in the text (format: [source_id])
     citation_matches = re.findall(r'\[([\w\d:]+)\]', response_text)
     for citation in citation_matches:
         output_instructions = """
         Please structure your response clearly.
         **Priority 1: Direct Answer First**
+        Begin by providing your best assessment based on the available information without using "Direct Answer:" as a heading. Just start your response directly with the answer. If the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
         **Priority 2: Follow-up Questions**
         After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
         **Main Response Structure:**
+        1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
         2. If appropriate, a clear diagnosis or differential diagnosis with likelihood assessments.
         3. Recommendations for a treatment plan or next steps.
         4. IMPORTANT: You MUST cite between 2-3 different medical evidence sources using either:
            • [DOI:10.xxxx/yyyy] format for Europe PMC articles without PMID
            Use no more than 3 sources and no fewer than 2 sources.
         **After your main response, ALWAYS include these sections:**
+        -   **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
+        -   **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first point is properly formatted.
         -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
              - DOI: 10.xxxx/yyyy - Author et al. (Year). Title. Journal.
                URL: https://doi.org/10.xxxx/yyyy
+        **IMPORTANT FORMATTING NOTES:**
+        1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
+        2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
+        3. Number the follow-up questions starting from 1, not from any other number.
+        4. For the reasoning section, make sure the first bullet point is properly formatted.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
     else:
         output_instructions = """
         Please structure your response clearly.
         **Priority 1: Direct Answer First**
+        Begin by providing your best assessment based on the available information without using "Direct Answer:" as a heading. Just start your response directly with the answer. If the query lacks some details, offer your initial thoughts based on what is known, while acknowledging areas of uncertainty.
         **Priority 2: Follow-up Questions**
         After your direct answer, include a clearly labeled "Follow-up Questions:" section with specific questions that would help refine your assessment.
         **Main Response Structure:**
+        1. A direct answer to the patient's concerns WITHOUT the heading "Direct Answer:".
         2. If appropriate, a clear diagnosis or differential diagnosis.
         3. Recommendations for a treatment plan or next steps.
         **After your main response, ALWAYS include these sections:**
+        -   **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
+        -   **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first bullet point is properly formatted.
+        **IMPORTANT FORMATTING NOTES:**
+        1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
+        2. Number the follow-up questions starting from 1, not from any other number.
+        3. For the reasoning section, make sure the first bullet point is properly formatted.
         IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
         """
     # Get response from doctor agent
     response = doctor_agent(msgs)
+    # Remove "Direct Answer:" prefix if it appears
+    response = re.sub(r'^Direct Answer:\s*', '', response)
     # Extract and process sources
     explanation = None
     evidence = None
         parsed_response = parse_doctor_response(linked_response)
         # Get the main response
+        main_response = parsed_response["main_response"]
         # Extract reasoning for display
         reasoning = parsed_response.get("reasoning", [])
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
+                # Format as a numbered list starting with 1
                 follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
             else:
                 follow_up_questions = questions
     else:
         # If RAG is disabled, just parse the response without source processing
         parsed_response = parse_doctor_response(response)
+        main_response = parsed_response["main_response"]
         # Extract reasoning
         reasoning = parsed_response.get("reasoning", [])
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
+                # Format as a numbered list starting with 1
                 follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
             else:
                 follow_up_questions = questions