Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Community

Dhruv-Ty committed on May 21, 2025

Commit

1689af9

verified ·

1 Parent(s): 7d8679d

** formatting fix

Browse files

Files changed (1) hide show

src/model.py +53 -14

src/model.py CHANGED Viewed

@@ -1219,40 +1219,70 @@ def parse_doctor_response(response_text):
     follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
     if follow_up_match:
         follow_up_text = follow_up_match.group(1).strip()
         # Check if questions are formatted as a list
         if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
             # Split on any bullet point marker
             bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
             # Remove any empty items and ensure first item is properly formatted
-            questions = [item.strip() for item in bullet_items if item.strip()]
-            # The first item might not start with a bullet point
             parsed["follow_up_questions"] = questions
         elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
             # Split on numbered items
             numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
-            # Remove any empty items
-            parsed["follow_up_questions"] = [item.strip() for item in numbered_items if item.strip()]
         else:
             # Just use the raw text if no clear list format is detected
-            parsed["follow_up_questions"] = [follow_up_text]
     # Try to extract reasoning if present
     reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
     if reasoning_match:
         reasoning_text = reasoning_match.group(1).strip()
         # Split into bullet points if present
         if '\n-' in reasoning_text:
             # Split by newline + dash, but ensure we don't lose any content
             reasoning_points = []
-            for item in reasoning_text.split('\n-'):
-                if item.strip():
-                    # Clean up any dash at the beginning
-                    cleaned_item = item.lstrip('- ').strip()
                     if cleaned_item:
                         reasoning_points.append(cleaned_item)
             parsed["reasoning"] = reasoning_points
         else:
-            parsed["reasoning"] = [reasoning_text]
     # Extract sources/references
     sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
@@ -1403,7 +1433,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         **After your main response, ALWAYS include these sections:**
         -   **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
-        -   **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first point is properly formatted.
         -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
@@ -1414,7 +1446,8 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
         2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
         3. Number the follow-up questions starting from 1, not from any other number.
-        4. For the reasoning section, make sure the first bullet point is properly formatted.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
@@ -1435,12 +1468,15 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         **After your main response, ALWAYS include these sections:**
         -   **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
-        -   **Reasoning**: Bullet points detailing your clinical reasoning. Make sure the first bullet point is properly formatted.
         **IMPORTANT FORMATTING NOTES:**
         1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
         2. Number the follow-up questions starting from 1, not from any other number.
-        3. For the reasoning section, make sure the first bullet point is properly formatted.
         IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
         """
@@ -1453,6 +1489,9 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
     # Remove "Direct Answer:" prefix if it appears
     response = re.sub(r'^Direct Answer:\s*', '', response)
     # Extract and process sources
     explanation = None

     follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
     if follow_up_match:
         follow_up_text = follow_up_match.group(1).strip()
+        # Remove any leading markdown formatting (like ** for bold)
+        follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
         # Check if questions are formatted as a list
         if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
             # Split on any bullet point marker
             bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
             # Remove any empty items and ensure first item is properly formatted
+            questions = []
+            for item in bullet_items:
+                if item.strip():
+                    # Remove any markdown formatting from each item
+                    cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
+                    questions.append(cleaned_item)
             parsed["follow_up_questions"] = questions
         elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
             # Split on numbered items
             numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
+            # Clean each item and remove any empty ones
+            questions = []
+            for item in numbered_items:
+                if item.strip():
+                    # Remove any markdown formatting
+                    cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
+                    questions.append(cleaned_item)
+            parsed["follow_up_questions"] = questions
         else:
             # Just use the raw text if no clear list format is detected
+            cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
+            parsed["follow_up_questions"] = [cleaned_text]
     # Try to extract reasoning if present
     reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
     if reasoning_match:
         reasoning_text = reasoning_match.group(1).strip()
+        # Remove any leading markdown formatting (like ** for bold)
+        reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
         # Split into bullet points if present
         if '\n-' in reasoning_text:
             # Split by newline + dash, but ensure we don't lose any content
             reasoning_points = []
+            lines = reasoning_text.split('\n-')
+            # Process the first item which might not have a dash prefix
+            if lines and lines[0].strip():
+                # Clean up any leading/trailing asterisks
+                first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
+                if first_item:
+                    reasoning_points.append(first_item)
+            # Process the rest of the items
+            for i in range(1, len(lines)):
+                if lines[i].strip():
+                    # Clean up leading/trailing asterisks and dashes
+                    cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
                     if cleaned_item:
                         reasoning_points.append(cleaned_item)
             parsed["reasoning"] = reasoning_points
         else:
+            # If there are no bullet points, still clean up any markdown
+            cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
+            parsed["reasoning"] = [cleaned_text]
     # Extract sources/references
     sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
         **After your main response, ALWAYS include these sections:**
         -   **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
+            Do NOT start the first question with asterisks (**). Format each question properly with just a number.
+        -   **Reasoning**: Bullet points detailing your clinical reasoning.
+            Do NOT start the first point with asterisks (**). Format each bullet point properly.
         -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
         1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
         2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
         3. Number the follow-up questions starting from 1, not from any other number.
+        4. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
+        5. Make sure all bullet points and numbered items are clean, with no markdown formatting.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
         **After your main response, ALWAYS include these sections:**
         -   **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
+            Do NOT start the first question with asterisks (**). Format each question properly with just a number.
+        -   **Reasoning**: Bullet points detailing your clinical reasoning.
+            Do NOT start the first bullet point with asterisks (**). Format each point properly.
         **IMPORTANT FORMATTING NOTES:**
         1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
         2. Number the follow-up questions starting from 1, not from any other number.
+        3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
+        4. Make sure all bullet points and numbered items are clean, with no markdown formatting.
         IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
         """
     # Remove "Direct Answer:" prefix if it appears
     response = re.sub(r'^Direct Answer:\s*', '', response)
+    # Remove any markdown formatting (** for bold) that might appear at the beginning of lines
+    response = re.sub(r'\n\s*\*\*\s*', '\n', response)
     # Extract and process sources
     explanation = None