Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Files Community

Dhruv-Ty committed on May 21, 2025

Commit

748fd1d

verified ·

1 Parent(s): 5db0d9d

revert

Browse files

Files changed (1) hide show

src/model.py +110 -151

src/model.py CHANGED Viewed

@@ -1215,115 +1215,74 @@ def parse_doctor_response(response_text):
     if treatment_match:
         parsed["treatment"] = treatment_match.group(2).strip()
-    # Special case handling for "Additional Questions" followed by "Show Reasoning" format
-    if "Additional Questions" in response_text and "Show Reasoning" in response_text:
-        # Extract all content between "Additional Questions" and "Show Reasoning"
-        additional_questions_match = re.search(r'Additional Questions\s*\n\s*\n(.*?)(?=\nShow Reasoning)', response_text, re.DOTALL)
-        if additional_questions_match:
-            questions_text = additional_questions_match.group(1).strip()
-            # If it has numbered questions, parse those
-            if re.search(r'^\d+\.', questions_text):
-                # It's a numbered list - split by the numbers
-                questions = []
-                for q in re.split(r'\d+\.\s+', questions_text):
-                    if q.strip():
-                        questions.append(q.strip())
-                parsed["follow_up_questions"] = questions
-            else:
-                # Try to split by question marks
-                questions = []
-                for q in re.split(r'\?\s+', questions_text):
-                    if q.strip():
-                        questions.append(q.strip() + "?")
-                parsed["follow_up_questions"] = questions
-        # Extract all content after "Show Reasoning"
-        show_reasoning_match = re.search(r'Show Reasoning\s*\n\s*\n(.*?)(?:\n\n|\Z)', response_text, re.DOTALL)
-        if show_reasoning_match:
-            reasoning_text = show_reasoning_match.group(1).strip()
-            # If it has bullet points, parse those
-            if reasoning_text.startswith('-') or '\n-' in reasoning_text:
-                reasoning_points = []
-                for line in reasoning_text.split('\n'):
-                    if line.strip().startswith('-'):
-                        # Remove the bullet point
-                        reasoning_points.append(line.strip()[1:].strip())
-                    elif line.strip():
-                        reasoning_points.append(line.strip())
-                parsed["reasoning"] = reasoning_points
-            else:
-                # Just split by sentences
-                sentences = re.split(r'(?<=[.!])\s+', reasoning_text)
-                parsed["reasoning"] = [s.strip() for s in sentences if s.strip()]
-    else:
-        # Try standard format for follow-up questions
-        follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
-        if follow_up_match:
-            follow_up_text = follow_up_match.group(1).strip()
-            # Remove any leading markdown formatting (like ** for bold)
-            follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
-            # Check if questions are formatted as a list
-            if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
-                # Split on any bullet point marker
-                bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
-                # Remove any empty items and ensure first item is properly formatted
-                questions = []
-                for item in bullet_items:
-                    if item.strip():
-                        # Remove any markdown formatting from each item
-                        cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
-                        questions.append(cleaned_item)
-                parsed["follow_up_questions"] = questions
-            elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
-                # Split on numbered items
-                numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
-                # Clean each item and remove any empty ones
-                questions = []
-                for item in numbered_items:
-                    if item.strip():
-                        # Remove any markdown formatting
-                        cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
-                        questions.append(cleaned_item)
-                parsed["follow_up_questions"] = questions
-            else:
-                # Just use the raw text if no clear list format is detected
-                cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
-                parsed["follow_up_questions"] = [cleaned_text]
-        # Try standard format for reasoning
-        reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
-        if reasoning_match:
-            reasoning_text = reasoning_match.group(1).strip()
-            # Remove any leading markdown formatting (like ** for bold)
-            reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
-            # Split into bullet points if present
-            if '\n-' in reasoning_text:
-                # Split by newline + dash, but ensure we don't lose any content
-                reasoning_points = []
-                lines = reasoning_text.split('\n-')
-                # Process the first item which might not have a dash prefix
-                if lines and lines[0].strip():
-                    # Clean up any leading/trailing asterisks
-                    first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
-                    if first_item:
-                        reasoning_points.append(first_item)
-                # Process the rest of the items
-                for i in range(1, len(lines)):
-                    if lines[i].strip():
-                        # Clean up leading/trailing asterisks and dashes
-                        cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
-                        if cleaned_item:
-                            reasoning_points.append(cleaned_item)
-                parsed["reasoning"] = reasoning_points
-            else:
-                # If there are no bullet points, still clean up any markdown
-                cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
-                parsed["reasoning"] = [cleaned_text]
     # Extract sources/references
     sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
@@ -1473,12 +1432,10 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
            Use no more than 3 sources and no fewer than 2 sources.
         **After your main response, ALWAYS include these sections:**
-        -   **Follow-up Questions**: List your questions as plain text without any numbering or bullet points.
-            DO NOT include numbers like "1." or bullets like "•" or "-" before your questions.
-            Each question should be on a new line.
-        -   **Reasoning**: List your reasoning as plain text without any bullet points.
-            DO NOT include bullets like "•" or "-" before your points.
-            Each point should be on a new line.
         -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
@@ -1487,9 +1444,10 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         **IMPORTANT FORMATTING NOTES:**
         1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
-        2. DO NOT add any numbering to your follow-up questions or bullet points to your reasoning - these will be added automatically by the system.
-        3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
-        4. Make sure all text is clean, with no markdown formatting, no numbers, and no bullet points.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
@@ -1509,17 +1467,16 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         3. Recommendations for a treatment plan or next steps.
         **After your main response, ALWAYS include these sections:**
-        -   **Follow-up Questions**: List your questions as plain text without any numbering or bullet points.
-            DO NOT include numbers like "1." or bullets like "•" or "-" before your questions.
-            Each question should be on a new line.
-        -   **Reasoning**: List your reasoning as plain text without any bullet points.
-            DO NOT include bullets like "•" or "-" before your points.
-            Each point should be on a new line.
         **IMPORTANT FORMATTING NOTES:**
-        1. DO NOT add any numbering to your follow-up questions or bullet points to your reasoning - these will be added automatically by the system.
-        2. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
-        3. Make sure all text is clean, with no markdown formatting, no numbers, and no bullet points.
         IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
         """
@@ -1535,10 +1492,6 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
     # Remove any markdown formatting (** for bold) that might appear at the beginning of lines
     response = re.sub(r'\n\s*\*\*\s*', '\n', response)
-    # Remove any numbering or bullets the model might have included
-    response = re.sub(r'\n\s*\d+\.\s+', '\n', response)  # Remove numbered lists
-    response = re.sub(r'\n\s*[-•*]\s+', '\n', response)  # Remove bullet points
     # Extract and process sources
     explanation = None
@@ -1557,31 +1510,47 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         # Extract reasoning for display
         reasoning = parsed_response.get("reasoning", [])
         if reasoning:
             if isinstance(reasoning, list):
-                # Join with bullet points if it's a list
-                explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
             else:
-                # It's a single string - format as bullet points
-                explanation = f"- {reasoning.strip()}"
         # Extract follow-up questions
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
-                # Format as a numbered list
-                follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()])
             else:
-                # It's a single string - format as a single question
-                follow_up_questions = f"1. {questions.strip()}"
     else:
         # If RAG is disabled, just parse the response without source processing
         parsed_response = parse_doctor_response(response)
         main_response = parsed_response["main_response"]
-                # Extract reasoning        reasoning = parsed_response.get("reasoning", [])                if reasoning:            if isinstance(reasoning, list):                # Join with bullet points if it's a list                explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])            else:                # It's a single string - format as bullet points                explanation = f"- {reasoning.strip()}"                        # Extract follow-up questions        questions = parsed_response.get("follow_up_questions", [])                if questions:            if isinstance(questions, list):                # Format as a numbered list                follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()])            else:                # It's a single string - format as a single question                follow_up_questions = f"1. {questions.strip()}"
     # Return four values: main response, explanation, follow-up questions, and evidence
     return main_response, explanation, follow_up_questions, evidence_snippets
@@ -1624,11 +1593,6 @@ def run_consultation(use_rag=True):
         print("=" * 30)
         # Ensure explanation is not empty before printing, or print a default message
         if explanation and explanation.strip() and explanation.strip() != "="*50:
-            # Format explanation as bullet points if it's not already
-            if isinstance(explanation, list):
-                explanation = "\n".join([f"- {point}" for point in explanation])
-            elif not explanation.startswith("-"):
-                explanation = "\n".join([f"- {point}" for point in explanation.split("\n")])
             print(explanation)
         else:
             print("No detailed explanation or sources were generated for this response.")
@@ -1638,17 +1602,12 @@ def run_consultation(use_rag=True):
             print("\n" + "=" * 30)
             print("FOLLOW-UP QUESTIONS")
             print("=" * 30)
-            # Format questions as numbered list if it's not already
-            if isinstance(follow_up_questions, list):
-                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions)])
-            elif not follow_up_questions.startswith("1."):
-                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions.split("\n"))])
             print(follow_up_questions)
         # Add Open Access Legend if evidence sources were found
         if evidence:
             print("\nLEGEND: 🔓 = Open Access (full text available)")
         # Check if we need to continue with follow-up or start a new case
         next_action = input("\nFollow-up? (or 'next' for new case, 'exit' to end): ")

     if treatment_match:
         parsed["treatment"] = treatment_match.group(2).strip()
+    # Try to extract follow-up questions
+    follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
+    if follow_up_match:
+        follow_up_text = follow_up_match.group(1).strip()
+        # Remove any leading markdown formatting (like ** for bold)
+        follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
+        # Check if questions are formatted as a list
+        if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
+            # Split on any bullet point marker
+            bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
+            # Remove any empty items and ensure first item is properly formatted
+            questions = []
+            for item in bullet_items:
+                if item.strip():
+                    # Remove any markdown formatting from each item
+                    cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
+                    questions.append(cleaned_item)
+            parsed["follow_up_questions"] = questions
+        elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
+            # Split on numbered items
+            numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
+            # Clean each item and remove any empty ones
+            questions = []
+            for item in numbered_items:
+                if item.strip():
+                    # Remove any markdown formatting
+                    cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
+                    questions.append(cleaned_item)
+            parsed["follow_up_questions"] = questions
+        else:
+            # Just use the raw text if no clear list format is detected
+            cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
+            parsed["follow_up_questions"] = [cleaned_text]
+    # Try to extract reasoning if present
+    reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
+    if reasoning_match:
+        reasoning_text = reasoning_match.group(1).strip()
+        # Remove any leading markdown formatting (like ** for bold)
+        reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
+        # Split into bullet points if present
+        if '\n-' in reasoning_text:
+            # Split by newline + dash, but ensure we don't lose any content
+            reasoning_points = []
+            lines = reasoning_text.split('\n-')
+            # Process the first item which might not have a dash prefix
+            if lines and lines[0].strip():
+                # Clean up any leading/trailing asterisks
+                first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
+                if first_item:
+                    reasoning_points.append(first_item)
+            # Process the rest of the items
+            for i in range(1, len(lines)):
+                if lines[i].strip():
+                    # Clean up leading/trailing asterisks and dashes
+                    cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
+                    if cleaned_item:
+                        reasoning_points.append(cleaned_item)
+            parsed["reasoning"] = reasoning_points
+        else:
+            # If there are no bullet points, still clean up any markdown
+            cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
+            parsed["reasoning"] = [cleaned_text]
     # Extract sources/references
     sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
            Use no more than 3 sources and no fewer than 2 sources.
         **After your main response, ALWAYS include these sections:**
+        -   **Follow-up Questions**: Specific numbered questions starting from 1, not bullets.
+            Do NOT start the first question with asterisks (**). Format each question properly with just a number.
+        -   **Reasoning**: Bullet points detailing your clinical reasoning.
+            Do NOT start the first point with asterisks (**). Format each bullet point properly.
         -   **Sources**: A list of all references cited in your main response (2-3 sources), formatted as:
              - PMID: 12345678 - Author et al. (Year). Title. Journal.
                URL: https://pubmed.ncbi.nlm.nih.gov/12345678/
         **IMPORTANT FORMATTING NOTES:**
         1. Do NOT include technical information like URLs, PMIDs or DOIs in the main answer - these belong in the Sources section only.
+        2. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
+        3. Number the follow-up questions starting from 1, not from any other number.
+        4. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
+        5. Make sure all bullet points and numbered items are clean, with no markdown formatting.
         IMPORTANT: Only cite sources that were provided in the evidence. Do not fabricate references, PMIDs, or DOIs.
         """
         3. Recommendations for a treatment plan or next steps.
         **After your main response, ALWAYS include these sections:**
+        -   **Follow-up Questions**: Specific questions to gather additional information, numbered starting from 1 (not bullet points).
+            Do NOT start the first question with asterisks (**). Format each question properly with just a number.
+        -   **Reasoning**: Bullet points detailing your clinical reasoning.
+            Do NOT start the first bullet point with asterisks (**). Format each point properly.
         **IMPORTANT FORMATTING NOTES:**
+        1. For follow-up questions, use numbered format (1. 2. 3.) not bullet points.
+        2. Number the follow-up questions starting from 1, not from any other number.
+        3. NEVER use markdown formatting like ** (asterisks) at the beginning of any points, questions, or lines.
+        4. Make sure all bullet points and numbered items are clean, with no markdown formatting.
         IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
         """
     # Remove any markdown formatting (** for bold) that might appear at the beginning of lines
     response = re.sub(r'\n\s*\*\*\s*', '\n', response)
     # Extract and process sources
     explanation = None
         # Extract reasoning for display
         reasoning = parsed_response.get("reasoning", [])
         if reasoning:
             if isinstance(reasoning, list):
+                explanation = "\n".join([f"- {r}" for r in reasoning])
             else:
+                explanation = reasoning
         # Extract follow-up questions
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
+                # Format as a numbered list starting with 1
+                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
             else:
+                follow_up_questions = questions
+            # Debug: Print follow-up questions
+            print(f"Follow-up questions generated: {follow_up_questions}")
     else:
         # If RAG is disabled, just parse the response without source processing
         parsed_response = parse_doctor_response(response)
         main_response = parsed_response["main_response"]
+        # Extract reasoning
+        reasoning = parsed_response.get("reasoning", [])
+        if reasoning:
+            if isinstance(reasoning, list):
+                explanation = "\n".join([f"- {r}" for r in reasoning])
+            else:
+                explanation = reasoning
+        # Extract follow-up questions
+        questions = parsed_response.get("follow_up_questions", [])
+        if questions:
+            if isinstance(questions, list):
+                # Format as a numbered list starting with 1
+                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions) if q])
+            else:
+                follow_up_questions = questions
+            # Debug: Print follow-up questions
+        print(f"Follow-up questions generated: {follow_up_questions}")
     # Return four values: main response, explanation, follow-up questions, and evidence
     return main_response, explanation, follow_up_questions, evidence_snippets
         print("=" * 30)
         # Ensure explanation is not empty before printing, or print a default message
         if explanation and explanation.strip() and explanation.strip() != "="*50:
             print(explanation)
         else:
             print("No detailed explanation or sources were generated for this response.")
             print("\n" + "=" * 30)
             print("FOLLOW-UP QUESTIONS")
             print("=" * 30)
             print(follow_up_questions)
         # Add Open Access Legend if evidence sources were found
         if evidence:
             print("\nLEGEND: 🔓 = Open Access (full text available)")
         # Check if we need to continue with follow-up or start a new case
         next_action = input("\nFollow-up? (or 'next' for new case, 'exit' to end): ")