Spaces:

Dhruv-Ty
/

chat

Sleeping

App Files Files Community

Dhruv-Ty committed on May 21, 2025

Commit

5db0d9d

verified ·

1 Parent(s): d284c4b

Update src/model.py

Browse files

Files changed (1) hide show

src/model.py +128 -185

src/model.py CHANGED Viewed

@@ -1215,74 +1215,115 @@ def parse_doctor_response(response_text):
     if treatment_match:
         parsed["treatment"] = treatment_match.group(2).strip()
-    # Try to extract follow-up questions
-    follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
-    if follow_up_match:
-        follow_up_text = follow_up_match.group(1).strip()
-        # Remove any leading markdown formatting (like ** for bold)
-        follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
-        # Check if questions are formatted as a list
-        if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
-            # Split on any bullet point marker
-            bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
-            # Remove any empty items and ensure first item is properly formatted
-            questions = []
-            for item in bullet_items:
-                if item.strip():
-                    # Remove any markdown formatting from each item
-                    cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
-                    questions.append(cleaned_item)
-            parsed["follow_up_questions"] = questions
-        elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
-            # Split on numbered items
-            numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
-            # Clean each item and remove any empty ones
-            questions = []
-            for item in numbered_items:
-                if item.strip():
-                    # Remove any markdown formatting
-                    cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
-                    questions.append(cleaned_item)
-            parsed["follow_up_questions"] = questions
-        else:
-            # Just use the raw text if no clear list format is detected
-            cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
-            parsed["follow_up_questions"] = [cleaned_text]
-    # Try to extract reasoning if present
-    reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
-    if reasoning_match:
-        reasoning_text = reasoning_match.group(1).strip()
-        # Remove any leading markdown formatting (like ** for bold)
-        reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
-        # Split into bullet points if present
-        if '\n-' in reasoning_text:
-            # Split by newline + dash, but ensure we don't lose any content
-            reasoning_points = []
-            lines = reasoning_text.split('\n-')
-            # Process the first item which might not have a dash prefix
-            if lines and lines[0].strip():
-                # Clean up any leading/trailing asterisks
-                first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
-                if first_item:
-                    reasoning_points.append(first_item)
-            # Process the rest of the items
-            for i in range(1, len(lines)):
-                if lines[i].strip():
-                    # Clean up leading/trailing asterisks and dashes
-                    cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
-                    if cleaned_item:
-                        reasoning_points.append(cleaned_item)
-            parsed["reasoning"] = reasoning_points
-        else:
-            # If there are no bullet points, still clean up any markdown
-            cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
-            parsed["reasoning"] = [cleaned_text]
     # Extract sources/references
     sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
@@ -1516,89 +1557,31 @@ def orchestrator_chat(history, query, use_rag, is_follow_up=False):
         # Extract reasoning for display
         reasoning = parsed_response.get("reasoning", [])
         if reasoning:
             if isinstance(reasoning, list):
-                # Add bullet points to reasoning items
                 explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
             else:
-                explanation = reasoning
         # Extract follow-up questions
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
-                # Format as a numbered list starting with 1
-                # Remove any existing numbers or bullets first
-                clean_questions = []
-                for q in questions:
-                    if q.strip():
-                        # Remove any existing numbering or bullets
-                        clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
-                        clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
-                        clean_questions.append(clean_q)
-                # Add proper numbering
-                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
             else:
-                # If it's a string, split by newlines and format each line as a question
-                question_lines = questions.split('\n')
-                clean_questions = []
-                for q in question_lines:
-                    if q.strip():
-                        # Remove any existing numbering or bullets
-                        clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
-                        clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
-                        clean_questions.append(clean_q)
-                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
-            # Debug: Print follow-up questions
-            print(f"Follow-up questions generated: {follow_up_questions}")
     else:
         # If RAG is disabled, just parse the response without source processing
         parsed_response = parse_doctor_response(response)
         main_response = parsed_response["main_response"]
-        # Extract reasoning
-        reasoning = parsed_response.get("reasoning", [])
-        if reasoning:
-            if isinstance(reasoning, list):
-                # Add bullet points to reasoning items
-                explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
-            else:
-                # If it's a string, split by newlines and format each line as a bullet point
-                reasoning_lines = reasoning.split('\n')
-                explanation = "\n".join([f"- {r.strip()}" for r in reasoning_lines if r.strip()])
-        # Extract follow-up questions
-        questions = parsed_response.get("follow_up_questions", [])
-        if questions:
-            if isinstance(questions, list):
-                # Clean each question and add proper numbering
-                clean_questions = []
-                for q in questions:
-                    if q.strip():
-                        # Remove any existing numbering or bullets
-                        clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
-                        clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
-                        clean_questions.append(clean_q)
-                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
-            else:
-                # If it's a string, split by newlines and format each line as a question
-                question_lines = questions.split('\n')
-                clean_questions = []
-                for q in question_lines:
-                    if q.strip():
-                        # Remove any existing numbering or bullets
-                        clean_q = re.sub(r'^\s*\d+\.\s+', '', q.strip())
-                        clean_q = re.sub(r'^\s*[-•*]\s+', '', clean_q)
-                        clean_questions.append(clean_q)
-                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(clean_questions) if q])
-        # Debug: Print follow-up questions
-        print(f"Follow-up questions generated: {follow_up_questions}")
     # Return four values: main response, explanation, follow-up questions, and evidence
     return main_response, explanation, follow_up_questions, evidence_snippets
@@ -1641,26 +1624,11 @@ def run_consultation(use_rag=True):
         print("=" * 30)
         # Ensure explanation is not empty before printing, or print a default message
         if explanation and explanation.strip() and explanation.strip() != "="*50:
-            # Format explanation for display - ensure bullet points
-            if not explanation.startswith("-"):
-                # Split the paragraph by periods to create separate points
-                formatted_explanation = []
-                # Better sentence splitting with regex
-                sentences = re.split(r'(?<=[.!])\s+', explanation)
-                for point in sentences:
-                    if point.strip() and len(point.strip()) > 10:  # Avoid very short fragments
-                        formatted_explanation.append(f"- {point.strip()}" + ("." if not point.strip().endswith((".", "!", "?")) else ""))
-                # If we didn't get good splitting, try a simpler approach
-                if len(formatted_explanation) <= 1 and len(explanation) > 50:
-                    formatted_explanation = []
-                    lines = explanation.split(". ")
-                    for line in lines:
-                        if line.strip():
-                            formatted_explanation.append(f"- {line.strip()}.")
-                explanation = "\n".join(formatted_explanation)
             print(explanation)
         else:
             print("No detailed explanation or sources were generated for this response.")
@@ -1670,38 +1638,13 @@ def run_consultation(use_rag=True):
             print("\n" + "=" * 30)
             print("FOLLOW-UP QUESTIONS")
             print("=" * 30)
-            # Format questions for display - ensure numbering
-            if not follow_up_questions.startswith("1."):
-                # Split paragraph by question marks to separate questions
-                formatted_questions = []
-                i = 1
-                # Modified regex to handle different question formats
-                for q in re.split(r'(?<=[?])\s+', follow_up_questions):
-                    if q.strip():
-                        if not q.strip().endswith("?"):
-                            # If this doesn't end with a question mark, it might be multiple questions
-                            subquestions = re.split(r'(?<=\?)\s+', q)
-                            for sq in subquestions:
-                                if sq.strip():
-                                    formatted_questions.append(f"{i}. {sq.strip()}" + ("?" if not sq.strip().endswith("?") else ""))
-                                    i += 1
-                        else:
-                            formatted_questions.append(f"{i}. {q.strip()}")
-                            i += 1
-                # If we couldn't split properly, try another approach - just add line breaks after each sentence
-                if len(formatted_questions) <= 1 and len(follow_up_questions) > 50:
-                    formatted_questions = []
-                    i = 1
-                    sentences = re.split(r'[.?]\s+', follow_up_questions)
-                    for s in sentences:
-                        if s.strip() and len(s.strip()) > 10:  # Avoid very short fragments
-                            formatted_questions.append(f"{i}. {s.strip()}?")
-                            i += 1
-                follow_up_questions = "\n".join(formatted_questions)
             print(follow_up_questions)
         # Add Open Access Legend if evidence sources were found
         if evidence:
             print("\nLEGEND: 🔓 = Open Access (full text available)")

     if treatment_match:
         parsed["treatment"] = treatment_match.group(2).strip()
+    # Special case handling for "Additional Questions" followed by "Show Reasoning" format
+    if "Additional Questions" in response_text and "Show Reasoning" in response_text:
+        # Extract all content between "Additional Questions" and "Show Reasoning"
+        additional_questions_match = re.search(r'Additional Questions\s*\n\s*\n(.*?)(?=\nShow Reasoning)', response_text, re.DOTALL)
+        if additional_questions_match:
+            questions_text = additional_questions_match.group(1).strip()
+            # If it has numbered questions, parse those
+            if re.search(r'^\d+\.', questions_text):
+                # It's a numbered list - split by the numbers
+                questions = []
+                for q in re.split(r'\d+\.\s+', questions_text):
+                    if q.strip():
+                        questions.append(q.strip())
+                parsed["follow_up_questions"] = questions
+            else:
+                # Try to split by question marks
+                questions = []
+                for q in re.split(r'\?\s+', questions_text):
+                    if q.strip():
+                        questions.append(q.strip() + "?")
+                parsed["follow_up_questions"] = questions
+        # Extract all content after "Show Reasoning"
+        show_reasoning_match = re.search(r'Show Reasoning\s*\n\s*\n(.*?)(?:\n\n|\Z)', response_text, re.DOTALL)
+        if show_reasoning_match:
+            reasoning_text = show_reasoning_match.group(1).strip()
+            # If it has bullet points, parse those
+            if reasoning_text.startswith('-') or '\n-' in reasoning_text:
+                reasoning_points = []
+                for line in reasoning_text.split('\n'):
+                    if line.strip().startswith('-'):
+                        # Remove the bullet point
+                        reasoning_points.append(line.strip()[1:].strip())
+                    elif line.strip():
+                        reasoning_points.append(line.strip())
+                parsed["reasoning"] = reasoning_points
+            else:
+                # Just split by sentences
+                sentences = re.split(r'(?<=[.!])\s+', reasoning_text)
+                parsed["reasoning"] = [s.strip() for s in sentences if s.strip()]
+    else:
+        # Try standard format for follow-up questions
+        follow_up_match = re.search(r'(?i)(?:follow[ -]?up questions|additional questions|clarifying questions):?\s*(.*?)(?:\n\n|\n(?:reasoning|sources):|\Z)', response_text, re.DOTALL)
+        if follow_up_match:
+            follow_up_text = follow_up_match.group(1).strip()
+            # Remove any leading markdown formatting (like ** for bold)
+            follow_up_text = re.sub(r'^\*\*\s*', '', follow_up_text)
+            # Check if questions are formatted as a list
+            if '\n-' in follow_up_text or '\n•' in follow_up_text or '\n*' in follow_up_text:
+                # Split on any bullet point marker
+                bullet_items = re.split(r'\n\s*[-•*]\s*', follow_up_text)
+                # Remove any empty items and ensure first item is properly formatted
+                questions = []
+                for item in bullet_items:
+                    if item.strip():
+                        # Remove any markdown formatting from each item
+                        cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
+                        questions.append(cleaned_item)
+                parsed["follow_up_questions"] = questions
+            elif '\n1.' in follow_up_text or re.search(r'\n\d+\.', follow_up_text):
+                # Split on numbered items
+                numbered_items = re.split(r'\n\s*\d+\.\s*', follow_up_text)
+                # Clean each item and remove any empty ones
+                questions = []
+                for item in numbered_items:
+                    if item.strip():
+                        # Remove any markdown formatting
+                        cleaned_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', item.strip())
+                        questions.append(cleaned_item)
+                parsed["follow_up_questions"] = questions
+            else:
+                # Just use the raw text if no clear list format is detected
+                cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', follow_up_text)
+                parsed["follow_up_questions"] = [cleaned_text]
+        # Try standard format for reasoning
+        reasoning_match = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\n(?:sources|follow)|\Z)', response_text, re.DOTALL)
+        if reasoning_match:
+            reasoning_text = reasoning_match.group(1).strip()
+            # Remove any leading markdown formatting (like ** for bold)
+            reasoning_text = re.sub(r'^\*\*\s*', '', reasoning_text)
+            # Split into bullet points if present
+            if '\n-' in reasoning_text:
+                # Split by newline + dash, but ensure we don't lose any content
+                reasoning_points = []
+                lines = reasoning_text.split('\n-')
+                # Process the first item which might not have a dash prefix
+                if lines and lines[0].strip():
+                    # Clean up any leading/trailing asterisks
+                    first_item = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', lines[0].strip())
+                    if first_item:
+                        reasoning_points.append(first_item)
+                # Process the rest of the items
+                for i in range(1, len(lines)):
+                    if lines[i].strip():
+                        # Clean up leading/trailing asterisks and dashes
+                        cleaned_item = re.sub(r'^\s*[-*]*\s*|\s*\*\*\s*$', '', lines[i].strip())
+                        if cleaned_item:
+                            reasoning_points.append(cleaned_item)
+                parsed["reasoning"] = reasoning_points
+            else:
+                # If there are no bullet points, still clean up any markdown
+                cleaned_text = re.sub(r'^\s*\*\*\s*|\s*\*\*\s*$', '', reasoning_text)
+                parsed["reasoning"] = [cleaned_text]
     # Extract sources/references
     sources_match = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
         # Extract reasoning for display
         reasoning = parsed_response.get("reasoning", [])
         if reasoning:
             if isinstance(reasoning, list):
+                # Join with bullet points if it's a list
                 explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
             else:
+                # It's a single string - format as bullet points
+                explanation = f"- {reasoning.strip()}"
         # Extract follow-up questions
         questions = parsed_response.get("follow_up_questions", [])
         if questions:
             if isinstance(questions, list):
+                # Format as a numbered list
+                follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()])
             else:
+                # It's a single string - format as a single question
+                follow_up_questions = f"1. {questions.strip()}"
     else:
         # If RAG is disabled, just parse the response without source processing
         parsed_response = parse_doctor_response(response)
         main_response = parsed_response["main_response"]
+        # Extract reasoning
+        reasoning = parsed_response.get("reasoning", [])
+        if reasoning:
+            if isinstance(reasoning, list):
+                # Join with bullet points if it's a list
+                explanation = "\n".join([f"- {r.strip()}" for r in reasoning if r.strip()])
+            else:
+                # It's a single string - format as bullet points
+                explanation = f"- {reasoning.strip()}"
+        # Extract follow-up questions
+        questions = parsed_response.get("follow_up_questions", [])
+        if questions:
+            if isinstance(questions, list):
+                # Format as a numbered list
+                follow_up_questions = "\n".join([f"{i+1}. {q.strip()}" for i, q in enumerate(questions) if q.strip()])
+            else:
+                # It's a single string - format as a single question
+                follow_up_questions = f"1. {questions.strip()}"
     # Return four values: main response, explanation, follow-up questions, and evidence
     return main_response, explanation, follow_up_questions, evidence_snippets
         print("=" * 30)
         # Ensure explanation is not empty before printing, or print a default message
         if explanation and explanation.strip() and explanation.strip() != "="*50:
+            # Format explanation as bullet points if it's not already
+            if isinstance(explanation, list):
+                explanation = "\n".join([f"- {point}" for point in explanation])
+            elif not explanation.startswith("-"):
+                explanation = "\n".join([f"- {point}" for point in explanation.split("\n")])
             print(explanation)
         else:
             print("No detailed explanation or sources were generated for this response.")
             print("\n" + "=" * 30)
             print("FOLLOW-UP QUESTIONS")
             print("=" * 30)
+            # Format questions as numbered list if it's not already
+            if isinstance(follow_up_questions, list):
+                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions)])
+            elif not follow_up_questions.startswith("1."):
+                follow_up_questions = "\n".join([f"{i+1}. {q}" for i, q in enumerate(follow_up_questions.split("\n"))])
             print(follow_up_questions)
         # Add Open Access Legend if evidence sources were found
         if evidence:
             print("\nLEGEND: 🔓 = Open Access (full text available)")