yasserrmd committed
Commit 188b27c · verified · 1 Parent(s): 2e924a6

Update app.py

Files changed (1)
  1. app.py +261 -104
app.py CHANGED
@@ -1,20 +1,23 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from threading import Thread
 import re
 from typing import Iterator, List, Tuple
 import spaces

-# --- Model Configuration ---
-# Using the recommended base model for this setup.
-# The original model name seems to be a fine-tuned version, but the tokenizer should come from the base.
 MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
-TOKENIZER_NAME = "mistralai/Magistral-7B-v0.3"  # Use the base model's tokenizer for compatibility
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

-# --- Medical System Prompt ---
-# As recommended by the model card.
 MEDICAL_SYSTEM_PROMPT = """
 You are SinaReason, a medical reasoning assistant for educational and clinical support.
 Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
@@ -30,74 +33,104 @@ Your thinking process must follow the template below:[THINK]Your thoughts or/and

 class SinaReasonMedicalChat:
     def __init__(self):
-        """Initializes the tokenizer and model."""
         self.tokenizer = None
         self.model = None
         self.load_model()
-
     def load_model(self):
-        """Load the SinaReason medical model and tokenizer."""
         try:
-            print(f"Loading tokenizer from: {TOKENIZER_NAME}")
-            self.tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-
-            # Add padding token if it's missing
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
-
-            print(f"Loading medical model: {MODEL_NAME}")
-            self.model = AutoModelForCausalLM.from_pretrained(
                 MODEL_NAME,
-                torch_dtype=torch.bfloat16,  # Use bfloat16 for better performance on modern GPUs
-                device_map="auto"
             )
-            print("SinaReason medical model loaded successfully!")

         except Exception as e:
             print(f"Error loading model: {e}")
             raise e
-
     def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
-        """Extracts thinking process and the clinical response from the full text."""
-        think_pattern = r'\[THINK\](.*?)\[/THINK\]'
-        match = re.search(think_pattern, text, re.DOTALL)
-
         if match:
             thinking = match.group(1).strip()
-            # Remove the entire thinking block to get the response
-            response = re.sub(think_pattern, "", text, flags=re.DOTALL).strip()
-            return thinking, response
-        else:
-            # If the thinking block is not yet complete or present, treat the whole text as thinking
-            return text, ""

     @spaces.GPU(duration=120)
     def medical_chat_stream(self, message: str, history: List[List[str]], max_tokens: int = 1024,
-                            temperature: float = 0.7, top_p: float = 0.95) -> Iterator[Tuple[str, List[List[str]]]]:
-        """Streams medical reasoning responses, smoothly updating the thinking and summary sections."""
-        self.model.eval()
         if not message.strip():
             return

         # Apply the chat template with the medical system prompt
         messages = [
-            {"role": "system", "content": MEDICAL_SYSTEM_PROMPT},
         ]

         # Add conversation history
         for user_msg, assistant_msg in history:
-            # We need to extract the raw model output from the formatted HTML
-            raw_assistant_msg = re.sub(r'<.*?>', '', assistant_msg).replace("🧠 **Medical Reasoning Process**", "").replace("---", "").replace("🩺 **Clinical Summary**", "").strip()
             messages.append({"role": "user", "content": user_msg})
-            messages.append({"role": "assistant", "content": raw_assistant_msg})

         messages.append({"role": "user", "content": message})

-        # Apply chat template and tokenize
-        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False)
-        inputs = self.tokenizer(text=prompt, return_tensors="pt").to(DEVICE)

-        # Setup streamer for handling the model's output token by token
         streamer = TextIteratorStreamer(
             self.tokenizer,
             timeout=30.0,
@@ -105,7 +138,7 @@ class SinaReasonMedicalChat:
             skip_special_tokens=True
         )

-        # Generation parameters for medical reasoning
         generation_kwargs = {
             **inputs,
             "max_new_tokens": max_tokens,
@@ -117,64 +150,121 @@ class SinaReasonMedicalChat:
             "repetition_penalty": 1.1
         }

-        # Run model generation in a separate thread to avoid blocking the UI
-        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
         thread.start()

-        # Stream the response back to the UI
         partial_response = ""
         for new_token in streamer:
             partial_response += new_token
-            thinking_content, clinical_content = self.extract_thinking_and_response(partial_response)
-
-            # Create a single, consistently formatted HTML block that updates smoothly
-            formatted_display = f"""🧠 **Medical Reasoning Process**
-<details open>
-<summary>🔍 Click to view detailed thinking process</summary>
-<p style="white-space: pre-wrap;">*{thinking_content}*</p>
-</details>
-<hr>
-🩺 **Clinical Summary**
-<p style="white-space: pre-wrap;">{clinical_content}</p>
-"""
-            # Update the history and yield the change to the Gradio interface
-            new_history = history + [[message, formatted_display]]
-            yield "", new_history

-# --- Gradio Interface ---

 # Initialize the medical chat model
 medical_chat_model = SinaReasonMedicalChat()

 def respond(message, history, max_tokens, temperature, top_p):
-    """Gradio response function wrapper."""
-    yield from medical_chat_model.medical_chat_stream(message, history, max_tokens, temperature, top_p)

-# Custom CSS for a professional medical interface
 css = """
-.medical-chatbot { min-height: 700px; }
-.warning-box { background-color: #fff3cd; border: 1px solid #ffeeba; border-radius: 8px; padding: 15px; margin: 15px 0; color: #856404; }
-.footer-text { text-align: center; color: #666; font-size: 0.9em; margin-top: 20px; }
 """

 with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🩺 SinaReason Medical Reasoning Assistant")
-    gr.Markdown("**Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)")
-
     with gr.Row():
         gr.HTML("""
         <div class="warning-box">
             <h4>⚠️ Important Medical Disclaimer</h4>
             <p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
             <ul>
-                <li>🚫 <strong>NOT for patient diagnosis or treatment.</strong> All information must be verified by qualified professionals.</li>
-                <li>👨‍⚕️ <strong>Intended for professional use only.</strong></li>
-                <li>📚 <strong>For educational purposes</strong> to understand clinical reasoning patterns.</li>
             </ul>
         </div>
         """)
-
     with gr.Row():
         with gr.Column(scale=4):
             chatbot = gr.Chatbot(
@@ -182,54 +272,121 @@ with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.So
                 show_copy_button=True,
                 bubble_full_width=False,
                 elem_classes=["medical-chatbot"],
-                avatar_images=(None, "https://img.icons8.com/fluency/96/doctor-male.png")
             )
             msg = gr.Textbox(
-                placeholder="Describe a clinical scenario for medical reasoning analysis...",
                 lines=3,
                 show_label=False,
                 container=False
             )
         with gr.Column(scale=1, min_width=250):
             gr.Markdown("### ⚙️ Model Parameters")
-            max_tokens = gr.Slider(minimum=256, maximum=4096, value=2048, step=128, label="Max Tokens")
-            temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
-            top_p = gr.Slider(minimum=0.8, maximum=1.0, value=0.95, step=0.01, label="Top-p")
-            gr.Markdown("---")
-            submit_btn = gr.Button("🔍 Analyze Case", variant="primary")
-            retry_btn = gr.Button("🔄 Retry")
-            clear_btn = gr.Button("🗑️ Clear")
-
     gr.Examples(
         examples=[
             "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
             "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
             "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
         ],
         inputs=[msg],
-        label="📋 Clinical Case Examples"
     )
-
     gr.HTML("""
     <div class="footer-text">
-        <p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509</p>
     </div>
     """)

-    # Event Handlers
-    def clear_chat():
-        return [], ""
-
-    def retry_last(history):
-        if not history:
-            return [], ""
-        last_user_msg = history[-1][0]
-        return history[:-1], last_user_msg
-
-    submit_btn.click(respond, [msg, chatbot, max_tokens, temperature, top_p], [msg, chatbot])
-    msg.submit(respond, [msg, chatbot, max_tokens, temperature, top_p], [msg, chatbot])
-    clear_btn.click(clear_chat, None, [chatbot, msg])
-    retry_btn.click(retry_last, [chatbot], [chatbot, msg])
-
 if __name__ == "__main__":
-    demo.launch(debug=True, show_error=True)

app.py (updated file)
 import gradio as gr
 import torch
+from transformers import AutoTokenizer, Mistral3ForConditionalGeneration, TextIteratorStreamer
 from threading import Thread
 import re
+import time
 from typing import Iterator, List, Tuple
 import spaces
+

+# Model configuration
 MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
+# MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509-bnb-4bit"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

+# Medical system prompt as recommended by the model card
 MEDICAL_SYSTEM_PROMPT = """
 You are SinaReason, a medical reasoning assistant for educational and clinical support.
 Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
...

 class SinaReasonMedicalChat:
     def __init__(self):
         self.tokenizer = None
         self.model = None
         self.load_model()
+
     def load_model(self):
+        """Load the SinaReason medical model and tokenizer"""
         try:
+            print(f"Loading medical model: {MODEL_NAME}")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                "mistralai/Magistral-Small-2509",
+                tokenizer_type="mistral"
+            )
+
+            # Add padding token if not present
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            self.model = Mistral3ForConditionalGeneration.from_pretrained(
                 MODEL_NAME,
+                dtype="auto"
             )
+
+            print("SinaReason medical model loaded successfully!")
+
         except Exception as e:
             print(f"Error loading model: {e}")
             raise e
+
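Note: the commented-out MODEL_NAME above points at a `-bnb-4bit` checkpoint that this commit never loads. For reference, a minimal sketch of a 4-bit load of the same model, assuming the standard bitsandbytes integration in transformers (not part of this commit; needs the bitsandbytes and accelerate packages installed):

    import torch
    from transformers import BitsAndBytesConfig, Mistral3ForConditionalGeneration

    # Hypothetical alternative to the full-precision load above: quantize weights to 4-bit at load time.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16 while weights stay 4-bit
    )
    model = Mistral3ForConditionalGeneration.from_pretrained(
        "yasserrmd/SinaReason-Magistral-2509",
        quantization_config=bnb_config,
        device_map="auto",  # requires accelerate
    )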
     def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
+        """Extract the [THINK]...[/THINK] reasoning block and the clinical response"""
+        # Look for the specific [THINK]...[/THINK] pattern used by SinaReason
+        think_pattern = r'\[THINK\](.*?)\[/THINK\]'
+
+        thinking = ""
+        response = text
+
+        match = re.search(think_pattern, text, re.DOTALL | re.IGNORECASE)
         if match:
             thinking = match.group(1).strip()
+            response = re.sub(think_pattern, "", text, flags=re.DOTALL | re.IGNORECASE).strip()
+
+        return thinking, response
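A quick sanity check of the escaped pattern on a made-up transcript (the committed r'[THINK](.*?)[/THINK]' treated the brackets as character classes, so it never matched the literal [THINK] tags):

    import re

    THINK_RE = r'\[THINK\](.*?)\[/THINK\]'  # escaped brackets match the literal tags
    sample = "[THINK]Focal deficit plus AF history suggests an embolic stroke.[/THINK]Arrange an urgent non-contrast CT head."
    match = re.search(THINK_RE, sample, re.DOTALL | re.IGNORECASE)
    print(match.group(1).strip())                                 # -> the reasoning block
    print(re.sub(THINK_RE, "", sample, flags=re.DOTALL).strip())  # -> the clinical summary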
 
     @spaces.GPU(duration=120)
     def medical_chat_stream(self, message: str, history: List[List[str]], max_tokens: int = 1024,
+                            temperature: float = 0.7, top_p: float = 0.95) -> Iterator[Tuple[str, List[List[str]]]]:
+        """Stream medical reasoning responses with a live thinking display."""
+        self.model.to(DEVICE).eval()
         if not message.strip():
             return

+        index_begin_think = MEDICAL_SYSTEM_PROMPT.find("[THINK]")
+        index_end_think = MEDICAL_SYSTEM_PROMPT.find("[/THINK]")
+
         # Apply the chat template with the medical system prompt
         messages = [
+            {
+                "role": "system",
+                "content": [
+                    {"type": "text", "text": MEDICAL_SYSTEM_PROMPT[:index_begin_think]},
+                    {
+                        "type": "thinking",
+                        "thinking": MEDICAL_SYSTEM_PROMPT[
+                            index_begin_think + len("[THINK]") : index_end_think
+                        ],
+                        "closed": True,
+                    },
+                    {
+                        "type": "text",
+                        "text": MEDICAL_SYSTEM_PROMPT[index_end_think + len("[/THINK]") :],
+                    },
+                ],
+            }
         ]
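The slicing above just splits MEDICAL_SYSTEM_PROMPT into text / thinking / text chunks around its [THINK]...[/THINK] template block. A toy run on a hypothetical mini-prompt shows the three chunks the system message is built from:

    prompt = "Intro text. [THINK]reasoning template[/THINK] Closing text."
    begin, end = prompt.find("[THINK]"), prompt.find("[/THINK]")
    chunks = (
        prompt[:begin],                         # leading text
        prompt[begin + len("[THINK]"):end],     # thinking template
        prompt[end + len("[/THINK]"):],         # trailing text
    )
    print(chunks)  # ('Intro text. ', 'reasoning template', ' Closing text.')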

         # Add conversation history
         for user_msg, assistant_msg in history:
             messages.append({"role": "user", "content": user_msg})
+            messages.append({"role": "assistant", "content": assistant_msg})

+        # Add current message
         messages.append({"role": "user", "content": message})

+        # Apply chat template
+        prompt = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False
+        )

+        # Tokenize input and move to the same device as the model
+        inputs = self.tokenizer(
+            text=prompt,
+            return_tensors="pt"
+        ).to(DEVICE)
+
+        # Setup streamer
         streamer = TextIteratorStreamer(
             self.tokenizer,
             timeout=30.0,
...
             skip_special_tokens=True
         )

+        # Generation parameters optimized for medical reasoning
         generation_kwargs = {
             **inputs,
             "max_new_tokens": max_tokens,
...
             "repetition_penalty": 1.1
         }

+        # Run generation in a worker thread; the streamer yields tokens as they arrive
+        thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
         thread.start()

+        # Stream the response
         partial_response = ""
+        current_thinking = ""
+        current_response = ""
+
         for new_token in streamer:
             partial_response += new_token

+            # Extract thinking and response
+            thinking, response = self.extract_thinking_and_response(partial_response)
+
+            # Show thinking phase while it's being generated
+            if thinking and thinking != current_thinking:
+                current_thinking = thinking
+                display_text = f"🧠 **Medical Reasoning in Progress...**\n\n<details>\n<summary>🔍 Click to see thinking process</summary>\n\n*{current_thinking}*\n\n</details>"
+                new_history = history + [[message, display_text]]
+                yield "", new_history
+                time.sleep(0.1)  # Smooth streaming
+
+            # Show clinical response as it's generated
+            if response and response != current_response:
+                current_response = response
+
+                final_display = f"""🧠 **Medical Reasoning Process**
+<details>
+<summary>🔍 Click to view detailed thinking process</summary>
+*{current_thinking}*
+</details>
+---
+🩺 **Clinical Summary**
+{current_response}"""
+
+                new_history = history + [[message, final_display]]
+                yield "", new_history

 # Initialize the medical chat model
 medical_chat_model = SinaReasonMedicalChat()

 def respond(message, history, max_tokens, temperature, top_p):
+    """Gradio response function for medical reasoning"""
+    for response in medical_chat_model.medical_chat_stream(message, history, max_tokens, temperature, top_p):
+        yield response

+# Custom CSS for medical interface
 css = """
+.medical-chatbot {
+    min-height: 700px;
+    border: 2px solid #e3f2fd;
+    border-radius: 10px;
+}
+.thinking-section {
+    background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
+    border-left: 4px solid #2196f3;
+    padding: 15px;
+    margin: 10px 0;
+    border-radius: 8px;
+    font-family: 'Monaco', monospace;
+    font-size: 0.9em;
+}
+.clinical-response {
+    background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
+    border-left: 4px solid #ff9800;
+    padding: 15px;
+    margin: 10px 0;
+    border-radius: 8px;
+}
+.warning-box {
+    background: #fff3cd;
+    border: 1px solid #ffeaa7;
+    border-radius: 8px;
+    padding: 15px;
+    margin: 15px 0;
+    color: #856404;
+}
+.footer-text {
+    text-align: center;
+    color: #666;
+    font-size: 0.9em;
+    margin-top: 20px;
+}
 """

+# Create medical Gradio interface
 with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🩺 SinaReason Medical Reasoning Assistant
+
+    **Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)
+
+    This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
+    """)
+
+    # Medical disclaimer
     with gr.Row():
         gr.HTML("""
         <div class="warning-box">
             <h4>⚠️ Important Medical Disclaimer</h4>
             <p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
             <ul>
+                <li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
+                <li>👨‍⚕️ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
+                <li>🔍 <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
+                <li>📚 <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
             </ul>
         </div>
         """)
+
     with gr.Row():
         with gr.Column(scale=4):
             chatbot = gr.Chatbot(
...
                 show_copy_button=True,
                 bubble_full_width=False,
                 elem_classes=["medical-chatbot"],
+                avatar_images=(None, "🩺")
             )
+
             msg = gr.Textbox(
+                placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
                 lines=3,
+                max_lines=8,
                 show_label=False,
                 container=False
             )
+
+            with gr.Row():
+                submit_btn = gr.Button("🔍 Analyze Case", variant="primary", size="sm")
+                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
+                retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm")
+
         with gr.Column(scale=1, min_width=250):
             gr.Markdown("### ⚙️ Model Parameters")
+
+            max_tokens = gr.Slider(
+                minimum=256,
+                maximum=2048,
+                value=1024,
+                step=64,
+                label="Max Tokens",
+                info="Maximum response length"
+            )
+
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.7,
+                step=0.05,
+                label="Temperature",
+                info="Reasoning creativity (0.7 recommended)"
+            )
+
+            top_p = gr.Slider(
+                minimum=0.8,
+                maximum=1.0,
+                value=0.95,
+                step=0.01,
+                label="Top-p",
+                info="Focus precision (0.95 recommended)"
+            )
+
+            gr.Markdown("""
+            ### 🎯 Usage Guidelines:
+
+            **Best for:**
+            - Clinical case analysis
+            - Differential diagnosis reasoning
+            - Medical education scenarios
+            - Professional consultation support
+
+            **Features:**
+            - Transparent `[THINK]` process
+            - Step-by-step clinical reasoning
+            - Evidence-based conclusions
+            - Professional medical language
+            """)
+
+    # Event handlers
+    def clear_chat():
+        return [], ""
+
+    def retry_last(history):
+        if history:
+            last_user_msg = history[-1][0]
+            return history[:-1], last_user_msg
+        return history, ""
+
+    # Button events
+    submit_btn.click(
+        respond,
+        inputs=[msg, chatbot, max_tokens, temperature, top_p],
+        outputs=[msg, chatbot]
+    )
+
+    msg.submit(
+        respond,
+        inputs=[msg, chatbot, max_tokens, temperature, top_p],
+        outputs=[msg, chatbot]
+    )
+
+    clear_btn.click(clear_chat, outputs=[chatbot, msg])
+    retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])
+
+    # Medical case examples
     gr.Examples(
         examples=[
             "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
             "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
             "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
+            "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
+            "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
+            "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
         ],
         inputs=[msg],
+        label="📋 Clinical Case Examples (Try these scenarios):"
     )
+
+    # Footer
     gr.HTML("""
     <div class="footer-text">
+        <p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
+        <p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
+        <p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
+        <p>🚀 <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
     </div>
     """)

+# Launch configuration for HF Spaces
 if __name__ == "__main__":
+    demo.launch(
+        debug=True,
+        show_error=True
+    )