yasserrmd committed on
Commit
fe98ea0
·
verified ·
1 Parent(s): 8ba849d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +392 -0
app.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
+ from threading import Thread
5
+ import re
6
+ import time
7
+ import os
8
+ from typing import Iterator, List, Tuple
9
+ import spaces
10
+
11
+ # Model configuration
12
+ MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
13
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
+
15
+ # Medical system prompt as recommended by the model card
16
+ MEDICAL_SYSTEM_PROMPT = """
17
+ You are SinaReason, a medical reasoning assistant for educational and clinical support.
18
+ Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
19
+ **Never provide medical advice directly to a patient.**
20
+
21
+ First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
22
+ - Use this section to work through symptoms, differential diagnoses, and investigation plans.
23
+ - Be explicit and thorough in your reasoning.
24
+
25
+ After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
26
+ - Summarize the most likely diagnosis and your reasoning.
27
+ - Suggest next steps for investigation or management.
28
+ """
29
+
30
+ class SinaReasonMedicalChat:
31
+ def __init__(self):
32
+ self.tokenizer = None
33
+ self.model = None
34
+ self.load_model()
35
+
36
+ def load_model(self):
37
+ """Load the SinaReason medical model and tokenizer"""
38
+ try:
39
+ print(f"Loading medical model: {MODEL_NAME}")
40
+ self.tokenizer = AutoTokenizer.from_pretrained(
41
+ MODEL_NAME,
42
+ trust_remote_code=True
43
+ )
44
+
45
+ # Add padding token if not present
46
+ if self.tokenizer.pad_token is None:
47
+ self.tokenizer.pad_token = self.tokenizer.eos_token
48
+
49
+ self.model = AutoModelForCausalLM.from_pretrained(
50
+ MODEL_NAME,
51
+ torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
52
+ #device_map="auto" if DEVICE == "cuda" else None,
53
+ trust_remote_code=True,
54
+ low_cpu_mem_usage=True
55
+ )
56
+
57
+ if DEVICE == "cpu":
58
+ self.model = self.model.to(DEVICE)
59
+
60
+ print("SinaReason medical model loaded successfully!")
61
+
62
+ except Exception as e:
63
+ print(f"Error loading model: {e}")
64
+ raise e
65
+
66
+ def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
67
+ """Extract thinking process from <think>...</think> tags and clinical response"""
68
+ # Look for the specific <think>...</think> pattern used by SinaReason
69
+ think_pattern = r'<think>(.*?)</think>'
70
+
71
+ thinking = ""
72
+ response = text
73
+
74
+ match = re.search(think_pattern, text, re.DOTALL | re.IGNORECASE)
75
+ if match:
76
+ thinking = match.group(1).strip()
77
+ response = re.sub(think_pattern, "", text, flags=re.DOTALL | re.IGNORECASE).strip()
78
+
79
+ return thinking, response
80
+
81
+ @spaces.GPU
82
+ def medical_chat_stream(self, message: str, history: List[List[str]], max_tokens: int = 1024,
83
+ temperature: float = 0.7, top_p: float = 0.95) -> Iterator[Tuple[str, List[List[str]]]]:
84
+ """Stream medical reasoning responses with thinking display"""
85
+ if not message.strip():
86
+ return
87
+
88
+ self.model.to(DEVICE)
89
+
90
+ # Apply the chat template with the medical system prompt
91
+ messages = [
92
+ {"role": "system", "content": MEDICAL_SYSTEM_PROMPT},
93
+ ]
94
+
95
+ # Add conversation history
96
+ for user_msg, assistant_msg in history:
97
+ messages.append({"role": "user", "content": user_msg})
98
+ messages.append({"role": "assistant", "content": assistant_msg})
99
+
100
+ # Add current message
101
+ messages.append({"role": "user", "content": message})
102
+
103
+ # Apply chat template
104
+ prompt = self.tokenizer.apply_chat_template(
105
+ messages,
106
+ tokenize=False,
107
+ add_generation_prompt=True,
108
+ )
109
+
110
+ # Tokenize input
111
+ inputs = self.tokenizer(
112
+ text=prompt,
113
+ images=None, # Required for this multimodal architecture
114
+ return_tensors="pt"
115
+ ).to(DEVICE)
116
+
117
+ # Setup streamer
118
+ streamer = TextIteratorStreamer(
119
+ self.tokenizer,
120
+ timeout=30.0,
121
+ skip_prompt=True,
122
+ skip_special_tokens=True
123
+ )
124
+
125
+ # Generation parameters optimized for medical reasoning
126
+ generation_kwargs = {
127
+ **inputs,
128
+ "images": None, # Also required here for text-only inference
129
+ "max_new_tokens": max_tokens,
130
+ "temperature": temperature,
131
+ "top_p": top_p,
132
+ "do_sample": True,
133
+ "pad_token_id": self.tokenizer.eos_token_id,
134
+ "streamer": streamer,
135
+ "repetition_penalty": 1.1
136
+ }
137
+
138
+ # Start generation in a separate thread
139
+ thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
140
+ thread.start()
141
+
142
+ # Stream the response
143
+ partial_response = ""
144
+ current_thinking = ""
145
+ current_response = ""
146
+ thinking_phase = True
147
+
148
+ for new_token in streamer:
149
+ partial_response += new_token
150
+
151
+ # Extract thinking and response
152
+ thinking, response = self.extract_thinking_and_response(partial_response)
153
+
154
+ # Show thinking phase while it's being generated
155
+ if thinking and thinking != current_thinking:
156
+ current_thinking = thinking
157
+ display_text = f"🧠 **Medical Reasoning in Progress...**\n\n<details>\n<summary>πŸ” Click to see thinking process</summary>\n\n*{current_thinking}*\n\n</details>"
158
+ new_history = history + [[message, display_text]]
159
+ yield "", new_history
160
+ time.sleep(0.1) # Smooth streaming
161
+
162
+ # Show clinical response as it's generated
163
+ if response and response != current_response:
164
+ current_response = response
165
+
166
+ final_display = f"🩺 **Clinical Analysis**\n\n{current_response}"
167
+
168
+ if current_thinking:
169
+ final_display = f"""🧠 **Medical Reasoning Process**
170
+
171
+ <details>
172
+ <summary>πŸ” Click to view detailed thinking process</summary>
173
+
174
+ *{current_thinking}*
175
+
176
+ </details>
177
+
178
+ ---
179
+
180
+ 🩺 **Clinical Summary**
181
+
182
+ {current_response}"""
183
+
184
+ new_history = history + [[message, final_display]]
185
+ yield "", new_history
186
+
187
+ thread.join()
188
+
189
+ # Initialize the medical chat model
190
+ medical_chat_model = SinaReasonMedicalChat()
191
+
192
+ def respond(message, history, max_tokens, temperature, top_p):
193
+ """Gradio response function for medical reasoning"""
194
+ for response in medical_chat_model.medical_chat_stream(message, history, max_tokens, temperature, top_p):
195
+ yield response
196
+
197
+ # Custom CSS for medical interface
198
+ css = """
199
+ .medical-chatbot {
200
+ min-height: 700px;
201
+ border: 2px solid #e3f2fd;
202
+ border-radius: 10px;
203
+ }
204
+ .thinking-section {
205
+ background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
206
+ border-left: 4px solid #2196f3;
207
+ padding: 15px;
208
+ margin: 10px 0;
209
+ border-radius: 8px;
210
+ font-family: 'Monaco', monospace;
211
+ font-size: 0.9em;
212
+ }
213
+ .clinical-response {
214
+ background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
215
+ border-left: 4px solid #ff9800;
216
+ padding: 15px;
217
+ margin: 10px 0;
218
+ border-radius: 8px;
219
+ }
220
+ .warning-box {
221
+ background: #fff3cd;
222
+ border: 1px solid #ffeaa7;
223
+ border-radius: 8px;
224
+ padding: 15px;
225
+ margin: 15px 0;
226
+ color: #856404;
227
+ }
228
+ .footer-text {
229
+ text-align: center;
230
+ color: #666;
231
+ font-size: 0.9em;
232
+ margin-top: 20px;
233
+ }
234
+ """
235
+
236
+ # Create medical Gradio interface
237
+ with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
238
+ gr.Markdown("""
239
+ # 🩺 SinaReason Medical Reasoning Assistant
240
+
241
+ **Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)
242
+
243
+ This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
244
+ """)
245
+
246
+ # Medical disclaimer
247
+ with gr.Row():
248
+ gr.HTML("""
249
+ <div class="warning-box">
250
+ <h4>⚠️ Important Medical Disclaimer</h4>
251
+ <p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
252
+ <ul>
253
+ <li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
254
+ <li>πŸ‘¨β€βš•οΈ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
255
+ <li>πŸ” <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
256
+ <li>πŸ“š <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
257
+ </ul>
258
+ </div>
259
+ """)
260
+
261
+ with gr.Row():
262
+ with gr.Column(scale=4):
263
+ chatbot = gr.Chatbot(
264
+ height=700,
265
+ show_copy_button=True,
266
+ bubble_full_width=False,
267
+ elem_classes=["medical-chatbot"],
268
+ avatar_images=(None, "🩺")
269
+ )
270
+
271
+ msg = gr.Textbox(
272
+ placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
273
+ lines=3,
274
+ max_lines=8,
275
+ show_label=False,
276
+ container=False
277
+ )
278
+
279
+ with gr.Row():
280
+ submit_btn = gr.Button("πŸ” Analyze Case", variant="primary", size="sm")
281
+ clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary", size="sm")
282
+ retry_btn = gr.Button("πŸ”„ Retry", variant="secondary", size="sm")
283
+
284
+ with gr.Column(scale=1, min_width=250):
285
+ gr.Markdown("### βš™οΈ Model Parameters")
286
+
287
+ max_tokens = gr.Slider(
288
+ minimum=256,
289
+ maximum=2048,
290
+ value=1024,
291
+ step=64,
292
+ label="Max Tokens",
293
+ info="Maximum response length"
294
+ )
295
+
296
+ temperature = gr.Slider(
297
+ minimum=0.1,
298
+ maximum=1.0,
299
+ value=0.7,
300
+ step=0.05,
301
+ label="Temperature",
302
+ info="Reasoning creativity (0.7 recommended)"
303
+ )
304
+
305
+ top_p = gr.Slider(
306
+ minimum=0.8,
307
+ maximum=1.0,
308
+ value=0.95,
309
+ step=0.01,
310
+ label="Top-p",
311
+ info="Focus precision (0.95 recommended)"
312
+ )
313
+
314
+ gr.Markdown("""
315
+ ### 🎯 Usage Guidelines:
316
+
317
+ **Best for:**
318
+ - Clinical case analysis
319
+ - Differential diagnosis reasoning
320
+ - Medical education scenarios
321
+ - Professional consultation support
322
+
323
+ **Features:**
324
+ - Transparent `<think>` process
325
+ - Step-by-step clinical reasoning
326
+ - Evidence-based conclusions
327
+ - Professional medical language
328
+ """)
329
+
330
+ # Event handlers
331
+ def clear_chat():
332
+ return [], ""
333
+
334
+ def retry_last(history):
335
+ if history:
336
+ last_user_msg = history[-1][0]
337
+ return history[:-1], last_user_msg
338
+ return history, ""
339
+
340
+ # Button events
341
+ submit_btn.click(
342
+ respond,
343
+ inputs=[msg, chatbot, max_tokens, temperature, top_p],
344
+ outputs=[msg, chatbot]
345
+ )
346
+
347
+ msg.submit(
348
+ respond,
349
+ inputs=[msg, chatbot, max_tokens, temperature, top_p],
350
+ outputs=[msg, chatbot]
351
+ )
352
+
353
+ clear_btn.click(clear_chat, outputs=[chatbot, msg])
354
+ retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])
355
+
356
+ # Medical case examples
357
+ gr.Examples(
358
+ examples=[
359
+ "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
360
+ "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
361
+ "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
362
+ "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
363
+ "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
364
+ "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
365
+ ],
366
+ inputs=[msg],
367
+ label="πŸ“‹ Clinical Case Examples (Try these scenarios):"
368
+ )
369
+
370
+ # Footer
371
+ gr.HTML("""
372
+ <div class="footer-text">
373
+ <p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
374
+ <p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
375
+ <p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
376
+ <p>πŸš€ <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
377
+ </div>
378
+ """)
379
+
380
+ # Launch configuration for HF Spaces
381
+ if __name__ == "__main__":
382
+ demo.queue(
383
+ concurrency_count=1, # Medical reasoning is compute-intensive
384
+ max_size=5,
385
+ api_open=False
386
+ ).launch(
387
+ server_name="0.0.0.0",
388
+ server_port=7860,
389
+ share=False,
390
+ show_error=True,
391
+ quiet=False
392
+ )