# Hugging Face Space page header (captured with the file): Spaces - Running on Zero (ZeroGPU hardware).
import os
import re
from typing import List, Tuple

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Model configuration
# Hub repository id passed to ``from_pretrained`` for both the model and the tokenizer.
MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
# "cuda" when PyTorch can see a CUDA device, otherwise "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Medical system prompt as recommended by the model card.
# It is sent as the first ("system") message of every conversation and asks the
# model to put its reasoning inside <think> ... </think> before the summary.
MEDICAL_SYSTEM_PROMPT = """
You are SinaReason, a medical reasoning assistant for educational and clinical support.
Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
**Never provide medical advice directly to a patient.**
First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
- Use this section to work through symptoms, differential diagnoses, and investigation plans.
- Be explicit and thorough in your reasoning.
After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
- Summarize the most likely diagnosis and your reasoning.
- Suggest next steps for investigation or management.
"""
class SinaReasonMedicalChat:
    """Chat wrapper around the SinaReason causal language model.

    On construction the model and tokenizer named by ``MODEL_NAME`` are loaded
    with Hugging Face Transformers. ``medical_chat`` then turns a Gradio-style
    chat history plus a new message into a formatted reply in which the
    model's ``<think>`` reasoning is shown separately from its clinical summary.
    """

    # Headings used when rendering a reply. CLINICAL_SUMMARY_HEADER doubles as
    # the marker used to strip the reasoning section from earlier assistant
    # turns when the prompt is rebuilt, so formatter and stripper share it.
    # NOTE(review): the emoji in this class were reconstructed from mis-encoded
    # text in the source; confirm the intended glyphs.
    REASONING_HEADER = "🧠 **Medical Reasoning Process**"
    CLINICAL_SUMMARY_HEADER = "🩺 **Clinical Summary**"

    # Compiled once; tags are matched case-insensitively and across newlines.
    _THINK_BLOCK = re.compile(r"<think>(.*?)</think>", re.DOTALL | re.IGNORECASE)
    _THINK_OPEN = re.compile(r"<think>", re.IGNORECASE)
    _THINK_CLOSE = re.compile(r"</think>", re.IGNORECASE)

    def __init__(self):
        """Create the wrapper and immediately load the model and tokenizer."""
        self.tokenizer = None
        self.model = None
        # Legacy placeholder: nothing in this class reads it. Kept so code
        # outside this class that accesses the attribute keeps working.
        self.dummy_image = None
        self.load_model()

    def load_model(self):
        """Load the SinaReason model and tokenizer with Transformers.

        Raises:
            Exception: whatever ``from_pretrained`` raised, re-raised unchanged
                after the error has been printed.
        """
        print(f"Loading medical model: {MODEL_NAME}")
        print("cuda" if torch.cuda.is_available() else "cpu")
        try:
            self.model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=torch.bfloat16,  # Use bfloat16 for modern GPUs
                device_map="auto",  # Automatically map to the available GPU
            )
            # Load the standard tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        except Exception as e:
            print(f"Error loading model: {e}")
            # Bare raise keeps the original traceback intact.
            raise
        print("SinaReason medical model loaded successfully!")

    def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
        """Split generated text into ``(thinking, response)``.

        Args:
            text: Decoded model output.

        Returns:
            A tuple ``(thinking, response)``:

            * Complete ``<think>...</think>`` block present: ``thinking`` is the
              stripped content of the first block and ``response`` is ``text``
              with every complete block removed, stripped.
            * Only an opening tag (for example generation stopped at the token
              limit): everything after the tag is ``thinking``; the text before
              it is ``response``.
            * Only a closing tag: the text before it is ``thinking`` and the
              text after it is ``response``.
            * No tags at all: ``("", text)`` with ``text`` untouched.
        """
        block = self._THINK_BLOCK.search(text)
        if block:
            thinking = block.group(1).strip()
            response = self._THINK_BLOCK.sub("", text).strip()
            return thinking, response

        opened = self._THINK_OPEN.search(text)
        if opened:
            # Reasoning was never closed; do not leak it into the summary.
            return text[opened.end():].strip(), text[:opened.start()].strip()

        closed = self._THINK_CLOSE.search(text)
        if closed:
            return text[:closed.start()].strip(), text[closed.end():].strip()

        return "", text

    def _build_messages(self, message: str, history: List[List[str]]) -> List[dict]:
        """Build the chat-template message list: system prompt, history, new message."""
        messages = [{"role": "system", "content": MEDICAL_SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            # Earlier replies were stored in display form; keep only the part
            # after the clinical-summary heading so reasoning is not fed back.
            raw_assistant_msg = (assistant_msg or "").split(self.CLINICAL_SUMMARY_HEADER)[-1].strip()
            messages.append({"role": "user", "content": user_msg or ""})
            messages.append({"role": "assistant", "content": raw_assistant_msg})
        messages.append({"role": "user", "content": message})
        return messages

    def _format_reply(self, thinking: str, response: str) -> str:
        """Render the Markdown shown in the chatbot for one assistant turn."""
        sections = []
        if thinking:
            # Blank lines around the <details> HTML block are required for the
            # Markdown inside and after it to be rendered as Markdown.
            sections.append(
                f"{self.REASONING_HEADER}\n\n"
                "<details>\n"
                "<summary>🔍 Click to view detailed thinking process</summary>\n\n"
                f"*{thinking}*\n\n"
                "</details>\n\n"
                "---\n\n"
            )
        sections.append(f"{self.CLINICAL_SUMMARY_HEADER}\n\n{response}")
        return "".join(sections)

    def medical_chat(self, message: str, history: List[List[str]], max_tokens: int = 1024,
                     temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
        """Generate a medical reasoning reply and append it to the chat history.

        Args:
            message: New user message. Blank or whitespace-only input is ignored.
            history: Previous turns as ``[user, assistant]`` pairs; ``None`` is
                treated as an empty history.
            max_tokens: Upper bound on newly generated tokens.
            temperature: Sampling temperature passed to ``generate``.
            top_p: Nucleus-sampling threshold passed to ``generate``.

        Returns:
            ``("", new_history)``. The empty string clears the input textbox and
            ``new_history`` is ``history`` plus the new ``[message, reply]`` pair.
            For blank input the history is returned unchanged.
        """
        history = history or []
        if not message or not message.strip():
            return "", history

        # Only move the model when a GPU exists and the model is not already
        # on it; an unconditional .to("cuda") fails on CPU-only hosts.
        if torch.cuda.is_available() and self.model.device.type != "cuda":
            self.model.to("cuda")
        self.model.eval()

        # Apply the chat template with the medical system prompt
        formatted_prompt = self.tokenizer.apply_chat_template(
            self._build_messages(message, history),
            tokenize=False,
            add_generation_prompt=True,
        )
        # The rendered template already contains its special tokens, so the
        # tokenizer must not add them a second time.
        inputs = self.tokenizer(
            formatted_prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)
        prompt_length = inputs.input_ids.shape[1]

        generation_kwargs = {
            **inputs,
            "max_new_tokens": int(max_tokens),
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        output = self.model.generate(**generation_kwargs)[0]
        # Decode only the newly generated tokens, not the echoed prompt.
        full_response = self.tokenizer.decode(output[prompt_length:], skip_special_tokens=True)

        # Extract thinking and clinical summary, then format the final display
        thinking, response = self.extract_thinking_and_response(full_response)
        final_display = self._format_reply(thinking, response)

        new_history = history + [[message, final_display]]
        return "", new_history
# Initialize the medical chat model once at import time; every request reuses it.
medical_chat_model = SinaReasonMedicalChat()


# ZeroGPU attaches a GPU only while a function decorated with spaces.GPU is
# running. The duration is raised above the default because generation may
# produce up to the "Max Tokens" slider limit.
@spaces.GPU(duration=120)
def respond(message, history, max_tokens, temperature, top_p):
    """Gradio response function for medical reasoning.

    Forwards the textbox value, chatbot history and the three slider values to
    ``medical_chat_model.medical_chat`` and returns its result unchanged: an
    empty string for the textbox and the updated history for the chatbot.
    """
    return medical_chat_model.medical_chat(message, history, max_tokens, temperature, top_p)
# Custom CSS for medical interface.
# Passed to gr.Blocks(css=...); the class names are referenced from the
# chatbot's elem_classes and from the HTML snippets in the layout.
css = """
.medical-chatbot {
    min-height: 700px;
    border: 2px solid #e3f2fd;
    border-radius: 10px;
}
.thinking-section {
    background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
    border-left: 4px solid #2196f3;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
    font-family: 'Monaco', monospace;
    font-size: 0.9em;
}
.clinical-response {
    background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
    border-left: 4px solid #ff9800;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
}
.warning-box {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 8px;
    padding: 15px;
    margin: 15px 0;
    color: #856404;
}
.footer-text {
    text-align: center;
    color: #666;
    font-size: 0.9em;
    margin-top: 20px;
}
"""
# Create medical Gradio interface.
# Layout: title, disclaimer, then a two-column row (chat on the left, sampling
# controls and usage notes on the right), followed by examples and a footer.
# NOTE(review): the emoji in the labels below were reconstructed from
# mis-encoded text in the source; confirm the intended glyphs.
with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🩺 SinaReason Medical Reasoning Assistant

    **Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)

    This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
    """)

    # Medical disclaimer
    with gr.Row():
        gr.HTML("""
        <div class="warning-box">
            <h4>⚠️ Important Medical Disclaimer</h4>
            <p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
            <ul>
                <li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
                <li>👨‍⚕️ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
                <li>🔍 <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
                <li>📚 <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
            </ul>
        </div>
        """)

    with gr.Row():
        # Left column: conversation, input box and action buttons.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=700,
                show_copy_button=True,
                bubble_full_width=False,
                elem_classes=["medical-chatbot"],
                avatar_images=(None, "🩺"),
            )
            msg = gr.Textbox(
                placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
                lines=3,
                max_lines=8,
                show_label=False,
                container=False,
            )
            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze Case", variant="primary", size="sm")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
                retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm")

        # Right column: sampling parameters forwarded to respond().
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### ⚙️ Model Parameters")
            max_tokens = gr.Slider(
                minimum=256,
                maximum=2048,
                value=1024,
                step=64,
                label="Max Tokens",
                info="Maximum response length",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.05,
                label="Temperature",
                info="Reasoning creativity (0.7 recommended)",
            )
            top_p = gr.Slider(
                minimum=0.8,
                maximum=1.0,
                value=0.95,
                step=0.01,
                label="Top-p",
                info="Focus precision (0.95 recommended)",
            )
            gr.Markdown("""
            ### 🎯 Usage Guidelines:

            **Best for:**

            - Clinical case analysis
            - Differential diagnosis reasoning
            - Medical education scenarios
            - Professional consultation support

            **Features:**

            - Transparent `<think>` process
            - Step-by-step clinical reasoning
            - Evidence-based conclusions
            - Professional medical language
            """)

    # Event handlers
    def clear_chat():
        """Reset the chatbot to an empty history and empty the textbox."""
        return [], ""

    def retry_last(history):
        """Drop the last turn and put its user message back into the textbox.

        Returns ``(history_without_last_turn, last_user_message)``; when there
        is no history, the history is returned as-is with an empty textbox
        value. The message is not re-submitted automatically.
        """
        if not history:
            return history, ""
        return history[:-1], history[-1][0]

    # Button events: the button click and Enter in the textbox both run the
    # same handler with the same inputs and outputs.
    for trigger in (submit_btn.click, msg.submit):
        trigger(
            respond,
            inputs=[msg, chatbot, max_tokens, temperature, top_p],
            outputs=[msg, chatbot],
        )
    clear_btn.click(clear_chat, outputs=[chatbot, msg])
    retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])

    # Medical case examples (clicking one fills the textbox)
    gr.Examples(
        examples=[
            "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
            "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
            "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
            "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
            "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
            "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?",
        ],
        inputs=[msg],
        label="📋 Clinical Case Examples (Try these scenarios):",
    )

    # Footer
    gr.HTML("""
    <div class="footer-text">
        <p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
        <p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
        <p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
        <p>🚀 <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
    </div>
    """)
# Launch configuration for HF Spaces
if __name__ == "__main__":
    # Start the Gradio server only when the file is run as a script;
    # show_error=True asks Gradio to surface handler errors in the UI.
    demo.launch(show_error=True)