import gradio as gr
import torch
#from transformers import AutoTokenizer, Mistral3ForConditionalGeneration
import re
import os
from typing import List, Tuple
import spaces

# Model configuration
MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Matches the model's reasoning section. The tag delimiters were missing from
# the pattern, which made it match the empty string and never capture anything.
# NOTE(review): the <think> tag name is restored from context (the variable
# name, the docstrings and the usage notes) - confirm against the model card.
THINK_PATTERN = re.compile(r"<think>(.*?)</think>", re.DOTALL | re.IGNORECASE)

# Heading that separates the rendered reasoning from the clinical summary.
# medical_chat() also uses it to strip the reasoning out of earlier turns.
CLINICAL_SUMMARY_HEADING = "🩺 **Clinical Summary**"

# Medical system prompt as recommended by the model card
MEDICAL_SYSTEM_PROMPT = """
You are SinaReason, a medical reasoning assistant for educational and clinical support.
Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
**Never provide medical advice directly to a patient.**

First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
- Use this section to work through symptoms, differential diagnoses, and investigation plans.
- Be explicit and thorough in your reasoning.

After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
- Summarize the most likely diagnosis and your reasoning.
- Suggest next steps for investigation or management.
"""


class SinaReasonMedicalChat:
    """Chat wrapper around the SinaReason model loaded through Unsloth.

    The model and tokenizer are loaded lazily on the first call to
    ``medical_chat`` rather than in ``__init__``.
    """

    def __init__(self):
        self.tokenizer = None
        self.model = None
        # The PixtralProcessor requires an image argument, even if it's None.
        # This is a mandatory part of the call signature.
        self.dummy_image = None
        #self.load_model()

    def load_model(self):
        """Load the SinaReason medical model and tokenizer using Unsloth.

        Raises:
            Exception: whatever Unsloth raises while loading; the error is
                printed and then re-raised unchanged.
        """
        try:
            # Imported here so that importing this module does not import Unsloth.
            from unsloth import FastLanguageModel

            print(f"Loading medical model with Unsloth: {MODEL_NAME}")
            print("cuda" if torch.cuda.is_available() else "cpu")

            # Use FastLanguageModel from Unsloth to load the model and tokenizer
            self.model, self.tokenizer = FastLanguageModel.from_pretrained(
                model_name=MODEL_NAME,
                dtype=torch.bfloat16,
                load_in_4bit=True,  # Or False if you have enough VRAM for 16-bit
                device_map="cuda",
            )
            print("SinaReason medical model loaded successfully with Unsloth!")
        except Exception as e:
            print(f"Error loading model with Unsloth: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
        """Split generated text into its reasoning and its clinical response.

        Args:
            text: Decoded model output.

        Returns:
            ``(thinking, response)``. ``thinking`` is the stripped content of
            the first ``<think>...</think>`` section, or ``""`` if there is
            none. ``response`` is ``text`` with every such section removed and
            stripped; when no section is found it is ``text`` unchanged.
        """
        match = THINK_PATTERN.search(text)
        if match is None:
            return "", text

        thinking = match.group(1).strip()
        response = THINK_PATTERN.sub("", text).strip()
        return thinking, response

    @spaces.GPU(duration=120)
    def medical_chat(
        self,
        message: str,
        history: List[List[str]],
        max_tokens: int = 1024,
        temperature: float = 0.7,
        top_p: float = 0.95,
    ) -> Tuple[str, List[List[str]]]:
        """Generate medical reasoning responses using the Unsloth model.

        Args:
            message: The new user message.
            history: Previous ``[user, assistant]`` pairs as shown in the chatbot.
            max_tokens: Passed to ``generate`` as ``max_new_tokens``.
            temperature: Sampling temperature.
            top_p: Nucleus sampling threshold.

        Returns:
            ``("", new_history)``: an empty string to clear the input box and
            the history with the new exchange appended. A blank ``message``
            returns the history unchanged.
        """
        history = history or []

        # Check for blank input before doing any model work.
        if not message.strip():
            return "", history

        # Load once and reuse; previously the model was reloaded on every call.
        # No need for model.to(DEVICE), Unsloth's device_map handles it.
        if self.model is None or self.tokenizer is None:
            self.load_model()
        self.model.eval()

        # Apply the chat template with the medical system prompt
        messages = [{"role": "system", "content": MEDICAL_SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            # Send back only the clinical summary of earlier turns, not the
            # rendered reasoning. `or ""` guards against None entries.
            raw_assistant_msg = (
                (assistant_msg or "").split(CLINICAL_SUMMARY_HEADING)[-1].strip()
            )
            messages.append({"role": "user", "content": user_msg or ""})
            messages.append({"role": "assistant", "content": raw_assistant_msg})
        messages.append({"role": "user", "content": message})

        # Format the prompt using the chat template
        formatted_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize the input, correctly passing images=None
        inputs = self.tokenizer(
            text=formatted_prompt,
            images=self.dummy_image,
            return_tensors="pt",
        ).to(self.model.device)

        # Generation parameters
        generation_kwargs = {
            **inputs,
            "images": self.dummy_image,  # This MUST be passed to model.generate
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "pad_token_id": self.tokenizer.eos_token_id,
        }

        # Generate the full response
        output = self.model.generate(**generation_kwargs)[0]

        # Decode only the newly generated tokens
        prompt_length = inputs["input_ids"].shape[1]
        full_response = self.tokenizer.decode(
            output[prompt_length:], skip_special_tokens=True
        )

        # Extract thinking and clinical summary
        thinking, response = self.extract_thinking_and_response(full_response)

        # Format the final display
        final_display = ""
        if thinking:
            # NOTE(review): the collapsible <details>/<summary> wrapper is
            # restored to match the "Click to view" label - confirm the markup.
            final_display += (
                "🧠 **Medical Reasoning Process**\n"
                "<details>\n"
                "<summary>🔍 Click to view detailed thinking process</summary>\n\n"
                f"*{thinking}*\n\n"
                "</details>\n\n"
                "---\n\n"
            )
        final_display += f"{CLINICAL_SUMMARY_HEADING}\n\n{response}"

        new_history = history + [[message, final_display]]
        return "", new_history


# Initialize the medical chat model
medical_chat_model = SinaReasonMedicalChat()


def respond(message, history, max_tokens, temperature, top_p):
    """Gradio response function for medical reasoning"""
    return medical_chat_model.medical_chat(message, history, max_tokens, temperature, top_p)


# Custom CSS for medical interface
css = """
.medical-chatbot {
    min-height: 700px;
    border: 2px solid #e3f2fd;
    border-radius: 10px;
}
.thinking-section {
    background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
    border-left: 4px solid #2196f3;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
    font-family: 'Monaco', monospace;
    font-size: 0.9em;
}
.clinical-response {
    background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
    border-left: 4px solid #ff9800;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
}
.warning-box {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 8px;
    padding: 15px;
    margin: 15px 0;
    color: #856404;
}
.footer-text {
    text-align: center;
    color: #666;
    font-size: 0.9em;
    margin-top: 20px;
}
"""

# Create medical Gradio interface
with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🩺 SinaReason Medical Reasoning Assistant

    **Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)

    This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
    """)

    # Medical disclaimer
    # NOTE(review): the wrapper markup is restored to use the otherwise unused
    # `.warning-box` CSS class - confirm the exact tags.
    with gr.Row():
        gr.HTML("""
        <div class="warning-box">
            <h4>⚠️ Important Medical Disclaimer</h4>
            <p>This is a research and educational tool for medical professionals, researchers, and students.</p>
        </div>
        """)

    with gr.Row():
        with gr.Column(scale=4):
            # NOTE(review): confirm that an emoji string is an accepted value
            # for avatar_images in the Gradio version in use.
            chatbot = gr.Chatbot(
                height=700,
                show_copy_button=True,
                bubble_full_width=False,
                elem_classes=["medical-chatbot"],
                avatar_images=(None, "🩺")
            )
            msg = gr.Textbox(
                placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
                lines=3,
                max_lines=8,
                show_label=False,
                container=False
            )
            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze Case", variant="primary", size="sm")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
                retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm")

        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### ⚙️ Model Parameters")
            max_tokens = gr.Slider(
                minimum=256,
                maximum=2048,
                value=1024,
                step=64,
                label="Max Tokens",
                info="Maximum response length"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.05,
                label="Temperature",
                info="Reasoning creativity (0.7 recommended)"
            )
            top_p = gr.Slider(
                minimum=0.8,
                maximum=1.0,
                value=0.95,
                step=0.01,
                label="Top-p",
                info="Focus precision (0.95 recommended)"
            )
            gr.Markdown("""
            ### 🎯 Usage Guidelines:

            **Best for:**
            - Clinical case analysis
            - Differential diagnosis reasoning
            - Medical education scenarios
            - Professional consultation support

            **Features:**
            - Transparent `<think>` process
            - Step-by-step clinical reasoning
            - Evidence-based conclusions
            - Professional medical language
            """)

    # Event handlers
    def clear_chat():
        """Reset the chatbot history and the input box."""
        return [], ""

    def retry_last(history):
        """Drop the last exchange and put its user message back in the input box."""
        if history:
            last_user_msg = history[-1][0]
            return history[:-1], last_user_msg
        return history, ""

    # Button events
    submit_btn.click(
        respond,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[msg, chatbot]
    )
    msg.submit(
        respond,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[msg, chatbot]
    )
    clear_btn.click(clear_chat, outputs=[chatbot, msg])
    retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])

    # Medical case examples
    gr.Examples(
        examples=[
            "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
            "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
            "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
            "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
            "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
            "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
        ],
        inputs=[msg],
        label="📋 Clinical Case Examples (Try these scenarios):"
    )

    # Footer
    # NOTE(review): the footer markup is empty in this copy of the file; the
    # `.footer-text` CSS class suggests it may have held content - confirm.
    gr.HTML(""" """)

# Launch configuration for HF Spaces
if __name__ == "__main__":
    demo.launch(
        show_error=True
    )