SinaReason / app.py
yasserrmd's picture
Update app.py
dec6090 verified
raw
history blame
12.6 kB
import gradio as gr
import torch
#from transformers import AutoTokenizer, Mistral3ForConditionalGeneration
import re
import os
from typing import List, Tuple
import spaces
# Model configuration
# MODEL_NAME is the model identifier handed to the Unsloth loader and printed
# in the loading log line.
MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
# "cuda" when torch reports an available GPU, otherwise "cpu".
# NOTE(review): DEVICE is not referenced by any other code visible in this
# file (the loader hard-codes device_map="cuda") - confirm whether it is needed.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Medical system prompt as recommended by the model card
# Sent as the "system" message at the start of every conversation. It asks the
# model to put its reasoning inside <think> ... </think> and then give a
# summary; the chat class relies on those tags to split the generated text.
MEDICAL_SYSTEM_PROMPT = """
You are SinaReason, a medical reasoning assistant for educational and clinical support.
Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
**Never provide medical advice directly to a patient.**
First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
- Use this section to work through symptoms, differential diagnoses, and investigation plans.
- Be explicit and thorough in your reasoning.
After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
- Summarize the most likely diagnosis and your reasoning.
- Suggest next steps for investigation or management.
"""
class SinaReasonMedicalChat:
    """Chat backend for the SinaReason medical reasoning model.

    The model and tokenizer start out unset; ``__init__`` loads nothing.
    ``medical_chat`` loads them on first use and reuses them afterwards for as
    long as this instance keeps them.
    """

    # Heading placed before the clinical summary in every rendered reply. It is
    # also the split point used to recover the plain summary from past replies
    # when the conversation is replayed to the model.
    SUMMARY_MARKER = "🩺 **Clinical Summary**"

    # A complete <think>...</think> block, and a bare opening tag (used to
    # detect a generation that was cut off before the closing tag).
    _THINK_BLOCK_PATTERN = r'<think>(.*?)</think>'
    _THINK_OPEN_PATTERN = r'<think>'

    def __init__(self):
        """Create the chat object without loading the model."""
        self.tokenizer = None
        self.model = None
        # The PixtralProcessor requires an image argument, even if it's None.
        # This is a mandatory part of the call signature.
        self.dummy_image = None
        # The model is intentionally not loaded here; see medical_chat().

    def load_model(self):
        """Load the SinaReason medical model and tokenizer using Unsloth.

        Sets ``self.model`` and ``self.tokenizer``. Every call performs a
        fresh load; callers that want to reuse an already loaded model should
        check ``self.model`` first.

        Raises:
            Exception: any error raised while importing Unsloth or loading the
                model is printed and then re-raised unchanged.
        """
        try:
            # Imported lazily so that importing this module does not require
            # Unsloth to be importable.
            from unsloth import FastLanguageModel

            print(f"Loading medical model with Unsloth: {MODEL_NAME}")
            print("cuda" if torch.cuda.is_available() else "cpu")
            # Use FastLanguageModel from Unsloth to load the model and tokenizer
            self.model, self.tokenizer = FastLanguageModel.from_pretrained(
                model_name=MODEL_NAME,
                dtype=torch.bfloat16,
                load_in_4bit=True,  # Or False if you have enough VRAM for 16-bit
                device_map="cuda",
            )
            print("SinaReason medical model loaded successfully with Unsloth!")
        except Exception as e:
            print(f"Error loading model with Unsloth: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
        """Split generated text into its reasoning and its clinical response.

        Args:
            text: Decoded model output.

        Returns:
            A ``(thinking, response)`` tuple.

            * When a complete ``<think>...</think>`` block exists, ``thinking``
              is the stripped content of the first block and ``response`` is
              the text with every such block removed, stripped.
            * When only an opening ``<think>`` exists (for example because the
              generation hit the token limit), everything after the tag is
              returned as ``thinking`` and everything before it as
              ``response``, both stripped.
            * Otherwise ``thinking`` is ``""`` and ``response`` is ``text``
              unchanged.
        """
        flags = re.DOTALL | re.IGNORECASE

        block = re.search(self._THINK_BLOCK_PATTERN, text, flags)
        if block:
            thinking = block.group(1).strip()
            response = re.sub(self._THINK_BLOCK_PATTERN, "", text, flags=flags).strip()
            return thinking, response

        # No closing tag: keep the raw tag out of the displayed summary.
        opening = re.search(self._THINK_OPEN_PATTERN, text, re.IGNORECASE)
        if opening:
            return text[opening.end():].strip(), text[:opening.start()].strip()

        return "", text

    @spaces.GPU(duration=120)
    def medical_chat(self, message: str, history: List[List[str]], max_tokens: int = 1024,
                     temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
        """Generate a medical reasoning reply using the Unsloth model.

        Args:
            message: The new user message.
            history: Previous turns as ``[user_msg, assistant_msg]`` pairs;
                ``None`` is treated as an empty conversation.
            max_tokens: Upper bound on newly generated tokens.
            temperature: Sampling temperature.
            top_p: Nucleus sampling threshold.

        Returns:
            ``("", new_history)``: an empty string (used by the UI to clear the
            input box) and the history extended with the new turn. For a blank
            message the history is returned without a new turn.
        """
        if history is None:
            history = []

        # Reject blank input before doing any expensive work.
        if not message or not message.strip():
            return "", history

        # Load once and reuse; the original reloaded the model on every call.
        # No need for model.to(DEVICE), Unsloth's device_map handles it.
        if self.model is None or self.tokenizer is None:
            self.load_model()
        self.model.eval()

        # Apply the chat template with the medical system prompt
        messages = [{"role": "system", "content": MEDICAL_SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            # Skip incomplete turns as a pair so user/assistant roles keep
            # alternating and .split() is never called on None.
            if user_msg is None or assistant_msg is None:
                continue
            # Replay only the summary part of earlier replies, not the
            # rendered reasoning section that precedes the marker.
            raw_assistant_msg = assistant_msg.split(self.SUMMARY_MARKER)[-1].strip()
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": raw_assistant_msg})
        messages.append({"role": "user", "content": message})

        # Format the prompt using the chat template
        formatted_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize the input, correctly passing images=None
        inputs = self.tokenizer(
            text=formatted_prompt,
            images=self.dummy_image,
            return_tensors="pt"
        ).to(self.model.device)

        # Generation parameters
        generation_kwargs = {
            **inputs,
            "images": self.dummy_image,  # This MUST be passed to model.generate
            # Coerced because UI sliders may deliver a float-typed number.
            "max_new_tokens": int(max_tokens),
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "pad_token_id": self.tokenizer.eos_token_id,
        }

        # Generate the full response
        output = self.model.generate(**generation_kwargs)[0]

        # Decode only the newly generated tokens
        prompt_length = inputs.input_ids.shape[1]
        full_response = self.tokenizer.decode(output[prompt_length:], skip_special_tokens=True)

        # Extract thinking and clinical summary
        thinking, response = self.extract_thinking_and_response(full_response)

        # Format the final display: optional collapsible reasoning section,
        # then the summary under the shared marker heading.
        sections = []
        if thinking:
            sections.append(
                "🧠 **Medical Reasoning Process**\n"
                "<details>\n"
                "<summary>🔍 Click to view detailed thinking process</summary>\n"
                f"*{thinking}*\n"
                "</details>\n"
                "---\n"
            )
        sections.append(f"{self.SUMMARY_MARKER}\n{response}")
        final_display = "".join(sections)

        new_history = history + [[message, final_display]]
        return "", new_history
# One shared chat object for the whole app; constructing it loads no model.
medical_chat_model = SinaReasonMedicalChat()


def respond(message, history, max_tokens, temperature, top_p):
    """Forward a Gradio event to the shared chat object and return its result.

    The arguments are passed through positionally and unchanged; the return
    value is whatever ``medical_chat`` returns (textbox value, chat history).
    """
    textbox_and_history = medical_chat_model.medical_chat(
        message,
        history,
        max_tokens,
        temperature,
        top_p,
    )
    return textbox_and_history
# Custom CSS for medical interface
# Passed to gr.Blocks(css=...) below. Of the classes defined here,
# .medical-chatbot is attached to the Chatbot via elem_classes, and
# .warning-box / .footer-text are used by the disclaimer and footer HTML.
# NOTE(review): .thinking-section and .clinical-response are not referenced by
# any markup visible in this file - confirm whether they are still needed.
css = """
.medical-chatbot {
min-height: 700px;
border: 2px solid #e3f2fd;
border-radius: 10px;
}
.thinking-section {
background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
border-left: 4px solid #2196f3;
padding: 15px;
margin: 10px 0;
border-radius: 8px;
font-family: 'Monaco', monospace;
font-size: 0.9em;
}
.clinical-response {
background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
border-left: 4px solid #ff9800;
padding: 15px;
margin: 10px 0;
border-radius: 8px;
}
.warning-box {
background: #fff3cd;
border: 1px solid #ffeaa7;
border-radius: 8px;
padding: 15px;
margin: 15px 0;
color: #856404;
}
.footer-text {
text-align: center;
color: #666;
font-size: 0.9em;
margin-top: 20px;
}
"""
# Create medical Gradio interface
# Layout: header, disclaimer, then a two-column row (chat on the left, model
# parameters on the right), followed by example prompts and a footer.
# Multi-line string bodies are kept flush-left so their content is unchanged;
# the only text changes are emoji that had been stored as mojibake.
with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🩺 SinaReason Medical Reasoning Assistant
**Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)
This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
""")

    # Medical disclaimer
    with gr.Row():
        gr.HTML("""
<div class="warning-box">
<h4>⚠️ Important Medical Disclaimer</h4>
<p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
<ul>
<li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
<li>👨‍⚕️ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
<li>🔍 <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
<li>📚 <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
</ul>
</div>
""")

    with gr.Row():
        # Left column: conversation, input box and action buttons.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=700,
                show_copy_button=True,
                bubble_full_width=False,
                elem_classes=["medical-chatbot"],
                avatar_images=(None, "🩺")
            )
            msg = gr.Textbox(
                placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
                lines=3,
                max_lines=8,
                show_label=False,
                container=False
            )
            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze Case", variant="primary", size="sm")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
                retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm")

        # Right column: generation parameters and usage notes.
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### ⚙️ Model Parameters")
            max_tokens = gr.Slider(
                minimum=256,
                maximum=2048,
                value=1024,
                step=64,
                label="Max Tokens",
                info="Maximum response length"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.05,
                label="Temperature",
                info="Reasoning creativity (0.7 recommended)"
            )
            top_p = gr.Slider(
                minimum=0.8,
                maximum=1.0,
                value=0.95,
                step=0.01,
                label="Top-p",
                info="Focus precision (0.95 recommended)"
            )
            gr.Markdown("""
### 🎯 Usage Guidelines:
**Best for:**
- Clinical case analysis
- Differential diagnosis reasoning
- Medical education scenarios
- Professional consultation support
**Features:**
- Transparent `<think>` process
- Step-by-step clinical reasoning
- Evidence-based conclusions
- Professional medical language
""")

    # Event handlers
    def clear_chat():
        """Return an empty history and an empty textbox value."""
        return [], ""

    def retry_last(history):
        """Drop the last turn and put its user message back in the textbox.

        Returns ``(history_without_last_turn, last_user_message)``; with an
        empty history it returns the history unchanged and an empty string.
        The message is not resubmitted automatically.
        """
        if history:
            last_user_msg = history[-1][0]
            return history[:-1], last_user_msg
        return history, ""

    # Button events
    # Clicking "Analyze Case" and pressing Enter in the textbox do the same thing.
    submit_btn.click(
        respond,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[msg, chatbot]
    )
    msg.submit(
        respond,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[msg, chatbot]
    )
    clear_btn.click(clear_chat, outputs=[chatbot, msg])
    retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])

    # Medical case examples
    gr.Examples(
        examples=[
            "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
            "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
            "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
            "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
            "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
            "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
        ],
        inputs=[msg],
        label="📋 Clinical Case Examples (Try these scenarios):"
    )

    # Footer
    gr.HTML("""
<div class="footer-text">
<p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
<p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
<p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
<p>🚀 <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
</div>
""")
# Entry point for HF Spaces: start the app only when run as a script.
if __name__ == "__main__":
    # show_error=True is the only launch option set here.
    launch_options = {"show_error": True}
    demo.launch(**launch_options)