Spaces:

yasserrmd
/

SinaReason

Running on Zero

App Files Files Community

SinaReason / app.py

yasserrmd

Update app.py

dec6090 verified 3 months ago

raw

history blame

12.6 kB

	import gradio as gr
	import torch
	#from transformers import AutoTokenizer, Mistral3ForConditionalGeneration
	import re
	import os
	from typing import List, Tuple
	import spaces





	# Model configuration
	# MODEL_NAME is the model identifier handed to FastLanguageModel.from_pretrained in load_model().
	MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
	# "cuda" when torch reports a CUDA device, otherwise "cpu".
	# NOTE(review): not referenced elsewhere in the visible file; load_model() hard-codes device_map="cuda".
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	# Medical system prompt as recommended by the model card.
	# It is sent as the "system" message of every request and asks the model to put its
	# reasoning inside <think>...</think>, which extract_thinking_and_response() later parses.
	MEDICAL_SYSTEM_PROMPT = """
	You are SinaReason, a medical reasoning assistant for educational and clinical support.
	Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
	Never provide medical advice directly to a patient.
	First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
	- Use this section to work through symptoms, differential diagnoses, and investigation plans.
	- Be explicit and thorough in your reasoning.
	After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
	- Summarize the most likely diagnosis and your reasoning.
	- Suggest next steps for investigation or management.
	"""



	class SinaReasonMedicalChat:
	    """Chat wrapper around the SinaReason model, loaded lazily through Unsloth.

	    ``model`` and ``tokenizer`` start as ``None``; ``medical_chat`` loads them on
	    first use so that constructing the object stays cheap.
	    """

	    def __init__(self):
	        self.tokenizer = None
	        self.model = None
	        # The PixtralProcessor requires an image argument, even if it's None.
	        # This is a mandatory part of the call signature.
	        self.dummy_image = None
	        # The model is intentionally NOT loaded here; see medical_chat().

	    def load_model(self):
	        """Load the SinaReason medical model and tokenizer using Unsloth.

	        Populates ``self.model`` and ``self.tokenizer`` from ``MODEL_NAME`` with
	        bfloat16 dtype, 4-bit loading and ``device_map="cuda"``.

	        Raises:
	            Exception: any error raised while importing Unsloth or loading the
	                model is printed and then re-raised unchanged.
	        """
	        try:
	            # Imported lazily so that merely importing this module does not pull in Unsloth.
	            from unsloth import FastLanguageModel
	            print(f"Loading medical model with Unsloth: {MODEL_NAME}")
	            print("cuda" if torch.cuda.is_available() else "cpu")

	            # Use FastLanguageModel from Unsloth to load the model and tokenizer
	            self.model, self.tokenizer = FastLanguageModel.from_pretrained(
	                model_name=MODEL_NAME,
	                dtype=torch.bfloat16,
	                load_in_4bit=True,  # Or False if you have enough VRAM for 16-bit
	                device_map="cuda",
	            )

	            print("SinaReason medical model loaded successfully with Unsloth!")

	        except Exception as e:
	            print(f"Error loading model with Unsloth: {e}")
	            # Bare raise keeps the original exception and traceback intact.
	            raise

	    def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
	        """Split model output into (thinking, response).

	        Args:
	            text: decoded model output, possibly containing ``<think>...</think>``.

	        Returns:
	            A tuple ``(thinking, response)``. ``thinking`` is the stripped content
	            of the first ``<think>`` block (empty string when there is none);
	            ``response`` is ``text`` with every ``<think>`` block removed and
	            stripped, or ``text`` unchanged when no block is present.
	        """
	        think_pattern = r'<think>(.*?)</think>'
	        # DOTALL lets the reasoning span several lines; IGNORECASE tolerates <THINK>.
	        flags = re.DOTALL | re.IGNORECASE

	        match = re.search(think_pattern, text, flags)
	        if not match:
	            return "", text

	        thinking = match.group(1).strip()
	        response = re.sub(think_pattern, "", text, flags=flags).strip()
	        return thinking, response

	    @spaces.GPU(duration=120)
	    def medical_chat(self, message: str, history: List[List[str]], max_tokens: int = 1024,
	                     temperature: float = 0.7, top_p: float = 0.95) -> Tuple[str, List[List[str]]]:
	        """Generate a medical reasoning response using the Unsloth model.

	        Args:
	            message: the new user message.
	            history: previous ``[user, assistant]`` pairs as shown in the chatbot.
	            max_tokens: passed to ``generate`` as ``max_new_tokens``.
	            temperature: sampling temperature.
	            top_p: nucleus-sampling threshold.

	        Returns:
	            ``("", new_history)``: an empty string to clear the input textbox and
	            the history extended with ``[message, formatted_answer]``. For a blank
	            message the history is returned unchanged.
	        """
	        history = history or []

	        # Reject blank input before doing any expensive model work.
	        if not message or not message.strip():
	            return "", history

	        # Load only when this process does not already hold the model, instead of
	        # reloading the weights on every request.
	        if self.model is None or self.tokenizer is None:
	            self.load_model()
	        # No need for model.to(DEVICE), Unsloth's device_map handles it.
	        self.model.eval()

	        # Apply the chat template with the medical system prompt
	        messages = [{"role": "system", "content": MEDICAL_SYSTEM_PROMPT}]
	        for user_msg, assistant_msg in history:
	            # Feed back only the clinical summary, not the rendered reasoning block;
	            # tolerate a missing (None) assistant entry.
	            raw_assistant_msg = (assistant_msg or "").split("🩺 Clinical Summary")[-1].strip()
	            messages.append({"role": "user", "content": user_msg})
	            messages.append({"role": "assistant", "content": raw_assistant_msg})
	        messages.append({"role": "user", "content": message})

	        # Format the prompt using the chat template
	        formatted_prompt = self.tokenizer.apply_chat_template(
	            messages,
	            tokenize=False,
	            add_generation_prompt=True,
	        )

	        # Tokenize the input, correctly passing images=None
	        inputs = self.tokenizer(
	            text=formatted_prompt,
	            images=self.dummy_image,
	            return_tensors="pt"
	        ).to(self.model.device)

	        # Generation parameters
	        generation_kwargs = {
	            **inputs,
	            "images": self.dummy_image,  # This MUST be passed to model.generate
	            "max_new_tokens": max_tokens,
	            "temperature": temperature,
	            "top_p": top_p,
	            "do_sample": True,
	            "pad_token_id": self.tokenizer.eos_token_id,
	        }

	        # Generate the full response
	        output = self.model.generate(**generation_kwargs)[0]

	        # Decode only the newly generated tokens
	        full_response = self.tokenizer.decode(output[inputs.input_ids.shape[1]:], skip_special_tokens=True)

	        # Extract thinking and clinical summary
	        thinking, response = self.extract_thinking_and_response(full_response)

	        # Format the final display
	        final_display = ""
	        if thinking:
	            final_display += f"""🧠 Medical Reasoning Process
	<details>
	<summary>🔍 Click to view detailed thinking process</summary>
	{thinking}
	</details>
	---
	"""

	        final_display += f"""🩺 Clinical Summary
	{response}"""

	        new_history = history + [[message, final_display]]
	        return "", new_history


	# Initialize the medical chat model.
	# Module-level instance shared by every request; the constructor only sets
	# attributes to None, so no model weights are loaded at import time.
	medical_chat_model = SinaReasonMedicalChat()

	def respond(message, history, max_tokens, temperature, top_p):
	    """Gradio callback: forward the chat inputs to the shared medical chat model.

	    Returns whatever ``medical_chat`` returns, i.e. the new textbox value and
	    the updated chatbot history.
	    """
	    sampling_settings = (max_tokens, temperature, top_p)
	    return medical_chat_model.medical_chat(message, history, *sampling_settings)

	# Custom CSS for medical interface, passed to gr.Blocks(css=...).
	# .medical-chatbot is attached to the Chatbot via elem_classes; .warning-box and
	# .footer-text are used by the disclaimer and footer HTML snippets.
	# NOTE(review): .thinking-section and .clinical-response are not referenced
	# anywhere in the visible file — confirm whether they are still needed.
	css = """
	.medical-chatbot {
	min-height: 700px;
	border: 2px solid #e3f2fd;
	border-radius: 10px;
	}
	.thinking-section {
	background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
	border-left: 4px solid #2196f3;
	padding: 15px;
	margin: 10px 0;
	border-radius: 8px;
	font-family: 'Monaco', monospace;
	font-size: 0.9em;
	}
	.clinical-response {
	background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
	border-left: 4px solid #ff9800;
	padding: 15px;
	margin: 10px 0;
	border-radius: 8px;
	}
	.warning-box {
	background: #fff3cd;
	border: 1px solid #ffeaa7;
	border-radius: 8px;
	padding: 15px;
	margin: 15px 0;
	color: #856404;
	}
	.footer-text {
	text-align: center;
	color: #666;
	font-size: 0.9em;
	margin-top: 20px;
	}
	"""

	# Create medical Gradio interface.
	# Layout: title, disclaimer, then a two-column row (chat on the left, sampling
	# parameters and usage notes on the right), followed by examples and a footer.
	with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("""
	# 🩺 SinaReason Medical Reasoning Assistant

	Advanced Clinical Reasoning Model - Inspired by Ibn Sina (Avicenna)

	This model provides transparent chain-of-thought medical reasoning for educational and clinical support purposes.
	""")

	    # Medical disclaimer
	    with gr.Row():
	        gr.HTML("""
	<div class="warning-box">
	<h4>⚠️ Important Medical Disclaimer</h4>
	<p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
	<ul>
	<li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
	<li>👨‍⚕️ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
	<li>🔍 <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
	<li>📚 <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
	</ul>
	</div>
	""")

	    with gr.Row():
	        # Left column: conversation, input box and action buttons.
	        with gr.Column(scale=4):
	            chatbot = gr.Chatbot(
	                height=700,
	                show_copy_button=True,
	                bubble_full_width=False,
	                elem_classes=["medical-chatbot"],
	                avatar_images=(None, "🩺")
	            )

	            msg = gr.Textbox(
	                placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
	                lines=3,
	                max_lines=8,
	                show_label=False,
	                container=False
	            )

	            with gr.Row():
	                submit_btn = gr.Button("🔍 Analyze Case", variant="primary", size="sm")
	                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
	                retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm")

	        # Right column: generation parameters and usage notes.
	        with gr.Column(scale=1, min_width=250):
	            gr.Markdown("### ⚙️ Model Parameters")

	            max_tokens = gr.Slider(
	                minimum=256,
	                maximum=2048,
	                value=1024,
	                step=64,
	                label="Max Tokens",
	                info="Maximum response length"
	            )

	            temperature = gr.Slider(
	                minimum=0.1,
	                maximum=1.0,
	                value=0.7,
	                step=0.05,
	                label="Temperature",
	                info="Reasoning creativity (0.7 recommended)"
	            )

	            top_p = gr.Slider(
	                minimum=0.8,
	                maximum=1.0,
	                value=0.95,
	                step=0.01,
	                label="Top-p",
	                info="Focus precision (0.95 recommended)"
	            )

	            gr.Markdown("""
	### 🎯 Usage Guidelines:

	Best for:
	- Clinical case analysis
	- Differential diagnosis reasoning
	- Medical education scenarios
	- Professional consultation support

	Features:
	- Transparent `<think>` process
	- Step-by-step clinical reasoning
	- Evidence-based conclusions
	- Professional medical language
	""")

	    # Event handlers
	    def clear_chat():
	        """Reset the chatbot history and empty the input textbox."""
	        return [], ""

	    def retry_last(history):
	        """Remove the last exchange and put its user message back in the textbox.

	        The message is only restored for editing; nothing is resubmitted here.
	        With an empty history the history is returned as-is and the textbox cleared.
	        """
	        if history:
	            last_user_msg = history[-1][0]
	            return history[:-1], last_user_msg
	        return history, ""

	    # Button events: the button click and pressing Enter in the textbox share
	    # one handler and one set of input/output components.
	    chat_inputs = [msg, chatbot, max_tokens, temperature, top_p]
	    chat_outputs = [msg, chatbot]

	    submit_btn.click(respond, inputs=chat_inputs, outputs=chat_outputs)
	    msg.submit(respond, inputs=chat_inputs, outputs=chat_outputs)

	    clear_btn.click(clear_chat, outputs=[chatbot, msg])
	    retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])

	    # Medical case examples (clicking one fills the textbox)
	    gr.Examples(
	        examples=[
	            "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
	            "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
	            "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
	            "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
	            "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
	            "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
	        ],
	        inputs=[msg],
	        label="📋 Clinical Case Examples (Try these scenarios):"
	    )

	    # Footer
	    gr.HTML("""
	<div class="footer-text">
	<p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
	<p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
	<p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
	<p>🚀 <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
	</div>
	""")


	# Entry point for HF Spaces / direct execution: start the Gradio app and
	# surface server-side errors in the UI.
	if __name__ == "__main__":
	    demo.launch(show_error=True)