yasserrmd committed on
Commit
fe98ea0
·
verified ·
1 Parent(s): 8ba849d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +392 -0
app.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
+ from threading import Thread
5
+ import re
6
+ import time
7
+ import os
8
+ from typing import Iterator, List, Tuple
9
+ import spaces
10
+
11
+ # Model configuration
12
+ MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
13
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
+
15
+ # Medical system prompt as recommended by the model card
16
+ MEDICAL_SYSTEM_PROMPT = """
17
+ You are SinaReason, a medical reasoning assistant for educational and clinical support.
18
+ Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
19
+ **Never provide medical advice directly to a patient.**
20
+
21
+ First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
22
+ - Use this section to work through symptoms, differential diagnoses, and investigation plans.
23
+ - Be explicit and thorough in your reasoning.
24
+
25
+ After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
26
+ - Summarize the most likely diagnosis and your reasoning.
27
+ - Suggest next steps for investigation or management.
28
+ """
29
+
30
+ class SinaReasonMedicalChat:
31
+ def __init__(self):
32
+ self.tokenizer = None
33
+ self.model = None
34
+ self.load_model()
35
+
36
+ def load_model(self):
37
+ """Load the SinaReason medical model and tokenizer"""
38
+ try:
39
+ print(f"Loading medical model: {MODEL_NAME}")
40
+ self.tokenizer = AutoTokenizer.from_pretrained(
41
+ MODEL_NAME,
42
+ trust_remote_code=True
43
+ )
44
+
45
+ # Add padding token if not present
46
+ if self.tokenizer.pad_token is None:
47
+ self.tokenizer.pad_token = self.tokenizer.eos_token
48
+
49
+ self.model = AutoModelForCausalLM.from_pretrained(
50
+ MODEL_NAME,
51
+ torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
52
+ #device_map="auto" if DEVICE == "cuda" else None,
53
+ trust_remote_code=True,
54
+ low_cpu_mem_usage=True
55
+ )
56
+
57
+ if DEVICE == "cpu":
58
+ self.model = self.model.to(DEVICE)
59
+
60
+ print("SinaReason medical model loaded successfully!")
61
+
62
+ except Exception as e:
63
+ print(f"Error loading model: {e}")
64
+ raise e
65
+
66
+ def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
67
+ """Extract thinking process from <think>...</think> tags and clinical response"""
68
+ # Look for the specific <think>...</think> pattern used by SinaReason
69
+ think_pattern = r'<think>(.*?)</think>'
70
+
71
+ thinking = ""
72
+ response = text
73
+
74
+ match = re.search(think_pattern, text, re.DOTALL | re.IGNORECASE)
75
+ if match:
76
+ thinking = match.group(1).strip()
77
+ response = re.sub(think_pattern, "", text, flags=re.DOTALL | re.IGNORECASE).strip()
78
+
79
+ return thinking, response
80
+
81
+ @spaces.GPU
82
+ def medical_chat_stream(self, message: str, history: List[List[str]], max_tokens: int = 1024,
83
+ temperature: float = 0.7, top_p: float = 0.95) -> Iterator[Tuple[str, List[List[str]]]]:
84
+ """Stream medical reasoning responses with thinking display"""
85
+ if not message.strip():
86
+ return
87
+
88
+ self.model.to(DEVICE)
89
+
90
+ # Apply the chat template with the medical system prompt
91
+ messages = [
92
+ {"role": "system", "content": MEDICAL_SYSTEM_PROMPT},
93
+ ]
94
+
95
+ # Add conversation history
96
+ for user_msg, assistant_msg in history:
97
+ messages.append({"role": "user", "content": user_msg})
98
+ messages.append({"role": "assistant", "content": assistant_msg})
99
+
100
+ # Add current message
101
+ messages.append({"role": "user", "content": message})
102
+
103
+ # Apply chat template
104
+ prompt = self.tokenizer.apply_chat_template(
105
+ messages,
106
+ tokenize=False,
107
+ add_generation_prompt=True,
108
+ )
109
+
110
+ # Tokenize input
111
+ inputs = self.tokenizer(
112
+ text=prompt,
113
+ images=None, # Required for this multimodal architecture
114
+ return_tensors="pt"
115
+ ).to(DEVICE)
116
+
117
+ # Setup streamer
118
+ streamer = TextIteratorStreamer(
119
+ self.tokenizer,
120
+ timeout=30.0,
121
+ skip_prompt=True,
122
+ skip_special_tokens=True
123
+ )
124
+
125
+ # Generation parameters optimized for medical reasoning
126
+ generation_kwargs = {
127
+ **inputs,
128
+ "images": None, # Also required here for text-only inference
129
+ "max_new_tokens": max_tokens,
130
+ "temperature": temperature,
131
+ "top_p": top_p,
132
+ "do_sample": True,
133
+ "pad_token_id": self.tokenizer.eos_token_id,
134
+ "streamer": streamer,
135
+ "repetition_penalty": 1.1
136
+ }
137
+
138
+ # Start generation in a separate thread
139
+ thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
140
+ thread.start()
141
+
142
+ # Stream the response
143
+ partial_response = ""
144
+ current_thinking = ""
145
+ current_response = ""
146
+ thinking_phase = True
147
+
148
+ for new_token in streamer:
149
+ partial_response += new_token
150
+
151
+ # Extract thinking and response
152
+ thinking, response = self.extract_thinking_and_response(partial_response)
153
+
154
+ # Show thinking phase while it's being generated
155
+ if thinking and thinking != current_thinking:
156
+ current_thinking = thinking
157
+ display_text = f"🧠 **Medical Reasoning in Progress...**\n\n<details>\n<summary>πŸ” Click to see thinking process</summary>\n\n*{current_thinking}*\n\n</details>"
158
+ new_history = history + [[message, display_text]]
159
+ yield "", new_history
160
+ time.sleep(0.1) # Smooth streaming
161
+
162
+ # Show clinical response as it's generated
163
+ if response and response != current_response:
164
+ current_response = response
165
+
166
+ final_display = f"🩺 **Clinical Analysis**\n\n{current_response}"
167
+
168
+ if current_thinking:
169
+ final_display = f"""🧠 **Medical Reasoning Process**
170
+
171
+ <details>
172
+ <summary>πŸ” Click to view detailed thinking process</summary>
173
+
174
+ *{current_thinking}*
175
+
176
+ </details>
177
+
178
+ ---
179
+
180
+ 🩺 **Clinical Summary**
181
+
182
+ {current_response}"""
183
+
184
+ new_history = history + [[message, final_display]]
185
+ yield "", new_history
186
+
187
+ thread.join()
188
+
189
+ # Initialize the medical chat model
190
+ medical_chat_model = SinaReasonMedicalChat()
191
+
192
+ def respond(message, history, max_tokens, temperature, top_p):
193
+ """Gradio response function for medical reasoning"""
194
+ for response in medical_chat_model.medical_chat_stream(message, history, max_tokens, temperature, top_p):
195
+ yield response
196
+
197
+ # Custom CSS for medical interface
198
+ css = """
199
+ .medical-chatbot {
200
+ min-height: 700px;
201
+ border: 2px solid #e3f2fd;
202
+ border-radius: 10px;
203
+ }
204
+ .thinking-section {
205
+ background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
206
+ border-left: 4px solid #2196f3;
207
+ padding: 15px;
208
+ margin: 10px 0;
209
+ border-radius: 8px;
210
+ font-family: 'Monaco', monospace;
211
+ font-size: 0.9em;
212
+ }
213
+ .clinical-response {
214
+ background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
215
+ border-left: 4px solid #ff9800;
216
+ padding: 15px;
217
+ margin: 10px 0;
218
+ border-radius: 8px;
219
+ }
220
+ .warning-box {
221
+ background: #fff3cd;
222
+ border: 1px solid #ffeaa7;
223
+ border-radius: 8px;
224
+ padding: 15px;
225
+ margin: 15px 0;
226
+ color: #856404;
227
+ }
228
+ .footer-text {
229
+ text-align: center;
230
+ color: #666;
231
+ font-size: 0.9em;
232
+ margin-top: 20px;
233
+ }
234
+ """
235
+
236
+ # Create medical Gradio interface
237
+ with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
238
+ gr.Markdown("""
239
+ # 🩺 SinaReason Medical Reasoning Assistant
240
+
241
+ **Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)
242
+
243
+ This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
244
+ """)
245
+
246
+ # Medical disclaimer
247
+ with gr.Row():
248
+ gr.HTML("""
249
+ <div class="warning-box">
250
+ <h4>⚠️ Important Medical Disclaimer</h4>
251
+ <p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
252
+ <ul>
253
+ <li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
254
+ <li>πŸ‘¨β€βš•οΈ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
255
+ <li>πŸ” <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
256
+ <li>πŸ“š <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
257
+ </ul>
258
+ </div>
259
+ """)
260
+
261
+ with gr.Row():
262
+ with gr.Column(scale=4):
263
+ chatbot = gr.Chatbot(
264
+ height=700,
265
+ show_copy_button=True,
266
+ bubble_full_width=False,
267
+ elem_classes=["medical-chatbot"],
268
+ avatar_images=(None, "🩺")
269
+ )
270
+
271
+ msg = gr.Textbox(
272
+ placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
273
+ lines=3,
274
+ max_lines=8,
275
+ show_label=False,
276
+ container=False
277
+ )
278
+
279
+ with gr.Row():
280
+ submit_btn = gr.Button("πŸ” Analyze Case", variant="primary", size="sm")
281
+ clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary", size="sm")
282
+ retry_btn = gr.Button("πŸ”„ Retry", variant="secondary", size="sm")
283
+
284
+ with gr.Column(scale=1, min_width=250):
285
+ gr.Markdown("### βš™οΈ Model Parameters")
286
+
287
+ max_tokens = gr.Slider(
288
+ minimum=256,
289
+ maximum=2048,
290
+ value=1024,
291
+ step=64,
292
+ label="Max Tokens",
293
+ info="Maximum response length"
294
+ )
295
+
296
+ temperature = gr.Slider(
297
+ minimum=0.1,
298
+ maximum=1.0,
299
+ value=0.7,
300
+ step=0.05,
301
+ label="Temperature",
302
+ info="Reasoning creativity (0.7 recommended)"
303
+ )
304
+
305
+ top_p = gr.Slider(
306
+ minimum=0.8,
307
+ maximum=1.0,
308
+ value=0.95,
309
+ step=0.01,
310
+ label="Top-p",
311
+ info="Focus precision (0.95 recommended)"
312
+ )
313
+
314
+ gr.Markdown("""
315
+ ### 🎯 Usage Guidelines:
316
+
317
+ **Best for:**
318
+ - Clinical case analysis
319
+ - Differential diagnosis reasoning
320
+ - Medical education scenarios
321
+ - Professional consultation support
322
+
323
+ **Features:**
324
+ - Transparent `<think>` process
325
+ - Step-by-step clinical reasoning
326
+ - Evidence-based conclusions
327
+ - Professional medical language
328
+ """)
329
+
330
+ # Event handlers
331
+ def clear_chat():
332
+ return [], ""
333
+
334
+ def retry_last(history):
335
+ if history:
336
+ last_user_msg = history[-1][0]
337
+ return history[:-1], last_user_msg
338
+ return history, ""
339
+
340
+ # Button events
341
+ submit_btn.click(
342
+ respond,
343
+ inputs=[msg, chatbot, max_tokens, temperature, top_p],
344
+ outputs=[msg, chatbot]
345
+ )
346
+
347
+ msg.submit(
348
+ respond,
349
+ inputs=[msg, chatbot, max_tokens, temperature, top_p],
350
+ outputs=[msg, chatbot]
351
+ )
352
+
353
+ clear_btn.click(clear_chat, outputs=[chatbot, msg])
354
+ retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])
355
+
356
+ # Medical case examples
357
+ gr.Examples(
358
+ examples=[
359
+ "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
360
+ "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
361
+ "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
362
+ "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
363
+ "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
364
+ "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
365
+ ],
366
+ inputs=[msg],
367
+ label="πŸ“‹ Clinical Case Examples (Try these scenarios):"
368
+ )
369
+
370
+ # Footer
371
+ gr.HTML("""
372
+ <div class="footer-text">
373
+ <p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
374
+ <p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
375
+ <p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
376
+ <p>πŸš€ <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
377
+ </div>
378
+ """)
379
+
380
+ # Launch configuration for HF Spaces
381
+ if __name__ == "__main__":
382
+ demo.queue(
383
+ concurrency_count=1, # Medical reasoning is compute-intensive
384
+ max_size=5,
385
+ api_open=False
386
+ ).launch(
387
+ server_name="0.0.0.0",
388
+ server_port=7860,
389
+ share=False,
390
+ show_error=True,
391
+ quiet=False
392
+ )