Alejo760 committed on
Commit
abaa6d9
·
verified ·
1 Parent(s): 8523e93

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +359 -0
app.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ from PIL import Image
5
+ import io
6
+ import base64
7
+ import requests
8
+ from typing import Optional
9
+ import os
10
+ import spaces
11
+
12
# Model configuration: Hugging Face Hub id of the checkpoint passed to pipeline().
MODEL_ID = "google/medgemma-1.5-4b-it"

# Language configurations: language code -> label shown in the UI dropdown.
LANGUAGES = {
    "en": "English",
    "es": "Spanish (Español)",
}

# Language instruction templates: language code -> sentence appended to the
# user's prompt so the model answers in that language. Keys mirror LANGUAGES.
LANGUAGE_INSTRUCTIONS = {
    "en": "Please respond in English.",
    "es": "Por favor responde en español.",
}
26
+
27
class MedGemmaDemo:
    """Lazy wrapper around the MedGemma ``image-text-to-text`` pipeline.

    The pipeline is not built at construction time; it is created by
    :meth:`load_model` the first time :meth:`generate_response` runs.
    """

    def __init__(self):
        # Both stay at their initial values until load_model() succeeds.
        self.pipe = None
        self.loaded = False

    def load_model(self):
        """Load the MedGemma model using pipeline.

        Does nothing when the model has already been loaded.

        Raises:
            ValueError: If the ``HF_TOKEN`` environment variable is missing
                or empty.
            Exception: Any error raised while building the pipeline is
                printed and then re-raised unchanged.
        """
        if self.loaded:
            return

        print("Loading MedGemma model...")
        try:
            # Get HF token from environment variable
            hf_token = os.environ.get("HF_TOKEN")
            if not hf_token:
                raise ValueError(
                    "HF_TOKEN not found in environment variables. "
                    "Please set your Hugging Face token as an environment variable or repository secret."
                )

            # Load the model using pipeline
            # ZeroGPU will handle device allocation automatically
            self.pipe = pipeline(
                "image-text-to-text",
                model=MODEL_ID,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                token=hf_token,
            )

            self.loaded = True
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    @staticmethod
    def _extract_assistant_text(generated) -> str:
        """Return the assistant's text from a pipeline ``generated_text`` value.

        Accepts either a plain string or a list of chat-message dicts. For a
        chat list the last assistant message is used, and a content value
        made of typed parts has its ``"text"`` parts concatenated. Returns
        ``""`` when no assistant text can be found.
        """
        if isinstance(generated, str):
            return generated
        if not isinstance(generated, list):
            return ""

        # Search from the end so the most recent assistant turn wins.
        for message in reversed(generated):
            if not isinstance(message, dict) or message.get("role") != "assistant":
                continue
            content = message.get("content", "")
            if isinstance(content, list):
                return "".join(
                    part.get("text", "")
                    for part in content
                    if isinstance(part, dict) and part.get("type") == "text"
                )
            if isinstance(content, str):
                return content
            return str(content) if content is not None else ""
        return ""

    # NOTE(review): ZeroGPU is understood to run decorated functions in a
    # separate worker process; if so, the lazy load below (self.pipe /
    # self.loaded) may not persist between calls and the model would be
    # reloaded inside every 60 s GPU window. Confirm against the `spaces`
    # documentation and consider loading the pipeline at import time.
    @spaces.GPU(duration=60)  # ZeroGPU decorator - allocates GPU for 60 seconds
    def generate_response(
        self,
        image: Image.Image,
        prompt: str,
        language: str = "en",
        max_new_tokens: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
    ) -> str:
        """Generate a response from MedGemma given an image and prompt.

        Args:
            image: Image placed in the user message ahead of the text.
            prompt: User question; a language instruction is appended to it.
            language: Key into ``LANGUAGE_INSTRUCTIONS``; unknown keys fall
                back to the English instruction.
            max_new_tokens: Forwarded to the pipeline (coerced to ``int``).
            temperature: Sampling temperature forwarded to the pipeline.
            top_p: Nucleus-sampling threshold forwarded to the pipeline.

        Returns:
            The stripped assistant reply, ``"No response generated."`` when
            the reply is empty, or an ``"Error generating response: ..."``
            string when generation raises.

        Raises:
            Exception: Errors from :meth:`load_model` propagate; only the
                generation step itself is converted into an error string.
        """
        if not self.loaded:
            self.load_model()

        try:
            # Add language instruction to the prompt
            language_instruction = LANGUAGE_INSTRUCTIONS.get(
                language, LANGUAGE_INSTRUCTIONS["en"]
            )
            full_prompt = f"{prompt}\n\n{language_instruction}"

            # Format messages for the pipeline
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": image},
                        {"type": "text", "text": full_prompt},
                    ],
                }
            ]

            # Generate response using pipeline
            outputs = self.pipe(
                text=messages,
                # UI sliders may deliver a float; token counts must be int.
                max_new_tokens=int(max_new_tokens),
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
            )

            reply = self._extract_assistant_text(outputs[0]["generated_text"]).strip()
            return reply or "No response generated."

        except Exception as e:
            # Returned (not raised) so the UI can show the message in the output box.
            return f"Error generating response: {str(e)}"
125
+
126
# Initialize the demo: one shared instance used by the Gradio callback.
# Constructing it only sets pipe=None / loaded=False; no model is loaded here.
demo_instance = MedGemmaDemo()
128
+
129
def process_image_with_prompt(image, prompt, language, max_tokens, temperature, top_p):
    """Gradio interface function: validate the inputs and run MedGemma.

    Args:
        image: Uploaded image, either a PIL image or an array accepted by
            ``Image.fromarray``; ``None`` when nothing was uploaded.
        prompt: User question; must contain non-whitespace text.
        language: Language code forwarded to ``generate_response``.
        max_tokens: Maximum new tokens (coerced to ``int`` before use).
        temperature: Sampling temperature forwarded unchanged.
        top_p: Nucleus-sampling threshold forwarded unchanged.

    Returns:
        The model's reply, a validation message when the image or prompt is
        missing, or an ``"Error processing image: ..."`` string when anything
        in the conversion/generation path raises.
    """
    if image is None:
        return "Please upload an image."

    if not prompt or not prompt.strip():
        return "Please enter a prompt."

    try:
        # Convert to PIL Image if needed
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Ensure RGB
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Generate response
        return demo_instance.generate_response(
            image=image,
            prompt=prompt,
            language=language,
            # Slider values can arrive as floats; generation needs an int.
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
        )

    except Exception as e:
        # Surface the failure in the output textbox instead of crashing the UI.
        return f"Error processing image: {str(e)}"
160
+
161
# Enhanced example prompts for medical imaging with clinical structure.
# Each entry becomes one "Example N" button that copies the text into the prompt box.
example_prompts = [
    "Describe the key findings in this medical image. Provide one main diagnosis, two differential diagnoses, and suggestions for follow-up management.",
    "Analyze this image and provide: 1) Key anatomical structures visible, 2) Any pathological findings, 3) Clinical significance, 4) Recommended next steps.",
    "Generate a comprehensive radiology report including: findings, impression, main diagnosis, differential diagnoses, and management recommendations.",
    "What are the primary abnormalities in this image? Discuss the most likely diagnosis, alternative diagnoses to consider, and appropriate follow-up imaging or tests.",
    "Provide a structured assessment: 1) Image quality and technique, 2) Normal anatomical structures, 3) Abnormal findings, 4) Differential diagnoses, 5) Clinical recommendations.",
    "Describe the pathological findings in detail. What is your primary diagnosis? List at least two differential diagnoses and suggest appropriate management strategies.",
    "Evaluate this image for any signs of acute pathology. Provide diagnostic impressions, severity assessment, and urgent management considerations if applicable.",
    "Analyze the imaging features present and correlate with potential clinical presentations. Include main diagnosis, differentials, and follow-up recommendations.",
]
172
+
173
# Create Gradio interface
# NOTE(review): the Markdown literals below are indented with the code and
# rely on gr.Markdown dedenting its value — confirm for the pinned Gradio version.
with gr.Blocks(title="MedGemma Medical Image Analysis") as demo:
    gr.Markdown(
        """
        # 🏥 MedGemma Medical Image Analysis Demo

        This demo showcases **MedGemma 1.5 4B**, Google's open medical AI model for analyzing medical images.
        **Powered by ZeroGPU** for efficient GPU allocation on Hugging Face Spaces.

        **⚠️ Setup Required:**
        1. Accept the model license at: https://huggingface.co/google/medgemma-1.5-4b-it
        2. Set your HF token in the Space settings (Settings → Repository secrets → Add secret: `HF_TOKEN`)
        3. Enable ZeroGPU in Space settings (Hardware → ZeroGPU)

        **Note:** This is a demonstration tool. All outputs should be independently verified and clinically
        correlated before any medical use. MedGemma is intended as a developer tool and requires validation
        for specific use cases.

        ### Capabilities:
        - 2D Medical Image Analysis (X-rays, CT slices, MRI slices, etc.)
        - Multilingual responses in 10+ languages
        - Structured clinical reporting
        - Differential diagnosis generation

        ### How to use:
        1. Upload a medical image (X-ray, CT, MRI, etc.)
        2. Select your preferred output language
        3. Enter your question or select an example prompt
        4. Adjust generation parameters if needed
        5. Click "Analyze Image" to get MedGemma's response
        """
    )

    with gr.Row():
        # Left column: all inputs and the trigger button.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Medical Image",
                type="pil",
                image_mode="RGB",
            )

            # Dropdown shows the language label and submits the language code.
            language_select = gr.Dropdown(
                choices=[(v, k) for k, v in LANGUAGES.items()],
                value="en",
                label="Output Language",
                info="Select the language for the AI response",
            )

            prompt_input = gr.Textbox(
                label="Prompt/Question",
                placeholder="e.g., Describe the key findings in this chest X-ray and provide a diagnosis...",
                lines=4,
            )

            with gr.Accordion("📋 Example Clinical Prompts", open=True):
                gr.Markdown("*Click any prompt below to use it*")
                for i, prompt in enumerate(example_prompts):
                    btn = gr.Button(
                        f"Example {i+1}: {prompt[:80]}...",
                        variant="secondary",
                    )
                    # Default argument binds the current prompt; a plain
                    # closure would make every button return the last one.
                    btn.click(
                        fn=lambda p=prompt: p,
                        outputs=prompt_input,
                    )

            with gr.Accordion("⚙️ Generation Parameters", open=False):
                max_tokens = gr.Slider(
                    minimum=128,
                    maximum=2048,
                    value=512,
                    step=64,
                    label="Max New Tokens",
                    info="Maximum length of the generated response",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Higher values = more creative, lower = more focused",
                )
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top P",
                    info="Nucleus sampling threshold",
                )

            analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")

        # Right column: model output and usage tips.
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="MedGemma Response",
                lines=25,
            )

            gr.Markdown(
                """
                ### 💡 Tips for Better Results:

                - **Be specific**: Include the imaging modality and body part
                - **Structure your request**: Ask for findings, diagnosis, and management
                - **Use medical terminology**: The model is trained on clinical language
                - **Request differentials**: Ask for alternative diagnoses to consider
                - **Multilingual**: The model can respond in your preferred language

                ### 🌍 Supported Languages:
                English, French, Spanish, Chinese, Haitian Creole, Portuguese, Arabic, Hindi, German, Japanese
                """
            )

    # Wire up the interface; input order matches process_image_with_prompt's parameters.
    analyze_btn.click(
        fn=process_image_with_prompt,
        inputs=[image_input, prompt_input, language_select, max_tokens, temperature, top_p],
        outputs=output_text,
    )

    # Add example showing structured clinical format
    gr.Markdown(
        """
        ---

        ### 📊 Example Clinical Report Structure

        For comprehensive analysis, use prompts that request structured output:

        ```
        FINDINGS:
        - List observed anatomical structures
        - Describe any pathological changes
        - Note image quality and technique

        IMPRESSION:
        - Primary diagnosis with confidence level
        - Supporting evidence from the image

        DIFFERENTIAL DIAGNOSES:
        1. First alternative diagnosis
        2. Second alternative diagnosis

        RECOMMENDATIONS:
        - Follow-up imaging if needed
        - Additional tests or consultations
        - Clinical correlation suggestions
        ```

        ---

        ### About MedGemma

        MedGemma is part of Google's Health AI Developer Foundations (HAI-DEF) program. It's built on Gemma 3
        and specifically trained on medical data including chest X-rays, dermatology images, ophthalmology images,
        histopathology slides, and medical text.

        **Key Features:**
        - Multimodal (text + image) understanding
        - Medical terminology and context awareness
        - Support for various medical imaging modalities
        - Multilingual clinical reporting
        - Open-source and available on Hugging Face

        **Resources:**
        - [Model Card](https://huggingface.co/google/medgemma-1.5-4b-it)
        - [Documentation](https://developers.google.com/health-ai-developer-foundations/medgemma)
        - [GitHub Repository](https://github.com/Google-Health/medgemma)

        **⚠️ Disclaimer:** MedGemma is a research and development tool. It has not been evaluated or optimized
        for clinical use. All outputs require independent verification by qualified healthcare professionals.
        This tool should never be used as the sole basis for clinical decisions.
        """
    )
349
+
350
# Launch the demo (only when executed as a script, not when imported).
if __name__ == "__main__":
    # Enable Gradio's request queue before starting the server.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False,
    )
358
+
359
+