Spaces:

Alejo760
/

MedGemma1.5test

Sleeping

App Files Files Community

Alejo760 committed on Feb 25

Commit

abaa6d9

verified ·

1 Parent(s): 8523e93

Create app.py

Browse files

Files changed (1) hide show

app.py +359 -0

app.py ADDED Viewed

	@@ -0,0 +1,359 @@

+import gradio as gr
+import torch
+from transformers import pipeline
+from PIL import Image
+import io
+import base64
+import requests
+from typing import Optional
+import os
+import spaces
+# Model identifier handed to the transformers pipeline in MedGemmaDemo.load_model()
+MODEL_ID = "google/medgemma-1.5-4b-it"
+# Language code -> display name; these pairs populate the "Output Language" dropdown
+LANGUAGES = {
+    "en": "English",
+    "es": "Spanish (Español)"
+}
+# Language code -> sentence appended to the user's prompt to request the reply language
+LANGUAGE_INSTRUCTIONS = {
+    "en": "Please respond in English.",
+    "es": "Por favor responde en español."
+}
+class MedGemmaDemo:
+    def __init__(self):
+        self.pipe = None
+        self.loaded = False
+    def load_model(self):
+        """Load the MedGemma model using pipeline"""
+        if not self.loaded:
+            print("Loading MedGemma model...")
+            try:
+                # Get HF token from environment variable
+                hf_token = os.environ.get("HF_TOKEN")
+                if not hf_token:
+                    raise ValueError(
+                        "HF_TOKEN not found in environment variables. "
+                        "Please set your Hugging Face token as an environment variable or repository secret."
+                    )
+                # Load the model using pipeline
+                # ZeroGPU will handle device allocation automatically
+                self.pipe = pipeline(
+                    "image-text-to-text",
+                    model=MODEL_ID,
+                    torch_dtype=torch.bfloat16,
+                    device_map="auto",
+                    token=hf_token
+                )
+                self.loaded = True
+                print("Model loaded successfully!")
+            except Exception as e:
+                print(f"Error loading model: {e}")
+                raise e
+    @spaces.GPU(duration=60)  # ZeroGPU decorator - allocates GPU for 60 seconds
+    def generate_response(
+        self,
+        image: Image.Image,
+        prompt: str,
+        language: str = "en",
+        max_new_tokens: int = 512,
+        temperature: float = 0.7,
+        top_p: float = 0.9,
+    ) -> str:
+        """Generate a response from MedGemma given an image and prompt"""
+        if not self.loaded:
+            self.load_model()
+        try:
+            # Add language instruction to the prompt
+            language_instruction = LANGUAGE_INSTRUCTIONS.get(language, LANGUAGE_INSTRUCTIONS["en"])
+            full_prompt = f"{prompt}\n\n{language_instruction}"
+            # Format messages for the pipeline
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image", "image": image},
+                        {"type": "text", "text": full_prompt}
+                    ]
+                }
+            ]
+            # Generate response using pipeline
+            outputs = self.pipe(
+                text=messages,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+            )
+            # Extract the generated text from the output
+            # The output is a list of message dictionaries
+            generated_messages = outputs[0]["generated_text"]
+            # Find the assistant's response
+            assistant_response = ""
+            if isinstance(generated_messages, list):
+                # Look for the assistant's message
+                for message in generated_messages:
+                    if message.get("role") == "assistant":
+                        assistant_response = message.get("content", "")
+                        break
+            elif isinstance(generated_messages, str):
+                # If it's already a string, use it directly
+                assistant_response = generated_messages
+            # Clean up the response
+            if not assistant_response:
+                assistant_response = "No response generated."
+            return assistant_response.strip()
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
+# Single shared MedGemmaDemo instance; process_image_with_prompt() calls into it
+demo_instance = MedGemmaDemo()
+def process_image_with_prompt(image, prompt, language, max_tokens, temperature, top_p):
+    """Gradio interface function"""
+    if image is None:
+        return "Please upload an image."
+    if not prompt or prompt.strip() == "":
+        return "Please enter a prompt."
+    try:
+        # Convert to PIL Image if needed
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        # Ensure RGB
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # Generate response
+        response = demo_instance.generate_response(
+            image=image,
+            prompt=prompt,
+            language=language,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+        )
+        return response
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
+# Example clinical prompts; each one is rendered as a button that fills the prompt textbox
+example_prompts = [
+    "Describe the key findings in this medical image. Provide one main diagnosis, two differential diagnoses, and suggestions for follow-up management.",
+    "Analyze this image and provide: 1) Key anatomical structures visible, 2) Any pathological findings, 3) Clinical significance, 4) Recommended next steps.",
+    "Generate a comprehensive radiology report including: findings, impression, main diagnosis, differential diagnoses, and management recommendations.",
+    "What are the primary abnormalities in this image? Discuss the most likely diagnosis, alternative diagnoses to consider, and appropriate follow-up imaging or tests.",
+    "Provide a structured assessment: 1) Image quality and technique, 2) Normal anatomical structures, 3) Abnormal findings, 4) Differential diagnoses, 5) Clinical recommendations.",
+    "Describe the pathological findings in detail. What is your primary diagnosis? List at least two differential diagnoses and suggest appropriate management strategies.",
+    "Evaluate this image for any signs of acute pathology. Provide diagnostic impressions, severity assessment, and urgent management considerations if applicable.",
+    "Analyze the imaging features present and correlate with potential clinical presentations. Include main diagnosis, differentials, and follow-up recommendations.",
+]
+# Build the Gradio Blocks UI: intro text, an input column, an output column, and footer notes
+with gr.Blocks(title="MedGemma Medical Image Analysis") as demo:
+    # NOTE(review): the intro text (and the tips panel further down) advertises
+    # 10+ languages, but LANGUAGES - and therefore the dropdown - only offers
+    # English and Spanish. Confirm which is intended.
+    gr.Markdown(
+        """
+        # 🏥 MedGemma Medical Image Analysis Demo
+        This demo showcases **MedGemma 1.5 4B**, Google's open medical AI model for analyzing medical images.
+        **Powered by ZeroGPU** for efficient GPU allocation on Hugging Face Spaces.
+        **⚠️ Setup Required:**
+        1. Accept the model license at: https://huggingface.co/google/medgemma-1.5-4b-it
+        2. Set your HF token in the Space settings (Settings → Repository secrets → Add secret: `HF_TOKEN`)
+        3. Enable ZeroGPU in Space settings (Hardware → ZeroGPU)
+        **Note:** This is a demonstration tool. All outputs should be independently verified and clinically
+        correlated before any medical use. MedGemma is intended as a developer tool and requires validation
+        for specific use cases.
+        ### Capabilities:
+        - 2D Medical Image Analysis (X-rays, CT slices, MRI slices, etc.)
+        - Multilingual responses in 10+ languages
+        - Structured clinical reporting
+        - Differential diagnosis generation
+        ### How to use:
+        1. Upload a medical image (X-ray, CT, MRI, etc.)
+        2. Select your preferred output language
+        3. Enter your question or select an example prompt
+        4. Adjust generation parameters if needed
+        5. Click "Analyze Image" to get MedGemma's response
+        """
+    )
+    with gr.Row():
+        # Left column: all inputs and the analyze button
+        with gr.Column(scale=1):
+            image_input = gr.Image(
+                label="Medical Image",
+                type="pil",
+                image_mode="RGB"
+            )
+            # Choices are (display name, language code) pairs; the default "en" is a code
+            language_select = gr.Dropdown(
+                choices=[(v, k) for k, v in LANGUAGES.items()],
+                value="en",
+                label="Output Language",
+                info="Select the language for the AI response"
+            )
+            prompt_input = gr.Textbox(
+                label="Prompt/Question",
+                placeholder="e.g., Describe the key findings in this chest X-ray and provide a diagnosis...",
+                lines=4
+            )
+            with gr.Accordion("📋 Example Clinical Prompts", open=True):
+                gr.Markdown("*Click any prompt below to use it*")
+                # One button per example; the label shows the first 80 characters
+                for i, prompt in enumerate(example_prompts):
+                    btn = gr.Button(
+                        f"Example {i+1}: {prompt[:80]}...",
+                        variant="secondary"
+                    )
+                    # p=prompt binds this iteration's prompt as a default argument,
+                    # so each button returns its own text rather than the last one
+                    btn.click(
+                        fn=lambda p=prompt: p,
+                        outputs=prompt_input
+                    )
+            with gr.Accordion("⚙️ Generation Parameters", open=False):
+                max_tokens = gr.Slider(
+                    minimum=128,
+                    maximum=2048,
+                    value=512,
+                    step=64,
+                    label="Max New Tokens",
+                    info="Maximum length of the generated response"
+                )
+                temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.7,
+                    step=0.1,
+                    label="Temperature",
+                    info="Higher values = more creative, lower = more focused"
+                )
+                top_p = gr.Slider(
+                    minimum=0.5,
+                    maximum=1.0,
+                    value=0.9,
+                    step=0.05,
+                    label="Top P",
+                    info="Nucleus sampling threshold"
+                )
+            analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
+        # Right column: model output and usage tips
+        with gr.Column(scale=1):
+            output_text = gr.Textbox(
+                label="MedGemma Response",
+                lines=25
+            )
+            gr.Markdown(
+                """
+                ### 💡 Tips for Better Results:
+                - **Be specific**: Include the imaging modality and body part
+                - **Structure your request**: Ask for findings, diagnosis, and management
+                - **Use medical terminology**: The model is trained on clinical language
+                - **Request differentials**: Ask for alternative diagnoses to consider
+                - **Multilingual**: The model can respond in your preferred language
+                ### 🌍 Supported Languages:
+                English, French, Spanish, Chinese, Haitian Creole, Portuguese, Arabic, Hindi, German, Japanese
+                """
+            )
+    # Wire the analyze button: the inputs list order matches the parameter order
+    # of process_image_with_prompt
+    analyze_btn.click(
+        fn=process_image_with_prompt,
+        inputs=[image_input, prompt_input, language_select, max_tokens, temperature, top_p],
+        outputs=output_text
+    )
+    # Footer: a sample structured-report layout, background on MedGemma, and the disclaimer
+    gr.Markdown(
+        """
+        ---
+        ### 📊 Example Clinical Report Structure
+        For comprehensive analysis, use prompts that request structured output:
+        ```
+        FINDINGS:
+        - List observed anatomical structures
+        - Describe any pathological changes
+        - Note image quality and technique
+        IMPRESSION:
+        - Primary diagnosis with confidence level
+        - Supporting evidence from the image
+        DIFFERENTIAL DIAGNOSES:
+        1. First alternative diagnosis
+        2. Second alternative diagnosis
+        RECOMMENDATIONS:
+        - Follow-up imaging if needed
+        - Additional tests or consultations
+        - Clinical correlation suggestions
+        ```
+        ---
+        ### About MedGemma
+        MedGemma is part of Google's Health AI Developer Foundations (HAI-DEF) program. It's built on Gemma 3
+        and specifically trained on medical data including chest X-rays, dermatology images, ophthalmology images,
+        histopathology slides, and medical text.
+        **Key Features:**
+        - Multimodal (text + image) understanding
+        - Medical terminology and context awareness
+        - Support for various medical imaging modalities
+        - Multilingual clinical reporting
+        - Open-source and available on Hugging Face
+        **Resources:**
+        - [Model Card](https://huggingface.co/google/medgemma-1.5-4b-it)
+        - [Documentation](https://developers.google.com/health-ai-developer-foundations/medgemma)
+        - [GitHub Repository](https://github.com/Google-Health/medgemma)
+        **⚠️ Disclaimer:** MedGemma is a research and development tool. It has not been evaluated or optimized
+        for clinical use. All outputs require independent verification by qualified healthcare professionals.
+        This tool should never be used as the sole basis for clinical decisions.
+        """
+    )
+# Launch the demo
+if __name__ == "__main__":
+    demo.queue()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )