Spaces:

samwell
/

medrax2

Paused

App Files Files Community

Emily Xie committed on Jul 31, 2025

Commit

aa6bc6b

1 Parent(s): 35945d9

for test on gpu

Browse files

Files changed (4) hide show

main.py +8 -3
medrax/tools/__init__.py +1 -1
medrax/tools/medgemma.py +225 -0
medrax/tools/medgemma_client.py +145 -0

main.py CHANGED Viewed

@@ -65,6 +65,9 @@ def initialize_agent(
     prompts = load_prompts_from_file(prompt_file)
     prompt = prompts["MEDICAL_ASSISTANT"]
     all_tools = {
         "TorchXRayVisionClassifierTool": lambda: TorchXRayVisionClassifierTool(device=device),
         "ArcPlusClassifierTool": lambda: ArcPlusClassifierTool(cache_dir=model_dir, device=device),
@@ -87,6 +90,7 @@ def initialize_agent(
         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),
     }
     try:
@@ -149,10 +153,11 @@ if __name__ == "__main__":
         # "LlavaMedTool",  # For multimodal medical image understanding
         # "XRayPhraseGroundingTool",  # For locating described features in X-rays
         # "ChestXRayGeneratorTool",  # For generating synthetic chest X-rays
-        "MedSAM2Tool",  # For advanced medical image segmentation using MedSAM2
-        "WebBrowserTool",  # For web browsing and search capabilities
-        "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
         # "PythonSandboxTool",  # Add the Python sandbox tool
     ]
     # Configure the Retrieval Augmented Generation (RAG) system

     prompts = load_prompts_from_file(prompt_file)
     prompt = prompts["MEDICAL_ASSISTANT"]
+    # Define the URL of the MedGemma FastAPI service.
+    MEDGEMMA_API_URL = os.getenv("MEDGEMMA_API_URL", "http://127.0.0.1:8002")
     all_tools = {
         "TorchXRayVisionClassifierTool": lambda: TorchXRayVisionClassifierTool(device=device),
         "ArcPlusClassifierTool": lambda: ArcPlusClassifierTool(cache_dir=model_dir, device=device),
         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),
+        "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(api_url=MEDGEMMA_API_URL)
     }
     try:
         # "LlavaMedTool",  # For multimodal medical image understanding
         # "XRayPhraseGroundingTool",  # For locating described features in X-rays
         # "ChestXRayGeneratorTool",  # For generating synthetic chest X-rays
+        # "MedSAM2Tool",  # For advanced medical image segmentation using MedSAM2
+        # "WebBrowserTool",  # For web browsing and search capabilities
+        # "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
         # "PythonSandboxTool",  # Add the Python sandbox tool
+        "MedGemmaVQATool"  # For visual question answering on medical images
     ]
     # Configure the Retrieval Augmented Generation (RAG) system

medrax/tools/__init__.py CHANGED Viewed

@@ -13,4 +13,4 @@ from .rag import *
 from .web_browser import *
 from .python_tool import *
 from .medsam2 import *

 from .web_browser import *
 from .python_tool import *
 from .medsam2 import *
+from .medgemma_client import *

medrax/tools/medgemma.py ADDED Viewed

	@@ -0,0 +1,225 @@

+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from pydantic import BaseModel, Field
+from typing import List, Optional, Any, Dict, Tuple
+from pathlib import Path
+import torch
+from PIL import Image
+from transformers import pipeline, BitsAndBytesConfig
+import asyncio
+import uvicorn
+import os
+import uuid
+import traceback
+import sys
+import transformers
+# Report the interpreter and library versions in use when the module is loaded.
+print(
+    "--- ENVIRONMENT CHECK ---",
+    f"Python Executable: {sys.executable}",
+    f"PyTorch version: {torch.__version__}",
+    f"Transformers version: {transformers.__version__}",
+    "-----------------------",
+    sep="\n",
+)
+# --- Configuration ---
+# CACHE_DIR is handed to the pipeline as its model cache; UPLOAD_DIR holds
+# uploaded images while a request is being processed.
+CACHE_DIR = "./model_cache"
+UPLOAD_DIR = "./uploaded_images"
+# Make sure both working directories exist before anything uses them.
+for _required_dir in (CACHE_DIR, UPLOAD_DIR):
+    os.makedirs(_required_dir, exist_ok=True)
+# --- Pydantic Models for API ---
+class VQAInput(BaseModel):
+    # Request schema for one visual-question-answering call: a required prompt
+    # plus an optional system prompt and a cap on the generated length.
+    prompt: str = Field(
+        default=...,
+        description="Question or instruction about the medical images",
+    )
+    system_prompt: Optional[str] = Field(
+        default="You are an expert radiologist.",
+        description="System prompt to set the context for the model",
+    )
+    max_new_tokens: int = Field(
+        default=300,
+        description="Maximum number of tokens to generate in the response",
+    )
+class VQAResponse(BaseModel):
+    # Success payload of the /analyze-images/ endpoint.
+    # Text produced by the model for the request.
+    response: str
+    # Request details echoed back by the endpoint (prompt, image paths, status, ...).
+    metadata: Dict[str, Any]
+class ErrorResponse(BaseModel):
+    # Error schema declared for the 404 and 500 responses of /analyze-images/.
+    # NOTE(review): the endpoint raises HTTPException instead of returning this
+    # model - confirm that the real error body matches this shape.
+    # Human-readable error message.
+    error: str
+    # Additional context about the failed request.
+    metadata: Dict[str, Any]
+# --- MedGemma Model Handling ---
+class MedGemmaModel:
+    """Singleton wrapper around a Hugging Face ``image-text-to-text`` pipeline.
+
+    Every ``MedGemmaModel()`` call returns the same object, and the pipeline is
+    only built while ``self.pipe`` is still unset, so a successfully loaded
+    model is never loaded twice in one process.
+    """
+    _instance = None
+    def __new__(cls, *args, **kwargs):
+        # Hand back the shared instance, creating it on first use.
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+    def __init__(self,
+                 model_name: str = "google/medgemma-4b-it",
+                 device: Optional[str] = "cuda",
+                 dtype: torch.dtype = torch.bfloat16,
+                 load_in_4bit: bool = False):
+        """Build the pipeline unless a previous construction already did.
+
+        Args:
+            model_name: Model identifier passed to ``pipeline``.
+            device: Requested device; "cpu" is used when it is empty or CUDA
+                is not available.
+            dtype: Torch dtype passed to the model.
+            load_in_4bit: Add a 4-bit ``BitsAndBytesConfig`` when True.
+
+        Raises:
+            RuntimeError: If the pipeline cannot be created.
+        """
+        # __init__ runs on every MedGemmaModel() call even though __new__
+        # returns the shared instance, so bail out once the pipeline exists.
+        if getattr(self, "pipe", None) is not None:
+            return
+        self.device = device if device and torch.cuda.is_available() else "cpu"
+        self.dtype = dtype
+        self.pipe = None
+        model_kwargs = {"torch_dtype": self.dtype, "cache_dir": CACHE_DIR}
+        if load_in_4bit:
+            model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
+        model_kwargs["device_map"] = {"": self.device}
+        try:
+            # NOTE(review): trust_remote_code=True lets the model repository run
+            # its own code at load time - confirm this is required.
+            self.pipe = pipeline("image-text-to-text",
+                                 model=model_name,
+                                 model_kwargs=model_kwargs,
+                                 trust_remote_code=True,
+                                 use_cache=True)
+        except Exception as e:
+            # Chain the cause so the original traceback is not lost.
+            raise RuntimeError(f"Failed to initialize MedGemma pipeline: {str(e)}") from e
+    def _prepare_messages(
+        self, image_paths: List[str], prompt: str, system_prompt: str
+    ) -> Tuple[List[Dict[str, Any]], List[Image.Image]]:
+        """Load the images and build the chat messages for the pipeline.
+
+        Returns:
+            ``(messages, images)``: a system message and a user message (prompt
+            text followed by one image entry per file), plus the RGB images.
+
+        Raises:
+            FileNotFoundError: If any path is not an existing file.
+        """
+        images = []
+        for path in image_paths:
+            if not Path(path).is_file():
+                raise FileNotFoundError(f"Image file not found: {path}")
+            # Open inside a context manager so the file handle is released;
+            # convert() yields a loaded RGB image that no longer needs the file.
+            with Image.open(path) as source:
+                images.append(source.convert("RGB"))
+        messages = [
+            {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}]
+                + [{"type": "image", "image": img} for img in images],
+            },
+        ]
+        return messages, images
+    async def aget_response(self, image_paths: List[str], prompt: str, system_prompt: str, max_new_tokens: int) -> str:
+        """Run the model on the images and return the generated answer.
+
+        Image loading and generation are blocking, so both are pushed to the
+        event loop's default executor.
+
+        Returns:
+            The stripped ``content`` of the last generated message, or
+            "No response generated" when the output has no usable text.
+
+        Raises:
+            FileNotFoundError: If any image path is not an existing file.
+        """
+        # get_running_loop() is the supported call inside a coroutine.
+        loop = asyncio.get_running_loop()
+        messages, _ = await loop.run_in_executor(None, self._prepare_messages, image_paths, prompt, system_prompt)
+        def _generate():
+            return self.pipe(
+                text=messages,
+                max_new_tokens=max_new_tokens,
+                do_sample=False,
+            )
+        output = await loop.run_in_executor(None, _generate)
+        # Expected shape: [{"generated_text": [..., {"content": "<answer>"}]}].
+        if isinstance(output, list) and output and isinstance(output[0], dict):
+            generated_text = output[0].get("generated_text")
+            if isinstance(generated_text, list) and generated_text:
+                return generated_text[-1].get("content", "").strip()
+        return "No response generated"
+# --- FastAPI Application ---
+app = FastAPI(
+    title="MedGemma VQA API",
+    description="API for medical visual question answering using Google's MedGemma model.",
+)
+# Shared model handle; stays None until the startup hook has loaded the model.
+medgemma_model: Optional[MedGemmaModel] = None
+@app.on_event("startup")
+async def startup_event():
+    """Create the shared MedGemma model when the application starts."""
+    global medgemma_model
+    try:
+        medgemma_model = MedGemmaModel()
+    except RuntimeError as load_error:
+        # A failed load is only reported: the handle stays None, so the
+        # endpoint answers 503 instead of the whole process exiting.
+        print(f"Error loading MedGemma model: {load_error}")
+    else:
+        print("MedGemma model loaded successfully.")
+@app.post("/analyze-images/",
+            response_model=VQAResponse,
+            responses={500: {"model": ErrorResponse},
+                       404: {"model": ErrorResponse}},
+            summary="Analyze one or more medical images")
+async def analyze_images(
+    images: List[UploadFile] = File(..., description="List of medical image files to analyze (JPG or PNG)."),
+    prompt: str = Form(..., description="Question or instruction about the medical images."),
+    system_prompt: Optional[str] = Form("You are an expert radiologist.", description="System prompt to set the context for the model."),
+    max_new_tokens: int = Form(100, description="Maximum number of tokens to generate in the response.")
+):
+    """
+    Upload one or more medical images and a prompt to get an analysis from the MedGemma model.
+    """
+    # Implementation notes:
+    #   503 - model not loaded; 400 - an upload is not JPEG/PNG;
+    #   404 - a saved image vanished before inference; 500 - save or inference failed.
+    # Every file written for the request is removed again before returning.
+    # NOTE(review): max_new_tokens defaults to 100 here but to 300 in VQAInput - confirm which is intended.
+    if medgemma_model is None or medgemma_model.pipe is None:
+        raise HTTPException(status_code=503, detail="Model is not available. Please try again later.")
+    # Extension used for each accepted content type. Stored names are built from
+    # this table and a UUID only, so the client-supplied filename (which may hold
+    # path separators or be missing) never reaches the filesystem path.
+    suffix_by_content_type = {"image/jpeg": ".jpg", "image/png": ".png"}
+    # Validate every upload before writing anything, so a rejected request
+    # leaves no files behind.
+    for image in images:
+        if image.content_type not in suffix_by_content_type:
+            raise HTTPException(status_code=400, detail=f"Unsupported image format: {image.content_type}. Only JPG and PNG are supported.")
+    image_paths = []
+    try:
+        for image in images:
+            unique_filename = f"{uuid.uuid4()}{suffix_by_content_type[image.content_type]}"
+            file_path = os.path.join(UPLOAD_DIR, unique_filename)
+            # Track the path before writing so that a partially written file
+            # is still removed by the cleanup below.
+            image_paths.append(file_path)
+            try:
+                with open(file_path, "wb") as buffer:
+                    buffer.write(await image.read())
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=f"Failed to save uploaded image: {str(e)}") from e
+        response_text = await medgemma_model.aget_response(image_paths, prompt, system_prompt, max_new_tokens)
+        metadata = {
+            "image_paths": image_paths,
+            "prompt": prompt,
+            "system_prompt": system_prompt,
+            "max_new_tokens": max_new_tokens,
+            "num_images": len(image_paths),
+            "analysis_status": "completed",
+        }
+        return VQAResponse(response=response_text, metadata=metadata)
+    except HTTPException:
+        # Already a well-formed HTTP error (e.g. the save failure above).
+        raise
+    except FileNotFoundError as e:
+        raise HTTPException(status_code=404, detail=f"Image file not found: {str(e)}") from e
+    except Exception as e:
+        print("--- AN EXCEPTION OCCURRED IN THE ENDPOINT ---")
+        traceback.print_exc()
+        # Catch potential CUDA out-of-memory errors and other exceptions
+        error_message = "An unexpected error occurred during analysis."
+        if "CUDA out of memory" in str(e):
+            error_message = "GPU memory exhausted. Try reducing image resolution or max_new_tokens."
+        raise HTTPException(status_code=500, detail=error_message) from e
+    finally:
+        # Clean up saved images
+        for path in image_paths:
+            try:
+                os.remove(path)
+            except OSError:
+                # Best effort: a file that is already gone or cannot be removed
+                # must not turn a finished request into a failure.
+                pass
+if __name__ == "__main__":
+    # When run as a script, serve the API on all interfaces at port 8002.
+    listen_host, listen_port = "0.0.0.0", 8002
+    uvicorn.run(app, host=listen_host, port=listen_port)

medrax/tools/medgemma_client.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import httpx
+from typing import Dict, List, Optional, Type, Any
+from langchain_core.tools import BaseTool
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForToolRun,
+    CallbackManagerForToolRun,
+)
+from pydantic import BaseModel, Field
+import os
+# This input schema should be identical to the one in your original tool
+class MedGemmaVQAInput(BaseModel):
+    """Input schema for the MedGemma VQA Tool. The agent provides local paths to images."""
+    # Required: where the images live on the agent's side.
+    image_paths: List[str] = Field(
+        default=...,
+        description="List of paths to medical image files to analyze. These are local paths accessible to the agent.",
+    )
+    # Required: what to ask about the images.
+    prompt: str = Field(
+        default=...,
+        description="Question or instruction about the medical images",
+    )
+    # Optional overrides, each with a default.
+    system_prompt: Optional[str] = Field(
+        default="You are an expert radiologist.",
+        description="System prompt to set the context for the model",
+    )
+    max_new_tokens: int = Field(
+        default=300,
+        description="Maximum number of tokens to generate in the response",
+    )
+class MedGemmaAPIClientTool(BaseTool):
+    """
+    A client tool to interact with a remote MedGemma VQA FastAPI service.
+    This tool takes local image paths, reads them, and sends them to the API endpoint
+    for analysis. Failures are returned as an error string instead of being raised,
+    so the caller always receives text.
+    """
+    name: str = "medgemma_medical_vqa_service"
+    description: str = (
+        "Sends medical images and a prompt to a specialized MedGemma VQA service for analysis. "
+        "Use this for expert-level reasoning, diagnosis assistance, and detailed image interpretation "
+        "across modalities like chest X-rays, dermatology, etc. Input must be local image paths and a prompt."
+    )
+    args_schema: Type[BaseModel] = MedGemmaVQAInput
+    api_url: str  # The URL of the running FastAPI service
+    def _request_timeout(self) -> httpx.Timeout:
+        """Return the timeout used by both the sync and async paths (300 s overall, 10 s to connect)."""
+        return httpx.Timeout(300.0, connect=10.0)
+    def _endpoint(self) -> str:
+        """Return the analysis endpoint URL, tolerating a trailing slash on ``api_url``."""
+        return f"{self.api_url.rstrip('/')}/analyze-images/"
+    def _open_uploads(self, image_paths: List[str]) -> tuple:
+        """Open every image and build the multipart ``files`` list.
+
+        Returns:
+            ``(files, handles)``; the caller must close every handle.
+
+        Raises:
+            OSError: If a file cannot be opened; handles opened so far are closed first.
+        """
+        import mimetypes
+        files_to_send = []
+        opened_files = []
+        try:
+            for path in image_paths:
+                f = open(path, "rb")
+                opened_files.append(f)
+                # Label the part with the file's real type (e.g. PNG); fall back to
+                # JPEG, the previous fixed value, when the type cannot be guessed.
+                content_type = mimetypes.guess_type(path)[0] or "image/jpeg"
+                # The key 'images' must match the parameter name in the FastAPI endpoint
+                files_to_send.append(("images", (os.path.basename(path), f, content_type)))
+        except Exception:
+            for f in opened_files:
+                f.close()
+            raise
+        return files_to_send, opened_files
+    def _describe_failure(self, exc: Exception, timeout_config: httpx.Timeout) -> str:
+        """Turn an exception from the request into the message returned to the caller."""
+        if isinstance(exc, httpx.TimeoutException):
+            return f"Error: The request to the MedGemma API timed out after {timeout_config.read} seconds. The server might be overloaded or the model is taking too long to load. Try again later."
+        if isinstance(exc, httpx.ConnectError):
+            return f"Error: Could not connect to the MedGemma API. Check if the server address '{self.api_url}' is correct and running."
+        if isinstance(exc, httpx.HTTPStatusError):
+            return f"Error: The MedGemma API returned an error (Status {exc.response.status_code}): {exc.response.text}"
+        return f"An unexpected error occurred in the MedGemma client tool: {str(exc)}"
+    def _run(
+        self,
+        image_paths: List[str],
+        prompt: str,
+        system_prompt: str = "You are an expert radiologist.",
+        max_new_tokens: int = 300,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> str:
+        """Execute the tool synchronously.
+
+        Returns:
+            The ``response`` field of the service's JSON reply, or an error
+            message describing what went wrong.
+        """
+        timeout_config = self._request_timeout()
+        opened_files = []
+        try:
+            files_to_send, opened_files = self._open_uploads(image_paths)
+            data = {
+                "prompt": prompt,
+                "system_prompt": system_prompt,
+                "max_new_tokens": max_new_tokens,
+            }
+            # The context manager closes the client's connections when done.
+            with httpx.Client(timeout=timeout_config) as client:
+                response = client.post(self._endpoint(), data=data, files=files_to_send)
+                response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+                # The agent expects a string response from a tool
+                return response.json()["response"]
+        except Exception as e:
+            return self._describe_failure(e, timeout_config)
+        finally:
+            # Important: Ensure all opened files are closed.
+            for f in opened_files:
+                f.close()
+    async def _arun(
+        self,
+        image_paths: List[str],
+        prompt: str,
+        system_prompt: str = "You are an expert radiologist.",
+        max_new_tokens: int = 300,
+        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+    ) -> str:
+        """Execute the tool asynchronously.
+
+        Uses the same timeout, content-type detection and error messages as
+        the synchronous path.
+
+        Returns:
+            The ``response`` field of the service's JSON reply, or an error
+            message describing what went wrong.
+        """
+        timeout_config = self._request_timeout()
+        opened_files = []
+        try:
+            # Note: opening the files is blocking I/O; accepted here because the
+            # images are read locally before one long network call.
+            files_to_send, opened_files = self._open_uploads(image_paths)
+            data = {
+                "prompt": prompt,
+                "system_prompt": system_prompt,
+                "max_new_tokens": max_new_tokens,
+            }
+            async with httpx.AsyncClient(timeout=timeout_config) as client:
+                response = await client.post(self._endpoint(), data=data, files=files_to_send)
+                response.raise_for_status()
+                return response.json()["response"]
+        except Exception as e:
+            return self._describe_failure(e, timeout_config)
+        finally:
+            for f in opened_files:
+                f.close()
+if __name__ == "__main__":
+    # Manual smoke test against a service listening on localhost:8002.
+    demo_request = {
+        "image_paths": ["demo/chest/pneumonia1.jpg"],
+        "prompt": "What abnormality do you see?",
+    }
+    demo_tool = MedGemmaAPIClientTool(api_url="http://localhost:8002")
+    print(demo_tool.run(demo_request))