Spaces (Paused)

Emily Xie committed · Commit 205758b · 1 parent: aa6bc6b

medgemma fastapi tool integration
Browse files:
- README.md +20 -0
- main.py +8 -4
- medrax/tools/__init__.py +1 -3
- medrax/tools/vqa/__init__.py +16 -0
- medrax/tools/vqa/llava_med.py +186 -0
- medrax/tools/vqa/medgemma/medgemma.py +431 -0
- medrax/tools/vqa/medgemma/medgemma_client.py +290 -0
- medrax/tools/vqa/medgemma/medgemma_requirements.txt +55 -0
- medrax/tools/vqa/medgemma/medgemma_setup.py +64 -0
- medrax/tools/vqa/xray_vqa.py +186 -0
README.md
CHANGED

@@ -22,6 +22,7 @@ MedRAX is built on a robust technical foundation:
 
 ### Integrated Tools
 - **Visual QA**: Utilizes CheXagent and LLaVA-Med for complex visual understanding and medical reasoning
+- **MedGemma VQA**: Advanced medical visual question answering using Google's MedGemma 4B model for comprehensive medical image analysis across multiple modalities
 - **Segmentation**: Employs MedSAM2 (advanced medical image segmentation) and PSPNet model trained on ChestX-Det for precise anatomical structure identification
 - **Grounding**: Uses Maira-2 for localizing specific findings in medical images
 - **Report Generation**: Implements SwinV2 Transformer trained on CheXpert Plus for detailed medical reporting

@@ -130,6 +131,10 @@ PINECONE_API_KEY=
 # Requires Google Custom Search API credentials.
 GOOGLE_SEARCH_API_KEY=
 GOOGLE_SEARCH_ENGINE_ID=
+
+# MedGemma VQA Tool (Optional)
+# URL for the MedGemma FastAPI service
+MEDGEMMA_API_URL=http://127.0.0.1:8002
 ```
 
 ### Getting Started

@@ -232,6 +237,21 @@ XRayVQATool(
 ```
 - CheXagent weights download automatically
 
+### MedGemma VQA Tool
+```python
+MedGemmaAPIClientTool(
+    device=device,
+    cache_dir=model_dir,
+    api_url=MEDGEMMA_API_URL,
+)
+```
+- **Advanced Medical VQA**: Uses Google's MedGemma 4B instruction-tuned model for comprehensive medical image analysis
+- **Multi-modal Capabilities**: Specialized for chest X-rays, dermatology, ophthalmology, and pathology images
+- **Expert-level Analysis**: Provides radiologist-level medical reasoning and diagnosis assistance
+- **High Performance**: Supports up to 128K context length and 896x896 image resolution
+- **Memory Efficient**: 4-bit quantization available (~4GB VRAM) with full precision option (~8GB VRAM)
+- **Automatic Setup**: Model weights download automatically when service starts
+
 ### MedSAM2 Tool
 ```python
 MedSAM2Tool(
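Taken together, the README changes describe a client/server split: the agent process talks to a separately launched MedGemma FastAPI service. A minimal end-to-end sketch of that flow, assuming the service from this commit is running on the default port and `chest.png` is a hypothetical local image path:

```python
# Minimal sketch, not the project's official entry point: start the service
# first (python medrax/tools/vqa/medgemma/medgemma.py, listening on 8002),
# then query it through the client tool added in this commit.
from medrax.tools.vqa.medgemma.medgemma_client import MedGemmaAPIClientTool

tool = MedGemmaAPIClientTool(api_url="http://127.0.0.1:8002")
output, metadata = tool._run(
    image_paths=["chest.png"],  # hypothetical path
    prompt="Is there evidence of pneumonia?",
)
print(output, metadata["analysis_status"])
```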
main.py
CHANGED

@@ -73,7 +73,7 @@ def initialize_agent(
         "ArcPlusClassifierTool": lambda: ArcPlusClassifierTool(cache_dir=model_dir, device=device),
         "ChestXRaySegmentationTool": lambda: ChestXRaySegmentationTool(device=device),
         "LlavaMedTool": lambda: LlavaMedTool(cache_dir=model_dir, device=device, load_in_8bit=True),
-        "
+        "CheXagentXRayVQATool": lambda: CheXagentXRayVQATool(cache_dir=model_dir, device=device),
         "ChestXRayReportGeneratorTool": lambda: ChestXRayReportGeneratorTool(
             cache_dir=model_dir, device=device
         ),

@@ -90,7 +90,7 @@ def initialize_agent(
         "MedSAM2Tool": lambda: MedSAM2Tool(
             device=device, cache_dir=model_dir, temp_dir=temp_dir
         ),
-        "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(api_url=MEDGEMMA_API_URL)
+        "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(cache_dir=model_dir, device=device, api_url=MEDGEMMA_API_URL)
     }
 
     try:

@@ -157,9 +157,13 @@ if __name__ == "__main__":
         # "WebBrowserTool",  # For web browsing and search capabilities
         # "MedicalRAGTool",  # For retrieval-augmented generation with medical knowledge
         # "PythonSandboxTool",  # Add the Python sandbox tool
-        "MedGemmaVQATool"
+        "MedGemmaVQATool"  # Google MedGemma VQA tool
     ]
 
+    # Setup the MedGemma environment if the MedGemmaVQATool is selected
+    if "MedGemmaVQATool" in selected_tools:
+        setup_medgemma_env()
+
     # Configure the Retrieval Augmented Generation (RAG) system
     # This allows the agent to access and use medical knowledge documents
     rag_config = RAGConfig(

@@ -185,7 +189,7 @@ if __name__ == "__main__":
         model_dir="model-weights",
         temp_dir="temp",  # Change this to the path of the temporary directory
         device="cuda",
-        model="
+        model="gpt-4.1-2025-04-14",  # Change this to the model you want to use, e.g. gpt-4.1-2025-04-14, gemini-2.5-pro
         temperature=0.7,
         top_p=0.95,
         model_kwargs=model_kwargs,
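The main.py change follows the file's existing lazy-registration pattern: each tool name maps to a zero-argument factory, and only the selected tools are instantiated. A condensed sketch of that pattern (the values of `selected_tools` and the URL are illustrative; the real dict lives in `initialize_agent`):

```python
# Condensed sketch of the registration pattern used in initialize_agent.
from medrax.tools.vqa.medgemma.medgemma_client import MedGemmaAPIClientTool

MEDGEMMA_API_URL = "http://127.0.0.1:8002"  # assumed local service

tool_factories = {
    # Zero-argument lambdas defer construction until a tool is selected.
    "MedGemmaVQATool": lambda: MedGemmaAPIClientTool(api_url=MEDGEMMA_API_URL),
}
selected_tools = ["MedGemmaVQATool"]
tools = [tool_factories[name]() for name in selected_tools if name in tool_factories]
```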
medrax/tools/__init__.py
CHANGED

@@ -3,8 +3,7 @@
 from .classification import *
 from .report_generation import *
 from .segmentation import *
-from .
-from .llava_med import *
+from .vqa import *
 from .grounding import *
 from .generation import *
 from .dicom import *

@@ -13,4 +12,3 @@ from .rag import *
 from .web_browser import *
 from .python_tool import *
 from .medsam2 import *
-from .medgemma_client import *
medrax/tools/vqa/__init__.py
ADDED

@@ -0,0 +1,16 @@
"""Visual Question Answering tools for medical images."""

from .llava_med import LlavaMedTool, LlavaMedInput
from .xray_vqa import CheXagentXRayVQATool, XRayVQAToolInput
from .medgemma.medgemma_client import MedGemmaAPIClientTool, MedGemmaVQAInput  # client module lives in the medgemma/ subpackage
from .medgemma.medgemma_setup import setup_medgemma_env

__all__ = [
    "LlavaMedTool",
    "LlavaMedInput",
    "CheXagentXRayVQATool",
    "XRayVQAToolInput",
    "MedGemmaAPIClientTool",
    "MedGemmaVQAInput",
    "setup_medgemma_env",
]
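With these re-exports in place, downstream code can import the tools from the package root instead of individual modules; a one-line sketch:

```python
from medrax.tools.vqa import MedGemmaAPIClientTool, setup_medgemma_env
```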
medrax/tools/vqa/llava_med.py
ADDED

@@ -0,0 +1,186 @@
from typing import Any, Dict, Optional, Tuple, Type
from pydantic import BaseModel, Field

import torch

from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool

from PIL import Image


from medrax.llava.conversation import conv_templates
from medrax.llava.model.builder import load_pretrained_model
from medrax.llava.mm_utils import tokenizer_image_token, process_images
from medrax.llava.constants import (
    IMAGE_TOKEN_INDEX,
    DEFAULT_IMAGE_TOKEN,
    DEFAULT_IM_START_TOKEN,
    DEFAULT_IM_END_TOKEN,
)


class LlavaMedInput(BaseModel):
    """Input for the LLaVA-Med Visual QA tool. Only supports JPG or PNG images."""

    question: str = Field(..., description="The question to ask about the medical image")
    image_path: Optional[str] = Field(
        None,
        description="Path to the medical image file (optional), only supports JPG or PNG images",
    )


class LlavaMedTool(BaseTool):
    """Tool that performs medical visual question answering using LLaVA-Med.

    This tool uses a large language model fine-tuned on medical images to answer
    questions about medical images. It can handle both image-based questions and
    general medical questions without images.
    """

    name: str = "llava_med_qa"
    description: str = (
        "A tool that answers questions about biomedical images and general medical questions using LLaVA-Med. "
        "While it can process chest X-rays, it may not be as reliable for detailed chest X-ray analysis. "
        "Input should be a question and optionally a path to a medical image file."
    )
    args_schema: Type[BaseModel] = LlavaMedInput
    tokenizer: Any = None
    model: Any = None
    image_processor: Any = None
    context_len: int = 200000

    def __init__(
        self,
        model_path: str = "microsoft/llava-med-v1.5-mistral-7b",
        cache_dir: str = "/model-weights",
        low_cpu_mem_usage: bool = True,
        torch_dtype: torch.dtype = torch.bfloat16,
        device: str = "cuda",
        load_in_4bit: bool = False,
        load_in_8bit: bool = False,
        **kwargs,
    ):
        super().__init__()
        self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
            model_path=model_path,
            model_base=None,
            model_name=model_path,
            load_in_4bit=load_in_4bit,
            load_in_8bit=load_in_8bit,
            cache_dir=cache_dir,
            low_cpu_mem_usage=low_cpu_mem_usage,
            torch_dtype=torch_dtype,
            device=device,
            **kwargs,
        )
        self.model.eval()

    def _process_input(
        self, question: str, image_path: Optional[str] = None
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        if self.model.config.mm_use_im_start_end:
            question = (
                DEFAULT_IM_START_TOKEN
                + DEFAULT_IMAGE_TOKEN
                + DEFAULT_IM_END_TOKEN
                + "\n"
                + question
            )
        else:
            question = DEFAULT_IMAGE_TOKEN + "\n" + question

        conv = conv_templates["vicuna_v1"].copy()
        conv.append_message(conv.roles[0], question)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()

        input_ids = (
            tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
            .unsqueeze(0)
            .cuda()
        )

        image_tensor = None
        if image_path:
            image = Image.open(image_path)
            image_tensor = process_images([image], self.image_processor, self.model.config)[0]
            image_tensor = image_tensor.unsqueeze(0).half().cuda()

        return input_ids, image_tensor

    def _run(
        self,
        question: str,
        image_path: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Tuple[str, Dict]:
        """Answer a medical question, optionally based on an input image.

        Args:
            question (str): The medical question to answer.
            image_path (Optional[str]): The path to the medical image file (if applicable).
            run_manager (Optional[CallbackManagerForToolRun]): The callback manager for the tool run.

        Returns:
            Tuple[str, Dict]: A tuple containing the model's answer and any additional metadata.

        Raises:
            Exception: If there's an error processing the input or generating the answer.
        """
        try:
            input_ids, image_tensor = self._process_input(question, image_path)
            input_ids = input_ids.to(device=self.model.device)
            if image_tensor is not None:  # image_tensor is None for text-only questions
                image_tensor = image_tensor.to(device=self.model.device, dtype=self.model.dtype)

            with torch.inference_mode():
                output_ids = self.model.generate(
                    input_ids,
                    images=image_tensor,
                    do_sample=False,
                    temperature=0.2,
                    max_new_tokens=500,
                    use_cache=True,
                )

            output = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
            metadata = {
                "question": question,
                "image_path": image_path,
                "analysis_status": "completed",
            }
            return output, metadata
        except Exception as e:
            return f"Error generating answer: {str(e)}", {
                "question": question,
                "image_path": image_path,
                "analysis_status": "failed",
            }

    async def _arun(
        self,
        question: str,
        image_path: Optional[str] = None,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Tuple[str, Dict]:
        """Asynchronously answer a medical question, optionally based on an input image.

        This method currently calls the synchronous version, as the model inference
        is not inherently asynchronous. For true asynchronous behavior, consider
        using a separate thread or process.

        Args:
            question (str): The medical question to answer.
            image_path (Optional[str]): The path to the medical image file (if applicable).
            run_manager (Optional[AsyncCallbackManagerForToolRun]): The async callback manager for the tool run.

        Returns:
            Tuple[str, Dict]: A tuple containing the model's answer and any additional metadata.

        Raises:
            Exception: If there's an error processing the input or generating the answer.
        """
        return self._run(question, image_path)
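For orientation, a minimal usage sketch of the relocated tool, assuming the LLaVA-Med weights can be downloaded and a CUDA device is present (`chest.png` is a hypothetical path):

```python
from medrax.tools.vqa.llava_med import LlavaMedTool

# 8-bit loading mirrors the configuration main.py uses for this tool.
tool = LlavaMedTool(cache_dir="model-weights", device="cuda", load_in_8bit=True)
answer, metadata = tool._run("What abnormality is visible?", image_path="chest.png")
print(answer)
```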
medrax/tools/vqa/medgemma/medgemma.py
ADDED

@@ -0,0 +1,431 @@
import asyncio
import os
from pathlib import Path
import sys
import traceback
from typing import Any, Dict, List, Optional, Tuple
import uuid

from PIL import Image

from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from pydantic import BaseModel, Field
import torch
import transformers
from transformers import BitsAndBytesConfig, pipeline
import uvicorn

# TODO: delete this
print("ENVIRONMENT CHECK")
print(f"Python Executable: {sys.executable}")
print(f"PyTorch version: {torch.__version__}")
print(f"Transformers version: {transformers.__version__}")

# Configuration
UPLOAD_DIR = "./medgemma_images"

# Create directories if they don't exist
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Pydantic Models for API
class VQAInput(BaseModel):
    """Input schema for the MedGemma VQA API endpoint.

    Defines the structure for requests to the /analyze-images/ endpoint.
    Used for validating incoming API requests and generating OpenAPI documentation.
    """
    prompt: str = Field(..., description="Question or instruction about the medical images")
    system_prompt: Optional[str] = Field(
        "You are an expert radiologist.",
        description="System prompt to set the context for the model",
    )
    max_new_tokens: int = Field(
        300, description="Maximum number of tokens to generate in the response"
    )

class VQAResponse(BaseModel):
    """Response schema for successful MedGemma VQA API requests.

    Defines the structure of successful responses from the /analyze-images/ endpoint.
    Used for response validation and OpenAPI documentation.
    """
    response: str = Field(..., description="Generated medical analysis response from MedGemma model")
    metadata: Dict[str, Any] = Field(..., description="Additional metadata about the analysis request and results")

class ErrorResponse(BaseModel):
    """Error response schema for failed MedGemma VQA API requests.

    Defines the structure of error responses from the /analyze-images/ endpoint.
    Used for error response validation and OpenAPI documentation.
    """
    error: str = Field(..., description="Human-readable error message describing what went wrong")
    metadata: Dict[str, Any] = Field(..., description="Additional metadata about the error and request context")

# MedGemma Model Handling
class MedGemmaModel:
    """Medical visual question answering model using Google's MedGemma 4B model.

    MedGemma is a specialized multimodal AI model trained on medical images and text.
    It provides expert-level analysis for chest X-rays, dermatology images,
    ophthalmology images, and histopathology slides.

    Key capabilities:
    - Medical image classification and analysis across multiple modalities
    - Visual question answering for radiology, dermatology, pathology, ophthalmology
    - Clinical reasoning and medical knowledge integration
    - Multi-modal medical understanding (text + images)
    - Support for up to 128K context length

    Performance:
    - Full precision (bfloat16): ~8GB VRAM, recommended for medical applications
    - 4-bit quantization (default): Available but may affect quality on some systems

    This class implements a singleton pattern to ensure only one model instance
    is loaded in memory, optimizing resource usage for the FastAPI service.
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        """Create or return the singleton instance of MedGemmaModel.

        Ensures only one model instance exists in memory, preventing
        multiple model loads and conserving GPU memory.

        Returns:
            MedGemmaModel: The singleton instance
        """
        if not cls._instance:
            cls._instance = super(MedGemmaModel, cls).__new__(cls)
        return cls._instance

    def __init__(
        self,
        model_name: str = "google/medgemma-4b-it",
        device: Optional[str] = "cuda",
        dtype: torch.dtype = torch.bfloat16,
        cache_dir: Optional[str] = None,
        load_in_4bit: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize the MedGemmaModel.

        Args:
            model_name: Name of the MedGemma model to use (default: "google/medgemma-4b-it")
            device: Device to run model on - "cuda" or "cpu" (default: "cuda")
            dtype: Data type for model weights - bfloat16 recommended for efficiency (default: torch.bfloat16)
            cache_dir: Directory to cache downloaded models (default: None)
            load_in_4bit: Whether to load model in 4-bit quantization for memory efficiency (default: True)
            **kwargs: Additional arguments passed to the model pipeline

        Raises:
            RuntimeError: If model initialization fails (e.g., insufficient GPU memory)
        """
        # Re-initialization guard
        if hasattr(self, 'pipe') and self.pipe is not None:
            return

        self.device = device if device and torch.cuda.is_available() else "cpu"
        self.dtype = dtype
        self.cache_dir = cache_dir

        # Setup model configuration
        model_kwargs = {
            "torch_dtype": self.dtype,
        }

        if cache_dir:
            model_kwargs["cache_dir"] = cache_dir

        # Handle device mapping and quantization
        pipeline_kwargs = {
            "model": model_name,
            "model_kwargs": model_kwargs,
            "trust_remote_code": True,
            "use_cache": True,
        }

        if load_in_4bit:
            model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
            model_kwargs["device_map"] = {"": self.device}

        try:
            self.pipe = pipeline("image-text-to-text", **pipeline_kwargs)
        except Exception as e:
            raise RuntimeError(f"Failed to initialize MedGemma pipeline: {str(e)}")

    def _prepare_messages(
        self, image_paths: List[str], prompt: str, system_prompt: str
    ) -> Tuple[List[Dict[str, Any]], List[Image.Image]]:
        """Prepare chat messages in the format expected by MedGemma.

        Converts image paths to PIL Image objects and formats them into the
        chat message structure that MedGemma expects for multimodal input.

        Args:
            image_paths: List of file paths to medical images
            prompt: User's question or instruction about the images
            system_prompt: System context message to set the model's role

        Returns:
            Tuple containing:
            - List of formatted chat messages for MedGemma
            - List of loaded PIL Image objects

        Raises:
            FileNotFoundError: If any image file cannot be found
        """
        images = []
        for path in image_paths:
            if not Path(path).is_file():
                raise FileNotFoundError(f"Image file not found: {path}")

            image = Image.open(path)
            if image.mode != "RGB":
                image = image.convert("RGB")
            images.append(image)

        # Create messages in chat format
        messages = [
            {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}]
                + [{"type": "image", "image": img} for img in images],
            },
        ]

        return messages, images

    def _generate_response(self, messages: List[Dict[str, Any]], max_new_tokens: int) -> str:
        """Generate response using MedGemma pipeline.

        Processes the formatted messages through the MedGemma model to generate
        a medical analysis response.

        Args:
            messages: Formatted chat messages with images and text
            max_new_tokens: Maximum number of tokens to generate in response

        Returns:
            Generated response text from MedGemma model
        """
        # Generate using pipeline
        output = self.pipe(
            text=messages,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

        # Extract generated text from pipeline output
        if (
            isinstance(output, list)
            and output
            and isinstance(output[0].get("generated_text"), list)
        ):
            generated_text = output[0]["generated_text"]
            if generated_text:
                return generated_text[-1].get("content", "").strip()

        return "No response generated"

    def _create_error_response(
        self,
        image_paths: List[str],
        prompt: str,
        error_message: str,
        error_type: str,
        error_details: str,
    ) -> Dict[str, Any]:
        """Create standardized error response metadata.

        Generates consistent error metadata structure for logging and debugging
        purposes across different error scenarios.

        Args:
            image_paths: List of image paths that were being processed
            prompt: User prompt that was being processed
            error_message: Human-readable error message
            error_type: Categorization of the error (e.g., "memory_error", "file_not_found")
            error_details: Detailed technical error information

        Returns:
            Dictionary containing standardized error metadata
        """
        return {
            "image_paths": image_paths,
            "prompt": prompt,
            "analysis_status": "failed",
            "error_type": error_type,
            "error_details": error_details,
        }

    async def aget_response(self, image_paths: List[str], prompt: str, system_prompt: str, max_new_tokens: int) -> str:
        """Async method to get response from MedGemma model.

        Main entry point for generating medical analysis responses. Handles
        the complete pipeline from image loading to response generation
        in an asynchronous manner.

        Args:
            image_paths: List of file paths to medical images
            prompt: User's question or instruction about the images
            system_prompt: System context message to set the model's role
            max_new_tokens: Maximum number of tokens to generate in response

        Returns:
            Generated medical analysis response as a string

        Raises:
            FileNotFoundError: If any image file cannot be found
            RuntimeError: If model inference fails
        """
        loop = asyncio.get_event_loop()
        messages, _ = await loop.run_in_executor(None, self._prepare_messages, image_paths, prompt, system_prompt)

        def _generate():
            return self._generate_response(messages, max_new_tokens)

        return await loop.run_in_executor(None, _generate)

# FastAPI Application
app = FastAPI(
    title="MedGemma VQA API",
    description="API for medical visual question answering using Google's MedGemma model."
)

medgemma_model: Optional[MedGemmaModel] = None

@app.on_event("startup")
async def startup_event():
    """Load the MedGemma model at application startup.

    This function is called when the FastAPI application starts up.
    It initializes the MedGemma model as a global singleton instance,
    ensuring the model is loaded and ready to handle requests.

    The model is loaded with default settings optimized for medical
    image analysis, including 4-bit quantization for memory efficiency.

    Raises:
        SystemExit: If model loading fails, the application will exit
            to prevent serving requests with an unavailable model.
    """
    global medgemma_model
    try:
        medgemma_model = MedGemmaModel()
        print("MedGemma model loaded successfully.")
    except RuntimeError as e:
        print(f"Error loading MedGemma model: {e}")
        exit(1)

@app.post("/analyze-images/",
          response_model=VQAResponse,
          responses={
              500: {"model": ErrorResponse, "description": "Internal server error or model inference failure"},
              404: {"model": ErrorResponse, "description": "Image file not found"},
              400: {"description": "Invalid request format or unsupported image type"},
              503: {"description": "Model not available or not loaded"}
          },
          summary="Analyze one or more medical images",
          description="Upload medical images and receive AI-powered analysis using Google's MedGemma model.")
async def analyze_images(
    images: List[UploadFile] = File(..., description="List of medical image files to analyze (JPG or PNG)."),
    prompt: str = Form(..., description="Question or instruction about the medical images."),
    system_prompt: Optional[str] = Form("You are an expert radiologist.", description="System prompt to set the context for the model."),
    max_new_tokens: int = Form(100, description="Maximum number of tokens to generate in the response.")
):
    """Analyze medical images using MedGemma AI model.

    This endpoint accepts one or more medical images along with a prompt
    and returns AI-generated medical analysis.

    The endpoint handles the complete pipeline:
    1. Validates uploaded image files
    2. Saves images temporarily to disk
    3. Processes images through MedGemma model
    4. Returns structured analysis with metadata
    5. Cleans up temporary files

    Args:
        images: List of uploaded image files (JPG/PNG format)
        prompt: Medical question or instruction about the images
        system_prompt: Context setting for the AI model (default: radiologist role)
        max_new_tokens: Maximum response length (default: 100)

    Returns:
        VQAResponse: Contains the AI-generated analysis and request metadata

    Raises:
        HTTPException 400: Invalid image format or request structure
        HTTPException 404: Image file not found during processing
        HTTPException 500: Model inference error or memory issues
        HTTPException 503: Model not available for processing
    """
    # Check if model is available
    if medgemma_model is None or medgemma_model.pipe is None:
        raise HTTPException(status_code=503, detail="Model is not available. Please try again later.")

    # Process uploaded images
    image_paths = []
    for image in images:
        # Validate image format
        if image.content_type not in ["image/jpeg", "image/png"]:
            raise HTTPException(status_code=400, detail=f"Unsupported image format: {image.content_type}. Only JPG and PNG are supported.")

        # Generate unique filename to avoid conflicts
        unique_filename = f"{uuid.uuid4()}_{image.filename}"
        file_path = os.path.join(UPLOAD_DIR, unique_filename)

        try:
            # Save uploaded image to disk
            with open(file_path, "wb") as buffer:
                buffer.write(await image.read())
            image_paths.append(file_path)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to save uploaded image: {str(e)}")

    try:
        # Generate AI analysis
        response_text = await medgemma_model.aget_response(image_paths, prompt, system_prompt, max_new_tokens)

        # Prepare success response
        metadata = {
            "image_paths": image_paths,
            "prompt": prompt,
            "system_prompt": system_prompt,
            "max_new_tokens": max_new_tokens,
            "num_images": len(image_paths),
            "analysis_status": "completed",
        }
        return VQAResponse(response=response_text, metadata=metadata)

    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=f"Image file not found: {str(e)}")
    except torch.cuda.OutOfMemoryError as e:
        error_message = "GPU memory exhausted. Try reducing image resolution or max_new_tokens."
        metadata = medgemma_model._create_error_response(
            image_paths, prompt, error_message, "memory_error", str(e)
        )
        raise HTTPException(status_code=500, detail=error_message)
    except Exception as e:
        traceback.print_exc()
        metadata = medgemma_model._create_error_response(
            image_paths, prompt, f"Analysis failed: {str(e)}", "general_error", str(e)
        )
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
    finally:
        # Clean up temporary image files
        for path in image_paths:
            try:
                os.remove(path)
            except OSError:
                pass

if __name__ == "__main__":
    """Launch the MedGemma VQA API server.

    Starts the FastAPI application with uvicorn server, binding to all
    network interfaces on port 8002.
    """
    uvicorn.run(app, host="0.0.0.0", port=8002)
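The client tool in the next file wraps this endpoint, but the service can also be exercised directly over HTTP. A minimal sketch using the form fields defined above (`chest.png` is a hypothetical file name):

```python
import httpx

# Read the image eagerly, matching the multipart field names the endpoint expects.
with open("chest.png", "rb") as f:
    files = [("images", ("chest.png", f.read(), "image/png"))]
data = {"prompt": "Describe this chest X-ray.", "max_new_tokens": "200"}

r = httpx.post("http://127.0.0.1:8002/analyze-images/", data=data, files=files, timeout=300.0)
r.raise_for_status()
print(r.json()["response"])
```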
medrax/tools/vqa/medgemma/medgemma_client.py
ADDED

@@ -0,0 +1,290 @@
import os
from typing import Any, Dict, List, Optional, Tuple, Type

import httpx
from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field

class MedGemmaVQAInput(BaseModel):
    """Input schema for the MedGemma VQA Tool. Only supports JPG or PNG images."""
    image_paths: List[str] = Field(
        ...,
        description="List of paths to medical image files to analyze, only supports JPG or PNG images",
    )
    prompt: str = Field(..., description="Question or instruction about the medical images")
    system_prompt: Optional[str] = Field(
        "You are an expert radiologist.",
        description="System prompt to set the context for the model",
    )
    max_new_tokens: int = Field(
        300, description="Maximum number of tokens to generate in the response"
    )

class MedGemmaAPIClientTool(BaseTool):
    """Medical visual question answering tool using Google's MedGemma 4B model via API.

    MedGemma is a specialized multimodal AI model trained on medical images and text.
    It provides expert-level analysis for chest X-rays, dermatology images,
    ophthalmology images, and histopathology slides.

    Key capabilities:
    - Medical image classification and analysis across multiple modalities
    - Visual question answering for radiology, dermatology, pathology, ophthalmology
    - Clinical reasoning and medical knowledge integration
    - Multi-modal medical understanding (text + images)
    - Support for up to 128K context length

    Performance:
    - Full precision (bfloat16): ~8GB VRAM, recommended for medical applications
    - 4-bit quantization (default): Available but may affect quality on some systems
    """

    name: str = "medgemma_medical_vqa"
    description: str = (
        "Advanced medical visual question answering tool using Google's MedGemma 4B instruction-tuned model via API. "
        "Specialized for comprehensive medical image analysis across multiple modalities including chest X-rays, "
        "dermatology images, ophthalmology images, and histopathology slides. Provides expert-level medical "
        "reasoning, diagnosis assistance, and detailed image interpretation with radiologist-level expertise. "
        "Input: List of medical image paths and medical question/prompt with optional custom system prompt. "
        "Output: Comprehensive medical analysis and answers based on visual content with detailed reasoning. "
        "Supports multi-image analysis, comparative studies, and complex medical reasoning tasks. "
        "Model handles images up to 896x896 resolution and supports context up to 128K tokens."
    )
    args_schema: Type[BaseModel] = MedGemmaVQAInput
    return_direct: bool = True

    # API configuration
    api_url: str  # The URL of the running FastAPI service

    def __init__(self, api_url: str, **kwargs: Any):
        """Initialize the MedGemmaAPIClientTool.

        Args:
            api_url: The URL of the running MedGemma FastAPI service
            **kwargs: Additional arguments passed to BaseTool
        """
        super().__init__(api_url=api_url, **kwargs)

    def _prepare_request_data(
        self, image_paths: List[str], prompt: str, system_prompt: str, max_new_tokens: int
    ) -> Tuple[List, Dict, List]:
        """Prepare multipart form data for API request.

        Args:
            image_paths: List of paths to medical images
            prompt: Question or instruction about the images
            system_prompt: System context for the model
            max_new_tokens: Maximum number of tokens to generate

        Returns:
            Tuple of files list, data dictionary, and the list of opened file
            handles (empty here, since each file's contents are read eagerly)
        """
        files_to_send = []
        opened_files = []

        for path in image_paths:
            # Each file is read eagerly inside a context manager, so no handle
            # stays open; the content type is sent as image/jpeg for all files.
            with open(path, "rb") as f:
                files_to_send.append(("images", (os.path.basename(path), f.read(), "image/jpeg")))

        data = {
            "prompt": prompt,
            "system_prompt": system_prompt,
            "max_new_tokens": max_new_tokens,
        }

        return files_to_send, data, opened_files

    def _create_error_response(
        self,
        image_paths: List[str],
        prompt: str,
        error_message: str,
        error_type: str,
        error_details: str,
    ) -> Tuple[Dict[str, Any], Dict]:
        """Create standardized error response.

        Args:
            image_paths: List of image paths
            prompt: User prompt
            error_message: Human-readable error message
            error_type: Type of error
            error_details: Detailed error information

        Returns:
            Tuple of error output and metadata
        """
        output = {"error": error_message}
        metadata = {
            "image_paths": image_paths,
            "prompt": prompt,
            "analysis_status": "failed",
            "error_type": error_type,
            "error_details": error_details,
        }
        return output, metadata

    def _run(
        self,
        image_paths: List[str],
        prompt: str,
        system_prompt: str = "You are an expert radiologist.",
        max_new_tokens: int = 300,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Tuple[Dict[str, Any], Dict]:
        """Execute medical visual question answering via API.

        Args:
            image_paths: List of paths to medical images
            prompt: Question or instruction about the images
            system_prompt: System context for the model
            max_new_tokens: Maximum number of tokens to generate
            run_manager: Optional callback manager

        Returns:
            Tuple of output dictionary and metadata
        """
        # httpx is a modern HTTP client that supports sync and async
        timeout_config = httpx.Timeout(300.0, connect=10.0)
        client = httpx.Client(timeout=timeout_config)

        try:
            # Prepare the multipart form data
            files_to_send, data, opened_files = self._prepare_request_data(
                image_paths, prompt, system_prompt, max_new_tokens
            )

            response = client.post(
                f"{self.api_url}/analyze-images/",
                data=data,
                files=files_to_send,
            )
            response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)

            response_data = response.json()
            output = {"response": response_data["response"]}

            metadata = {
                "image_paths": image_paths,
                "prompt": prompt,
                "system_prompt": system_prompt,
                "max_new_tokens": max_new_tokens,
                "num_images": len(image_paths),
                "analysis_status": "completed",
            }

            return output, metadata

        except httpx.TimeoutException as e:
            return self._create_error_response(
                image_paths,
                prompt,
                f"Error: The request to the MedGemma API timed out after {timeout_config.read} seconds. The server might be overloaded or the model is taking too long to load. Try again later.",
                "timeout_error",
                str(e)
            )
        except httpx.ConnectError as e:
            return self._create_error_response(
                image_paths,
                prompt,
                f"Error: Could not connect to the MedGemma API. Check if the server address '{self.api_url}' is correct and running.",
                "connection_error",
                str(e)
            )
        except httpx.HTTPStatusError as e:
            return self._create_error_response(
                image_paths,
                prompt,
                f"Error: The MedGemma API returned an error (Status {e.response.status_code}): {e.response.text}",
                "http_error",
                f"Status {e.response.status_code}: {e.response.text}"
            )
        except Exception as e:
            return self._create_error_response(
                image_paths,
                prompt,
                f"An unexpected error occurred in the MedGemma client tool: {str(e)}",
                "general_error",
                str(e)
            )
        finally:
            # Ensure all opened files are closed and the HTTP client is released
            if 'opened_files' in locals():
                for f in opened_files:
                    f.close()
            client.close()

    async def _arun(
        self,
        image_paths: List[str],
        prompt: str,
        system_prompt: str = "You are an expert radiologist.",
        max_new_tokens: int = 300,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Tuple[Dict[str, Any], Dict]:
        """Execute the tool asynchronously."""
        async with httpx.AsyncClient() as client:
            try:
                # Prepare the multipart form data
                files_to_send, data, opened_files = self._prepare_request_data(
                    image_paths, prompt, system_prompt, max_new_tokens
                )

                response = await client.post(
                    f"{self.api_url}/analyze-images/",
                    data=data,
                    files=files_to_send,
                    timeout=120.0
                )
                response.raise_for_status()

                response_data = response.json()
                output = {"response": response_data["response"]}

                metadata = {
                    "image_paths": image_paths,
                    "prompt": prompt,
                    "system_prompt": system_prompt,
                    "max_new_tokens": max_new_tokens,
                    "num_images": len(image_paths),
                    "analysis_status": "completed",
                }

                return output, metadata

            except httpx.HTTPStatusError as e:
                return self._create_error_response(
                    image_paths,
                    prompt,
                    f"Error calling MedGemma API: {e.response.status_code} - {e.response.text}",
                    "http_error",
                    f"Status {e.response.status_code}: {e.response.text}"
                )
            except Exception as e:
                return self._create_error_response(
                    image_paths,
                    prompt,
                    f"An unexpected error occurred: {str(e)}",
                    "general_error",
                    str(e)
                )
            finally:
                # Ensure all opened files are closed
                if 'opened_files' in locals():
                    for f in opened_files:
                        f.close()

# TODO: delete this
if __name__ == "__main__":
    tool = MedGemmaAPIClientTool(api_url="http://kn045:8002")
    output, metadata = tool._run(
        image_paths=["/home/emxie/scratch/MedRAX2/demo/chest/pneumonia1.jpg"],
        prompt="Classify the xray",
        system_prompt="You are a radiologist.",
        max_new_tokens=300
    )
    print(output)
    print(metadata)
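Besides the synchronous `_run` exercised in the demo block above, the tool implements `_arun`. A sketch of driving it from asyncio, assuming a local service and a hypothetical image path:

```python
import asyncio

from medrax.tools.vqa.medgemma.medgemma_client import MedGemmaAPIClientTool

async def main():
    tool = MedGemmaAPIClientTool(api_url="http://127.0.0.1:8002")  # assumed local service
    output, metadata = await tool._arun(
        image_paths=["chest.png"],  # hypothetical path
        prompt="Any signs of pleural effusion?",
    )
    print(output, metadata["analysis_status"])

asyncio.run(main())
```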
medrax/tools/vqa/medgemma/medgemma_requirements.txt
ADDED

@@ -0,0 +1,55 @@
accelerate==1.9.0
annotated_types==0.7.0+computecanada
anyio==4.9.0+computecanada
bitsandbytes==0.46.0+computecanada
certifi==2025.7.14+computecanada
charset_normalizer==3.4.2+computecanada
click==8.2.1+computecanada
fastapi==0.116.1+computecanada
filelock==3.18.0+computecanada
fsspec==2025.7.0+computecanada
h11==0.16.0+computecanada
hf_xet==1.1.3+computecanada
httpcore==1.0.9+computecanada
httpx==0.28.1+computecanada
huggingface-hub==0.34.3
idna==3.10+computecanada
inquirerpy==0.3.4+computecanada
jinja2==3.1.6+computecanada
jsonpatch==1.33+computecanada
jsonpointer==3.0.0+computecanada
langchain-core==0.3.72
langsmith==0.4.8+computecanada
MarkupSafe==2.1.5+computecanada
mpmath==1.3.0+computecanada
networkx==3.5+computecanada
numpy==2.2.2+computecanada
orjson==3.10.5+computecanada
packaging==25.0+computecanada
pfzy==0.3.4+computecanada
pillow==11.1.0+computecanada
prompt_toolkit==3.0.51+computecanada
psutil==6.1.1+computecanada
pydantic==2.11.7+computecanada
pydantic_core==2.33.2+computecanada
python_multipart==0.0.20+computecanada
PyYAML==6.0.2+computecanada
regex==2024.11.6+computecanada
requests==2.32.4+computecanada
requests_toolbelt==1.0.0+computecanada
safetensors==0.5.3+computecanada
sniffio==1.3.1+computecanada
sshuttle==1.3.1
starlette==0.47.2
sympy==1.14.0+computecanada
tenacity==9.1.2+computecanada
tokenizers==0.21.1+computecanada
torch==2.7.1+computecanada
tqdm==4.67.1+computecanada
transformers==4.54.1
typing_extensions==4.14.1+computecanada
typing_inspection==0.4.1+computecanada
urllib3==2.5.0+computecanada
uvicorn==0.35.0+computecanada
wcwidth==0.2.13+computecanada
zstandard==0.23.0+computecanada
medrax/tools/vqa/medgemma/medgemma_setup.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import subprocess
import venv
from pathlib import Path


def setup_medgemma_env():
    """Set up the MedGemma virtual environment and launch the FastAPI service.

    This function performs the following steps:
    1. Creates a virtual environment for MedGemma if it doesn't exist
    2. Installs MedGemma-specific dependencies from medgemma_requirements.txt
    3. Launches the MedGemma FastAPI service in the isolated environment

    Returns:
        None: The MedGemma service is launched as a background process.

    Raises:
        subprocess.CalledProcessError: If pip installation fails.
        FileNotFoundError: If required files are missing.
        OSError: If virtual environment creation fails.
    """
    # Get the directory containing this script
    current_dir = Path(__file__).resolve().parent

    # Define paths for MedGemma components
    medgemma_path = current_dir / "medgemma.py"
    requirements_path = current_dir / "medgemma_requirements.txt"
    env_dir = current_dir / "medgemma_env"

    # Fail early if the service script or requirements file is missing
    for required_file in (medgemma_path, requirements_path):
        if not required_file.is_file():
            raise FileNotFoundError(f"Required file not found: {required_file}")

    # Determine executable paths based on operating system
    if os.name == "nt":  # Windows
        pip_executable = env_dir / "Scripts" / "pip"
        python_executable = env_dir / "Scripts" / "python"
    else:  # Unix/Linux/macOS
        pip_executable = env_dir / "bin" / "pip"
        python_executable = env_dir / "bin" / "python"

    # Create the virtual environment and install dependencies on first run
    if not env_dir.exists():
        print("Creating MedGemma virtual environment...")
        venv.create(env_dir, with_pip=True)

        print("Installing MedGemma dependencies...")
        subprocess.check_call(
            [str(pip_executable), "install", "-r", str(requirements_path)]
        )

    # Ensure the environment exists before accessing its executables
    if not env_dir.exists():
        raise RuntimeError("Failed to create MedGemma virtual environment")

    # Launch the MedGemma FastAPI service as a background process
    print("Launching MedGemma FastAPI service...")
    subprocess.Popen(
        [str(python_executable), str(medgemma_path)],
        # stdout/stderr redirection left off for easier debugging:
        # stdout=subprocess.DEVNULL,
        # stderr=subprocess.DEVNULL,
    )
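A minimal usage sketch (not part of this commit): run the setup once, then poll until the service answers before sending requests, since model loading can take minutes. The `http://127.0.0.1:8002` address matches the default `MEDGEMMA_API_URL`, but the `/health` route below is an assumption; substitute whatever endpoint `medgemma.py` actually exposes:

```python
# Illustrative startup flow; the /health route is hypothetical.
import time

import httpx  # pinned in medgemma_requirements.txt

from medrax.tools.vqa.medgemma.medgemma_setup import setup_medgemma_env

setup_medgemma_env()  # creates the venv on first run, then launches the service

for _ in range(60):  # wait up to ~5 minutes
    try:
        if httpx.get("http://127.0.0.1:8002/health", timeout=2.0).status_code == 200:
            print("MedGemma service is up")
            break
    except httpx.TransportError:
        pass  # service not listening yet
    time.sleep(5)
```

Running MedGemma in its own virtual environment keeps its pinned `transformers==4.54.1` from conflicting with the main MedRAX environment; that isolation is also why the tool talks to the model over HTTP instead of importing it directly.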
medrax/tools/vqa/xray_vqa.py
ADDED
@@ -0,0 +1,186 @@
from typing import Dict, List, Optional, Tuple, Type, Any
from pathlib import Path

from pydantic import BaseModel, Field

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool


class XRayVQAToolInput(BaseModel):
    """Input schema for the CheXagent tool."""

    image_paths: List[str] = Field(
        ..., description="List of paths to chest X-ray images to analyze"
    )
    prompt: str = Field(..., description="Question or instruction about the chest X-ray images")
    max_new_tokens: int = Field(
        512, description="Maximum number of tokens to generate in the response"
    )


class CheXagentXRayVQATool(BaseTool):
    """Tool that leverages CheXagent for comprehensive chest X-ray analysis."""

    name: str = "chexagent_xray_vqa"
    description: str = (
        "A versatile tool for analyzing chest X-rays. "
        "Can perform multiple tasks including: visual question answering, report generation, "
        "abnormality detection, comparative analysis, anatomical description, "
        "and clinical interpretation. Input should be paths to X-ray images "
        "and a natural language prompt describing the analysis needed."
    )
    args_schema: Type[BaseModel] = XRayVQAToolInput
    return_direct: bool = True
    cache_dir: Optional[str] = None
    device: Optional[str] = None
    dtype: torch.dtype = torch.bfloat16
    tokenizer: Optional[AutoTokenizer] = None
    model: Optional[AutoModelForCausalLM] = None

    def __init__(
        self,
        model_name: str = "StanfordAIMI/CheXagent-2-3b",
        device: Optional[str] = "cuda",
        dtype: torch.dtype = torch.bfloat16,
        cache_dir: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize the CheXagentXRayVQATool.

        Args:
            model_name: Name of the CheXagent model to use
            device: Device to run the model on (cuda/cpu)
            dtype: Data type for model weights
            cache_dir: Directory to cache downloaded models
            **kwargs: Additional arguments passed to BaseTool
        """
        super().__init__(**kwargs)

        # Fragile workaround: temporarily report an older transformers version
        # so that CheXagent's remote code loads under this newer release, then
        # restore the real version string after loading.
        original_transformers_version = transformers.__version__
        transformers.__version__ = "4.40.0"

        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.dtype = dtype
        self.cache_dir = cache_dir

        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            cache_dir=cache_dir,
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=self.device,
            trust_remote_code=True,
            cache_dir=cache_dir,
        )
        self.model = self.model.to(dtype=self.dtype)
        self.model.eval()

        transformers.__version__ = original_transformers_version

    def _generate_response(self, image_paths: List[str], prompt: str, max_new_tokens: int) -> str:
        """Generate a response using the CheXagent model.

        Args:
            image_paths: List of paths to chest X-ray images
            prompt: Question or instruction about the images
            max_new_tokens: Maximum number of tokens to generate

        Returns:
            str: The model's response
        """
        # Build a multimodal query: all images first, then the text prompt
        query = self.tokenizer.from_list_format(
            [*[{"image": path} for path in image_paths], {"text": prompt}]
        )
        conv = [
            {"from": "system", "value": "You are a helpful assistant."},
            {"from": "human", "value": query},
        ]
        input_ids = self.tokenizer.apply_chat_template(
            conv, add_generation_prompt=True, return_tensors="pt"
        ).to(device=self.device)

        # Run greedy decoding without gradient tracking
        with torch.inference_mode():
            output = self.model.generate(
                input_ids,
                do_sample=False,
                num_beams=1,
                temperature=1.0,
                top_p=1.0,
                use_cache=True,
                max_new_tokens=max_new_tokens,
            )[0]
        # Decode only the newly generated tokens, dropping the final (EOS) token
        response = self.tokenizer.decode(output[input_ids.size(1) : -1])

        return response

    def _run(
        self,
        image_paths: List[str],
        prompt: str,
        max_new_tokens: int = 512,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Tuple[Dict[str, Any], Dict]:
        """Execute the chest X-ray analysis.

        Args:
            image_paths: List of paths to chest X-ray images
            prompt: Question or instruction about the images
            max_new_tokens: Maximum number of tokens to generate
            run_manager: Optional callback manager

        Returns:
            Tuple[Dict[str, Any], Dict]: Output dictionary and metadata dictionary
        """
        try:
            # Verify that every image path exists before running inference
            for path in image_paths:
                if not Path(path).is_file():
                    raise FileNotFoundError(f"Image file not found: {path}")

            response = self._generate_response(image_paths, prompt, max_new_tokens)

            output = {
                "response": response,
            }
            metadata = {
                "image_paths": image_paths,
                "prompt": prompt,
                "max_new_tokens": max_new_tokens,
                "analysis_status": "completed",
            }
            return output, metadata

        except Exception as e:
            output = {"error": str(e)}
            metadata = {
                "image_paths": image_paths,
                "prompt": prompt,
                "max_new_tokens": max_new_tokens,
                "analysis_status": "failed",
                "error_details": str(e),
            }
            return output, metadata

    async def _arun(
        self,
        image_paths: List[str],
        prompt: str,
        max_new_tokens: int = 512,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Tuple[Dict[str, Any], Dict]:
        """Async version of _run; delegates to the synchronous implementation."""
        return self._run(image_paths, prompt, max_new_tokens)
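A minimal usage sketch for the CheXagent tool (not part of this commit); `chest_xray.png` and `./model_cache` are placeholder paths, and `_run` is called directly here only for illustration, since inside the agent the tool is dispatched through LangChain's tool interface:

```python
# Illustrative standalone invocation of CheXagentXRayVQATool.
import torch

from medrax.tools.vqa.xray_vqa import CheXagentXRayVQATool

tool = CheXagentXRayVQATool(
    device="cuda" if torch.cuda.is_available() else "cpu",
    cache_dir="./model_cache",  # hypothetical cache location
)
output, metadata = tool._run(
    image_paths=["chest_xray.png"],  # placeholder image path
    prompt="Is there evidence of pleural effusion?",
)
print(output.get("response") or output.get("error"))
print(metadata["analysis_status"])
```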