Spaces:

samwell
/

medrax2

Paused

App Files Files Community

samwell committed 22 days ago

Commit

27f1dea

1 Parent(s): 1f83b1b

Add NVIDIA NV-Reason-CXR tool for expert chest X-ray analysis

Browse files

Files changed (3) hide show

app.py +11 -0
medrax/tools/__init__.py +1 -0
medrax/tools/nv_reason_cxr.py +202 -0

app.py CHANGED Viewed

@@ -34,6 +34,17 @@ tools = []
 if device == "cuda":
     # Load GPU-based tools
     try:
         from medrax.tools import XRayPhraseGroundingTool
         grounding_tool = XRayPhraseGroundingTool(

 if device == "cuda":
     # Load GPU-based tools
+    try:
+        from medrax.tools import NVReasonCXRTool
+        nv_reason_tool = NVReasonCXRTool(
+            device=device,
+            load_in_4bit=True
+        )
+        tools.append(nv_reason_tool)
+        print("✓ Loaded NV-Reason-CXR tool")
+    except Exception as e:
+        print(f"✗ Failed to load NV-Reason-CXR tool: {e}")
     try:
         from medrax.tools import XRayPhraseGroundingTool
         grounding_tool = XRayPhraseGroundingTool(

medrax/tools/__init__.py CHANGED Viewed

@@ -11,3 +11,4 @@ from .utils import *
 from .rag import *
 from .browsing import *
 from .python_tool import *

 from .rag import *
 from .browsing import *
 from .python_tool import *
+from .nv_reason_cxr import *

medrax/tools/nv_reason_cxr.py ADDED Viewed

	@@ -0,0 +1,202 @@

+"""NVIDIA NV-Reason-CXR tool for expert chest X-ray analysis."""
+from typing import Dict, Optional, Tuple, Type, Any
+from pathlib import Path
+import torch
+from PIL import Image
+from pydantic import BaseModel, Field
+from transformers import AutoProcessor, AutoModelForImageTextToText, BitsAndBytesConfig
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForToolRun,
+    CallbackManagerForToolRun,
+)
+from langchain_core.tools import BaseTool
+class NVReasonCXRInput(BaseModel):
+    """Input schema for the NV-Reason-CXR Tool."""
+    image_path: str = Field(
+        ...,
+        description="Path to the chest X-ray image file (JPG or PNG)",
+    )
+    query: str = Field(
+        default="Find abnormalities and support devices.",
+        description="Question or instruction for analyzing the X-ray (e.g., 'Find abnormalities and support devices', 'Provide differential diagnoses', 'Write a structured report')",
+    )
+    max_new_tokens: int = Field(
+        default=2048,
+        description="Maximum number of tokens to generate in response"
+    )
+class NVReasonCXRTool(BaseTool):
+    """Tool for expert chest X-ray analysis using NVIDIA's NV-Reason-CXR model.
+    This tool uses NVIDIA's specialized NV-Reason-CXR-3B model for detailed chest X-ray
+    analysis, including abnormality detection, support device identification, differential
+    diagnoses, and structured report generation.
+    """
+    name: str = "nv_reason_cxr_analysis"
+    description: str = (
+        "Expert chest X-ray analysis using NVIDIA's specialized NV-Reason-CXR model. "
+        "This tool provides detailed medical reasoning and can: "
+        "1) Detect abnormalities and support devices in chest X-rays "
+        "2) Provide differential diagnoses "
+        "3) Generate structured radiology reports "
+        "4) Answer specific questions about chest X-ray findings. "
+        "Use this for comprehensive chest X-ray interpretation. "
+        "Example input: {'image_path': '/path/to/xray.jpg', 'query': 'Find abnormalities and support devices'}"
+    )
+    args_schema: Type[BaseModel] = NVReasonCXRInput
+    model: Any = None
+    processor: Any = None
+    device: str = "cuda"
+    def __init__(
+        self,
+        model_path: str = "nvidia/NV-Reason-CXR-3B",
+        cache_dir: Optional[str] = None,
+        load_in_4bit: bool = True,
+        device: Optional[str] = "cuda",
+    ):
+        """Initialize the NV-Reason-CXR Tool."""
+        super().__init__()
+        self.device = device
+        # Setup quantization config
+        if load_in_4bit:
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.bfloat16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+            )
+        else:
+            quantization_config = None
+        # Load model
+        print(f"Loading NV-Reason-CXR model from {model_path}...")
+        self.model = AutoModelForImageTextToText.from_pretrained(
+            model_path,
+            device_map=self.device,
+            cache_dir=cache_dir,
+            torch_dtype=torch.bfloat16,
+            quantization_config=quantization_config,
+            trust_remote_code=True,
+        ).eval()
+        self.processor = AutoProcessor.from_pretrained(
+            model_path,
+            cache_dir=cache_dir,
+            trust_remote_code=True,
+            use_fast=True,
+        )
+        print(f"✓ NV-Reason-CXR model loaded successfully")
+    def _run(
+        self,
+        image_path: str,
+        query: str = "Find abnormalities and support devices.",
+        max_new_tokens: int = 2048,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> Tuple[Dict[str, Any], Dict]:
+        """Analyze a chest X-ray image using NV-Reason-CXR.
+        Args:
+            image_path: Path to the chest X-ray image file
+            query: Question or instruction for analysis
+            max_new_tokens: Maximum tokens to generate
+            run_manager: Optional callback manager
+        Returns:
+            Tuple[Dict, Dict]: Output dictionary and metadata dictionary
+        """
+        try:
+            # Load image
+            image = Image.open(image_path)
+            if image.mode != "RGB":
+                image = image.convert("RGB")
+            # Prepare messages in chat format
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image"},
+                        {"type": "text", "text": query}
+                    ]
+                }
+            ]
+            # Apply chat template
+            prompt = self.processor.apply_chat_template(
+                messages,
+                add_generation_prompt=True
+            )
+            # Prepare inputs
+            inputs = self.processor(
+                text=prompt,
+                images=[image],
+                return_tensors="pt"
+            )
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            # Generate response
+            with torch.inference_mode():
+                output_ids = self.model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    do_sample=False,  # Deterministic for medical analysis
+                    pad_token_id=self.processor.tokenizer.eos_token_id,
+                )
+            # Decode response
+            prompt_length = inputs["input_ids"].shape[-1]
+            generated_ids = output_ids[0][prompt_length:]
+            response = self.processor.decode(
+                generated_ids,
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=True
+            )
+            output = {
+                "analysis": response,
+                "query": query,
+            }
+            metadata = {
+                "image_path": image_path,
+                "model": "nvidia/NV-Reason-CXR-3B",
+                "device": str(self.device),
+                "tokens_generated": len(generated_ids),
+                "status": "completed",
+            }
+            return output, metadata
+        except Exception as e:
+            output = {
+                "error": str(e),
+                "analysis": None,
+            }
+            metadata = {
+                "image_path": image_path,
+                "status": "failed",
+                "error_details": str(e),
+            }
+            return output, metadata
+    async def _arun(
+        self,
+        image_path: str,
+        query: str = "Find abnormalities and support devices.",
+        max_new_tokens: int = 2048,
+        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+    ) -> Tuple[Dict[str, Any], Dict]:
+        """Asynchronous version of _run."""
+        return self._run(image_path, query, max_new_tokens, run_manager)