Spaces:

samwell
/

medrax2

Paused

samwell Claude committed on Dec 18, 2025

Commit

3a18164

1 Parent(s): 9dc78d7

fix: Improve segmentation overlay and add DICOM support

Segmentation improvements:
- Improved matplotlib overlay rendering with better color opacity
- Added debug logging to track mask detection and alignment
- Changed to subplot-based rendering for better overlay composition
- Increased overlay opacity from 30% to 40% for better visibility
- Added mask count tracking to verify segmentation success

DICOM file support:
- Added DICOM file extensions to Gradio file upload (.dcm, .dicom)
- Updated placeholder text to indicate DICOM support
- Added DICOM file detection in chat function
- DICOM files are passed to agent for processing with DICOM tool
- Added error traceback printing for better debugging

The segmentation overlay should now properly show colored masks
on top of the X-ray image, and DICOM files can be uploaded
without errors.

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show

app.py +41 -29
medrax/tools/segmentation/segmentation.py +24 -11

app.py CHANGED Viewed

@@ -177,41 +177,53 @@ def chat(message, history, mode):
         if files and len(files) > 0:
             image_path = files[0]
             # Store image path for tools to use
             # LangChain Google GenAI expects images as base64 or PIL
             try:
-                # Open and encode image for Gemini
-                with Image.open(image_path) as img:
-                    # Convert to RGB if needed
-                    if img.mode != "RGB":
-                        img = img.convert("RGB")
-                    # Resize if too large (max 4096x4096 for Gemini)
-                    max_size = 4096
-                    if img.width > max_size or img.height > max_size:
-                        img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
-                    # Store as bytes for LangChain
-                    buffered = BytesIO()
-                    img.save(buffered, format="PNG")
-                    img_bytes = buffered.getvalue()
-                    img_b64 = base64.b64encode(img_bytes).decode()
-                    # Create multimodal content for Gemini
-                    # Format: [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}]
-                    image_content = {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/png;base64,{img_b64}"
                         }
-                    }
-                    # Include image path in text for tools to use
-                    text = f"[Image: {image_path}]\n\n{text}"
             except Exception as e:
                 print(f"Error processing image: {e}")
-                text = f"[Failed to load image: {image_path}]\n\n{text}"
         message = text
@@ -269,8 +281,8 @@ with gr.Blocks() as demo:
     msg = gr.MultimodalTextbox(
         label="Message",
-        placeholder="Upload an X-ray image and ask a question...",
-        file_types=["image"]
     )
     def respond(message, chat_history, mode_selection):

         if files and len(files) > 0:
             image_path = files[0]
+            # Check if it's a DICOM file
+            is_dicom = image_path.lower().endswith(('.dcm', '.dicom'))
             # Store image path for tools to use
             # LangChain Google GenAI expects images as base64 or PIL
             try:
+                if is_dicom:
+                    # DICOM files need to be converted first
+                    # We'll just pass the path and let the agent handle it
+                    text = f"[DICOM file uploaded: {image_path}]\n\n{text}"
+                    print(f"DICOM file detected: {image_path}")
+                else:
+                    # Open and encode image for Gemini
+                    with Image.open(image_path) as img:
+                        # Convert to RGB if needed
+                        if img.mode != "RGB":
+                            img = img.convert("RGB")
+                        # Resize if too large (max 4096x4096 for Gemini)
+                        max_size = 4096
+                        if img.width > max_size or img.height > max_size:
+                            img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
+                        # Store as bytes for LangChain
+                        buffered = BytesIO()
+                        img.save(buffered, format="PNG")
+                        img_bytes = buffered.getvalue()
+                        img_b64 = base64.b64encode(img_bytes).decode()
+                        # Create multimodal content for Gemini
+                        # Format: [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}]
+                        image_content = {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/png;base64,{img_b64}"
+                            }
                         }
+                        # Include image path in text for tools to use
+                        text = f"[Image: {image_path}]\n\n{text}"
             except Exception as e:
                 print(f"Error processing image: {e}")
+                import traceback
+                traceback.print_exc()
+                text = f"[Failed to load image: {image_path}. Error: {str(e)}]\n\n{text}"
         message = text
     msg = gr.MultimodalTextbox(
         label="Message",
+        placeholder="Upload an X-ray image (JPG, PNG, DICOM) and ask a question...",
+        file_types=["image", ".dcm", ".dicom", ".DCM", ".DICOM"]
     )
     def respond(message, chat_history, mode_selection):

medrax/tools/segmentation/segmentation.py CHANGED Viewed

@@ -173,36 +173,49 @@ class ChestXRaySegmentationTool(BaseTool):
     def _save_visualization(self, original_img: np.ndarray, pred_masks: torch.Tensor, organ_indices: List[int]) -> str:
         """Save visualization of original image with segmentation masks overlaid."""
-        plt.figure(figsize=(10, 10))
-        plt.imshow(original_img, cmap="gray", extent=[0, original_img.shape[1], original_img.shape[0], 0])
         # Generate color palette for organs
         colors = plt.cm.rainbow(np.linspace(0, 1, len(organ_indices)))
         # Process and overlay each organ mask
         for idx, (organ_idx, color) in enumerate(zip(organ_indices, colors)):
             mask = pred_masks[0, organ_idx].cpu().numpy()
             if mask.sum() > 0:
                 # Align the mask to the original image coordinates
                 if mask.shape != original_img.shape:
                     mask = self._align_mask_to_original(mask, original_img.shape)
                 # Create a colored overlay with transparency
-                colored_mask = np.zeros((*original_img.shape, 4))
-                colored_mask[mask > 0] = (*color[:3], 0.3)
-                plt.imshow(colored_mask, extent=[0, original_img.shape[1], original_img.shape[0], 0])
                 # Add legend entry for the organ
                 organ_name = list(self.organ_map.keys())[list(self.organ_map.values()).index(organ_idx)]
-                plt.plot([], [], color=color, label=organ_name, linewidth=3)
-        plt.title("Segmentation Overlay")
-        plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
-        plt.axis("off")
         save_path = self.temp_dir / f"segmentation_{uuid.uuid4().hex[:8]}.png"
-        plt.savefig(save_path, bbox_inches="tight", dpi=300)
-        plt.close()
         return str(save_path)

     def _save_visualization(self, original_img: np.ndarray, pred_masks: torch.Tensor, organ_indices: List[int]) -> str:
         """Save visualization of original image with segmentation masks overlaid."""
+        fig, ax = plt.subplots(figsize=(12, 12))
+        # Display original image
+        ax.imshow(original_img, cmap="gray")
         # Generate color palette for organs
         colors = plt.cm.rainbow(np.linspace(0, 1, len(organ_indices)))
         # Process and overlay each organ mask
+        masks_found = 0
         for idx, (organ_idx, color) in enumerate(zip(organ_indices, colors)):
             mask = pred_masks[0, organ_idx].cpu().numpy()
+            # Debug: print mask info
+            print(f"Organ index {organ_idx}: mask sum = {mask.sum()}, mask shape = {mask.shape}")
             if mask.sum() > 0:
+                masks_found += 1
                 # Align the mask to the original image coordinates
                 if mask.shape != original_img.shape:
                     mask = self._align_mask_to_original(mask, original_img.shape)
+                    print(f"Aligned mask shape: {mask.shape}, sum: {mask.sum()}")
                 # Create a colored overlay with transparency
+                # Convert binary mask to RGBA overlay
+                overlay = np.zeros((*original_img.shape, 4))
+                overlay[mask > 0] = [color[0], color[1], color[2], 0.4]  # 40% opacity
+                ax.imshow(overlay)
                 # Add legend entry for the organ
                 organ_name = list(self.organ_map.keys())[list(self.organ_map.values()).index(organ_idx)]
+                ax.plot([], [], color=color, label=organ_name, linewidth=3)
+        print(f"Total masks found and rendered: {masks_found}")
+        ax.set_title("Segmentation Overlay", fontsize=16, pad=20)
+        if masks_found > 0:
+            ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=10)
+        ax.axis("off")
         save_path = self.temp_dir / f"segmentation_{uuid.uuid4().hex[:8]}.png"
+        plt.savefig(save_path, bbox_inches="tight", dpi=150, facecolor='black')
+        plt.close(fig)
         return str(save_path)