MilicMilos committed
Commit 46c5d16 · Parent: 142af98

Improve model inference performance and reliability on hardware


Force-disable Flash Attention, optimize the inference loop (torch.no_grad() → torch.inference_mode()), pre-load the model at startup, and cap the input size for faster processing. A short sketch of the inference_mode change follows the commit metadata below.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: c144be0a-7fab-4a53-a663-fc927a204409
Replit-Commit-Checkpoint-Type: intermediate_checkpoint
Replit-Commit-Event-Id: 63465661-b0cc-45eb-97fa-7aed76fbe293
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/5b4b75b9-1619-404c-a78d-526127514111/c144be0a-7fab-4a53-a663-fc927a204409/35LY8UZ
Replit-Helium-Checkpoint-Created: true
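
The "optimize the inference loop" part of this commit is the switch from torch.no_grad() to torch.inference_mode() around set_image() and predict() (visible in the models/medsam2_inference.py hunks below). A minimal, self-contained sketch of the difference between the two contexts; TinyNet is a hypothetical stand-in, not part of this repo:

import torch
import torch.nn as nn

# Hypothetical stand-in model; the repo's real model is MedSAM2.
class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 512)
        )

    def forward(self, x):
        return self.layers(x)

net = TinyNet().eval()
x = torch.randn(64, 512)

# torch.no_grad() only disables gradient tracking for the block.
with torch.no_grad():
    y1 = net(x)

# torch.inference_mode() additionally skips view tracking and
# version-counter bookkeeping; tensors created inside can never be
# used in autograd later, which makes it the stricter (and typically
# faster) choice for pure inference paths.
with torch.inference_mode():
    y2 = net(x)

print(y1.requires_grad, y2.requires_grad)  # False False
print(y2.is_inference())                   # True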

Dockerfile CHANGED
@@ -23,5 +23,9 @@ RUN mkdir -p uploads checkpoints
 EXPOSE 7860
 
 ENV PORT=7860
+ENV SAM2_ALLOW_ALL_KERNELS=1
+ENV TORCH_CUDNN_SDPA_ENABLED=0
+ENV U_FLASH_ATTN=0
+ENV MATH_KERNEL_ON=0
 
 CMD ["python", "main.py"]
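
Of these four variables, only SAM2_ALLOW_ALL_KERNELS is consumed by code visible in this diff (transformer.py below). TORCH_CUDNN_SDPA_ENABLED appears to be the PyTorch-level switch for the cuDNN SDPA backend in recent releases, while U_FLASH_ATTN and MATH_KERNEL_ON are not read anywhere shown here, so they may be dead or consumed elsewhere. A minimal sketch of the flag convention the code uses (the string "1" means on):

import os

def env_flag(name: str, default: str = "1") -> bool:
    # Same convention as transformer.py below: the flag is on
    # only when the variable is exactly the string "1".
    return os.environ.get(name, default) == "1"

if __name__ == "__main__":
    print("allow_all_kernels:", env_flag("SAM2_ALLOW_ALL_KERNELS"))
    print("cudnn_sdpa:", env_flag("TORCH_CUDNN_SDPA_ENABLED", default="0"))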
main.py CHANGED
@@ -1395,7 +1395,25 @@ def batch_report():
     except Exception as e:
         return jsonify({'error': f'Error generating PDF: {str(e)}'}), 500
 
+def preload_medsam2():
+    import threading
+    def _load():
+        try:
+            print("[Startup] Pre-loading MedSAM2 model...")
+            from models.medsam2_inference import load_medsam2_model
+            predictor = load_medsam2_model()
+            if predictor is not None:
+                print("[Startup] MedSAM2 model pre-loaded successfully")
+            else:
+                print("[Startup] MedSAM2 model not available (will retry on first request)")
+        except Exception as e:
+            print(f"[Startup] MedSAM2 pre-load failed: {e}")
+    t = threading.Thread(target=_load, daemon=True)
+    t.start()
+
+
 if __name__ == '__main__':
     import sys
     port = int(os.environ.get('PORT', sys.argv[1] if len(sys.argv) > 1 else 7860))
+    preload_medsam2()
     app.run(host='0.0.0.0', port=port, debug=True)
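
The preload runs on a daemon thread so startup is never blocked, and it is safe only because load_medsam2_model() is idempotent under _model_lock (see models/medsam2_inference.py below). A stripped-down sketch of that warm-up idiom, with load_model() as a hypothetical stand-in for the real loader:

import threading
import time

_model = None
_lock = threading.Lock()

def load_model():
    # Double-checked locking, as in load_medsam2_model() below:
    # a lock-free fast path, then one thread does the real load.
    global _model
    if _model is not None:
        return _model
    with _lock:
        if _model is not None:
            return _model
        time.sleep(0.5)  # stand-in for expensive checkpoint loading
        _model = object()
        return _model

def preload():
    # Daemon thread: warms the model without blocking startup and
    # without keeping the process alive at shutdown.
    threading.Thread(target=load_model, daemon=True).start()

if __name__ == "__main__":
    preload()
    m1 = load_model()  # may wait on the same lock as the preload thread
    m2 = load_model()  # served from the lock-free fast path
    print(m1 is m2)    # True: all callers share one instance

One caveat: app.run(..., debug=True) starts the Werkzeug reloader, which re-executes the main module in a child process, so the preload thread can run in both the reloader parent and the serving child.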
medsam2_pkg/sam2/modeling/sam/transformer.py CHANGED
@@ -17,22 +17,22 @@ from torch import nn, Tensor
 
 from sam2.modeling.position_encoding import apply_rotary_enc, compute_axial_cis
 from sam2.modeling.sam2_utils import MLP
-from sam2.utils.misc import get_sdpa_settings
 
 warnings.simplefilter(action="ignore", category=FutureWarning)
-OLD_GPU, USE_FLASH_ATTN, MATH_KERNEL_ON = get_sdpa_settings()
 ALLOW_ALL_KERNELS = os.environ.get("SAM2_ALLOW_ALL_KERNELS", "1") == "1"
-if ALLOW_ALL_KERNELS:
-    print("[SAM2] Flash Attention DISABLED — using all available kernels fallback for maximum compatibility")
-else:
-    print(f"[SAM2] Flash Attention: {USE_FLASH_ATTN}, Math kernel: {MATH_KERNEL_ON}, Old GPU: {OLD_GPU}")
+OLD_GPU = True
+USE_FLASH_ATTN = False
+MATH_KERNEL_ON = True
+if not ALLOW_ALL_KERNELS:
+    from sam2.utils.misc import get_sdpa_settings
+    OLD_GPU, USE_FLASH_ATTN, MATH_KERNEL_ON = get_sdpa_settings()
+print(f"[SAM2] Attention config: ALLOW_ALL_KERNELS={ALLOW_ALL_KERNELS}, FLASH={USE_FLASH_ATTN}, MATH={MATH_KERNEL_ON}")
 
 
 def sdp_kernel_context(dropout_p):
     """
     Get the context for the attention scaled dot-product kernel.
     Defaults to allowing all kernels for maximum compatibility.
-    Set SAM2_ALLOW_ALL_KERNELS=0 to use Flash Attention when available.
     """
     if ALLOW_ALL_KERNELS:
        return contextlib.nullcontext()
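
When ALLOW_ALL_KERNELS is off, sdp_kernel_context presumably still falls through to the restricted-kernel context inherited from upstream SAM2 (that branch is not shown in this hunk). For reference, a sketch of that branch as it appears in upstream SAM2, under the assumption that this repo left it unchanged:

import contextlib
import os
import torch

ALLOW_ALL_KERNELS = os.environ.get("SAM2_ALLOW_ALL_KERNELS", "1") == "1"
# Defaults match the new module-level values in the hunk above.
OLD_GPU, USE_FLASH_ATTN, MATH_KERNEL_ON = True, False, True

def sdp_kernel_context(dropout_p):
    if ALLOW_ALL_KERNELS:
        # Let PyTorch's SDPA dispatcher pick any available kernel.
        return contextlib.nullcontext()
    # Upstream SAM2 restricts the kernel set explicitly. Note that
    # torch.backends.cuda.sdp_kernel is deprecated in newer PyTorch
    # releases in favor of torch.nn.attention.sdpa_kernel.
    return torch.backends.cuda.sdp_kernel(
        enable_flash=USE_FLASH_ATTN,
        enable_math=(OLD_GPU and dropout_p > 0.0) or MATH_KERNEL_ON,
        enable_mem_efficient=OLD_GPU,
    )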
models/medsam2_inference.py CHANGED
@@ -1,11 +1,21 @@
+import os
+os.environ["TORCH_CUDNN_SDPA_ENABLED"] = "0"
+os.environ["SAM2_ALLOW_ALL_KERNELS"] = "1"
+os.environ["U_FLASH_ATTN"] = "0"
+os.environ["MATH_KERNEL_ON"] = "0"
+
 import numpy as np
 import cv2
 import sys
-import os
 import traceback
+import time
+import threading
 
 _medsam2_model = None
 _medsam2_predictor = None
+_model_lock = threading.Lock()
+
+MAX_INPUT_SIZE = 512
 
 MEDSAM2_PATHS = [
     os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'medsam2_pkg'),
@@ -40,76 +50,108 @@ def _get_device():
         device = "cuda"
         print(f"[MedSAM2] Using CUDA device: {torch.cuda.get_device_name(0)}")
         print(f"[MedSAM2] CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
+        print(f"[MedSAM2] CUDA capability: {torch.cuda.get_device_properties(0).major}.{torch.cuda.get_device_properties(0).minor}")
     else:
         device = "cpu"
-        print("[MedSAM2] Using CPU device")
+        print("[MedSAM2] Using CPU device (no CUDA available)")
     return device
 
 
+def _resize_for_model(image_rgb, click_x, click_y):
+    h, w = image_rgb.shape[:2]
+    if max(h, w) <= MAX_INPUT_SIZE:
+        return image_rgb, click_x, click_y, 1.0
+
+    scale = MAX_INPUT_SIZE / max(h, w)
+    new_w = int(w * scale)
+    new_h = int(h * scale)
+    resized = cv2.resize(image_rgb, (new_w, new_h), interpolation=cv2.INTER_AREA)
+    new_click_x = int(click_x * scale)
+    new_click_y = int(click_y * scale)
+    new_click_x = max(0, min(new_click_x, new_w - 1))
+    new_click_y = max(0, min(new_click_y, new_h - 1))
+    print(f"[MedSAM2] Resized input: {w}x{h} -> {new_w}x{new_h} (scale={scale:.3f})")
+    print(f"[MedSAM2] Scaled click: ({click_x},{click_y}) -> ({new_click_x},{new_click_y})")
+    return resized, new_click_x, new_click_y, scale
+
+
 def load_medsam2_model():
     global _medsam2_model, _medsam2_predictor
 
     if _medsam2_predictor is not None:
         return _medsam2_predictor
 
-    if not is_medsam2_available():
-        print("[MedSAM2] Dependencies not available")
-        return None
-
-    from models.checkpoint_manager import CheckpointManager
-
-    checkpoint_path = CheckpointManager.get_medsam2_checkpoint()
-    if checkpoint_path is None:
-        print("[MedSAM2] Checkpoint not available")
-        return None
-
-    try:
-        import torch
-        device = _get_device()
-        print(f"[MedSAM2] Loading model on device: {device}")
-        print(f"[MedSAM2] Checkpoint: {checkpoint_path}")
-
-        _ensure_medsam2_path()
-
-        try:
-            from sam2.build_sam import build_sam2
-            from sam2.sam2_image_predictor import SAM2ImagePredictor
-        except ImportError as e:
-            print(f"[MedSAM2] SAM2 library not importable: {e}")
-            return None
-
-        medsam2_path = _find_medsam2_path()
-        config_dir = os.path.join(medsam2_path, 'sam2', 'configs')
-        config_yaml = os.path.join(config_dir, 'sam2.1_hiera_t512.yaml')
-        if not os.path.exists(config_yaml):
-            yaml_files = [f for f in os.listdir(config_dir)] if os.path.isdir(config_dir) else []
-            print(f"[MedSAM2] Config not found at {config_yaml}, available: {yaml_files}")
-            return None
-
-        abs_config = '/' + os.path.abspath(config_yaml)
-
-        os.environ["SAM2_ALLOW_ALL_KERNELS"] = "1"
-
-        with torch.no_grad():
-            _medsam2_model = build_sam2(
-                abs_config,
-                ckpt_path=str(checkpoint_path),
-                device=device
-            )
-
-        _medsam2_predictor = SAM2ImagePredictor(_medsam2_model)
-
-        print(f"[MedSAM2] Model loaded successfully on {device}")
-        print(f"[MedSAM2] Model device: {_medsam2_predictor.device}")
-        return _medsam2_predictor
-
-    except Exception as e:
-        print(f"[MedSAM2] Failed to load model: {e}")
-        traceback.print_exc()
-        _medsam2_model = None
-        _medsam2_predictor = None
-        return None
-
+    with _model_lock:
+        if _medsam2_predictor is not None:
+            return _medsam2_predictor
+
+        if not is_medsam2_available():
+            print("[MedSAM2] Dependencies not available")
+            return None
+
+        from models.checkpoint_manager import CheckpointManager
+
+        checkpoint_path = CheckpointManager.get_medsam2_checkpoint()
+        if checkpoint_path is None:
+            print("[MedSAM2] Checkpoint not available")
+            return None
+
+        try:
+            import torch
+            device = _get_device()
+            print(f"[MedSAM2] Loading model on device: {device}")
+            print(f"[MedSAM2] Checkpoint: {checkpoint_path}")
+            print(f"[MedSAM2] PyTorch version: {torch.__version__}")
+            print(f"[MedSAM2] CUDA available: {torch.cuda.is_available()}")
+
+            _ensure_medsam2_path()
+
+            try:
+                from sam2.build_sam import build_sam2
+                from sam2.sam2_image_predictor import SAM2ImagePredictor
+            except ImportError as e:
+                print(f"[MedSAM2] SAM2 library not importable: {e}")
+                return None
+
+            medsam2_path = _find_medsam2_path()
+            config_dir = os.path.join(medsam2_path, 'sam2', 'configs')
+            config_yaml = os.path.join(config_dir, 'sam2.1_hiera_t512.yaml')
+            if not os.path.exists(config_yaml):
+                yaml_files = [f for f in os.listdir(config_dir)] if os.path.isdir(config_dir) else []
+                print(f"[MedSAM2] Config not found at {config_yaml}, available: {yaml_files}")
+                return None
+
+            abs_config = '/' + os.path.abspath(config_yaml)
+
+            t0 = time.time()
+            with torch.inference_mode():
+                _medsam2_model = build_sam2(
+                    abs_config,
+                    ckpt_path=str(checkpoint_path),
+                    device=device
+                )
+            load_time = time.time() - t0
+
+            _medsam2_predictor = SAM2ImagePredictor(_medsam2_model)
+
+            print(f"[MedSAM2] Model loaded in {load_time:.1f}s on {device}")
+            print(f"[MedSAM2] Model device: {_medsam2_predictor.device}")
+            print(f"[MedSAM2] Model image_size: {_medsam2_model.image_size}")
+
+            if device == "cuda":
+                mem_alloc = torch.cuda.memory_allocated() / 1e9
+                mem_reserved = torch.cuda.memory_reserved() / 1e9
+                print(f"[MedSAM2] GPU memory: allocated={mem_alloc:.2f}GB, reserved={mem_reserved:.2f}GB")
+
+            return _medsam2_predictor
+
+        except Exception as e:
+            print(f"[MedSAM2] Failed to load model: {e}")
+            traceback.print_exc()
+            _medsam2_model = None
+            _medsam2_predictor = None
+            return None
+
 
 def segment_with_medsam2(image, click_x, click_y):
     import torch
@@ -127,7 +169,6 @@ def segment_with_medsam2(image, click_x, click_y):
 
         click_x = int(max(0, min(click_x, img_w - 1)))
         click_y = int(max(0, min(click_y, img_h - 1)))
-        print(f"[MedSAM2] Clamped point: ({click_x}, {click_y})")
 
         if len(image.shape) == 2:
             image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
@@ -136,34 +177,36 @@ def segment_with_medsam2(image, click_x, click_y):
         else:
             image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
-        print(f"[MedSAM2] RGB image: shape={image_rgb.shape}, dtype={image_rgb.dtype}, range=[{image_rgb.min()}, {image_rgb.max()}]")
-
         if image_rgb.dtype != np.uint8:
             if image_rgb.max() <= 1.0:
                 image_rgb = (image_rgb * 255).astype(np.uint8)
             else:
                 image_rgb = image_rgb.astype(np.uint8)
-            print(f"[MedSAM2] Converted to uint8, range=[{image_rgb.min()}, {image_rgb.max()}]")
 
-        print("[MedSAM2] Setting image on predictor...")
-        with torch.no_grad():
-            predictor.set_image(image_rgb)
-        print("[MedSAM2] Image set successfully")
+        image_rgb, click_x, click_y, scale = _resize_for_model(image_rgb, click_x, click_y)
+
+        print(f"[MedSAM2] Final input: {image_rgb.shape}, click=({click_x},{click_y})")
 
         point_coords = np.array([[click_x, click_y]], dtype=np.float32)
        point_labels = np.array([1], dtype=np.int32)
-        print(f"[MedSAM2] Point coords: {point_coords}, labels: {point_labels}")
-        print(f"[MedSAM2] Point coords dtype: {point_coords.dtype}, labels dtype: {point_labels.dtype}")
 
-        print("[MedSAM2] Running predict()...")
-        with torch.no_grad():
+        print("[MedSAM2] Setting image on predictor...")
+        t0 = time.time()
+        with torch.inference_mode():
+            predictor.set_image(image_rgb)
+        set_time = time.time() - t0
+        print(f"[MedSAM2] Image set in {set_time:.2f}s")
+
+        print(f"[MedSAM2] Running predict(): coords={point_coords}, labels={point_labels}")
+        t0 = time.time()
+        with torch.inference_mode():
            masks, scores, logits = predictor.predict(
                point_coords=point_coords,
                point_labels=point_labels,
                multimask_output=True
            )
+        pred_time = time.time() - t0
+        print(f"[MedSAM2] predict() completed in {pred_time:.2f}s")
 
        if masks is None:
            print("[MedSAM2] ERROR: predict() returned None for masks")
@@ -171,15 +214,24 @@ def segment_with_medsam2(image, click_x, click_y):
 
        print(f"[MedSAM2] Masks shape: {masks.shape}, dtype: {masks.dtype}")
        print(f"[MedSAM2] Scores: {scores}")
-        print(f"[MedSAM2] Logits shape: {logits.shape}")
 
        if len(masks) == 0:
            print("[MedSAM2] ERROR: predict() returned empty masks array")
            return None
 
+        if scale < 1.0:
+            orig_h, orig_w = img_h, img_w
+            upscaled_masks = []
+            for m in masks:
+                m_uint8 = m.astype(np.uint8) * 255
+                m_up = cv2.resize(m_uint8, (orig_w, orig_h), interpolation=cv2.INTER_NEAREST)
+                upscaled_masks.append(m_up > 127)
+            masks = np.array(upscaled_masks)
+            print(f"[MedSAM2] Upscaled masks back to {orig_w}x{orig_h}")
+
        for i, (mask, score) in enumerate(zip(masks, scores)):
            nonzero = np.count_nonzero(mask)
-            print(f"[MedSAM2] Mask {i}: shape={mask.shape}, nonzero_pixels={nonzero}, score={score:.4f}")
+            print(f"[MedSAM2] Mask {i}: nonzero={nonzero}, score={score:.4f}")
 
        from utils.image_processing import postprocess_mask
 
@@ -193,14 +245,12 @@ def segment_with_medsam2(image, click_x, click_y):
                'score': float(score),
                'area': area
            })
-            print(f"[MedSAM2] Processed mask {i}: area={area} pixels, score={float(score):.4f}")
 
        mask_list.sort(key=lambda m: m['area'])
 
        total_area = sum(m['area'] for m in mask_list)
        if total_area == 0:
-            print("[MedSAM2] WARNING: All masks have zero area after postprocessing")
-            print("[MedSAM2] Returning raw masks without postprocessing cleanup")
+            print("[MedSAM2] WARNING: All masks zero area after postprocessing, returning raw")
            mask_list = []
            for i, (mask, score) in enumerate(zip(masks, scores)):
                binary = (mask.astype(np.uint8)) * 255
@@ -212,7 +262,11 @@ def segment_with_medsam2(image, click_x, click_y):
                })
            mask_list.sort(key=lambda m: m['area'])
 
-        print(f"[MedSAM2] Segmentation complete: {len(mask_list)} masks returned")
+        print(f"[MedSAM2] Segmentation complete: {len(mask_list)} masks, total time={set_time + pred_time:.2f}s")
+
+        if predictor.device.type == "cuda":
+            torch.cuda.empty_cache()
+
        return mask_list
 
    except Exception as e:
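
The new size cap trades boundary fidelity for speed: the longest side is reduced to MAX_INPUT_SIZE, the click is mapped into the resized frame, and the predicted masks are nearest-neighbor upscaled back. A self-contained round-trip check of that geometry on synthetic data (resize_for_model mirrors the diff's _resize_for_model; the image dimensions and click values are made up):

import cv2
import numpy as np

MAX_INPUT_SIZE = 512

def resize_for_model(image_rgb, click_x, click_y):
    # Mirrors _resize_for_model in the diff above.
    h, w = image_rgb.shape[:2]
    if max(h, w) <= MAX_INPUT_SIZE:
        return image_rgb, click_x, click_y, 1.0
    scale = MAX_INPUT_SIZE / max(h, w)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(image_rgb, (new_w, new_h), interpolation=cv2.INTER_AREA)
    cx = max(0, min(int(click_x * scale), new_w - 1))
    cy = max(0, min(int(click_y * scale), new_h - 1))
    return resized, cx, cy, scale

# Synthetic 2048x1536 image with a click at (1000, 700).
img = np.zeros((1536, 2048, 3), dtype=np.uint8)
small, cx, cy, scale = resize_for_model(img, 1000, 700)
print(small.shape, cx, cy, round(scale, 3))  # (384, 512, 3) 250 175 0.25

# Upscale a model-resolution mask back to the original frame,
# as the diff does with INTER_NEAREST plus a 127 threshold.
mask_small = np.zeros(small.shape[:2], dtype=np.uint8)
cv2.circle(mask_small, (cx, cy), 20, 255, -1)
mask_full = cv2.resize(mask_small, (img.shape[1], img.shape[0]),
                       interpolation=cv2.INTER_NEAREST) > 127
print(mask_full.shape, mask_full[700, 1000])  # (1536, 2048) True

INTER_NEAREST keeps the mask binary on the way back up; the > 127 threshold then restores a boolean array at the original resolution.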