Justin331
/

sam3

@@ -68,15 +68,46 @@ class EndpointHandler:
             logger.info(f"CUDA Version: {torch.version.cuda}")
             logger.info(f"Total GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
         # Build SAM3 video predictor
         try:
             logger.info("Building SAM3 video predictor...")
             start_time = time.time()
             # Ensure BPE tokenizer file exists
             bpe_path = self._ensure_bpe_file()
             logger.info(f"BPE tokenizer path: {bpe_path}")
             # Build predictor with explicit bpe_path
             self.predictor = build_sam3_video_predictor(
                 gpus_to_use=[0],
@@ -87,28 +118,125 @@ class EndpointHandler:
             # This fixes: "Input type (c10::BFloat16) and bias type (float) should be the same"
             logger.info("Converting model to float32 to avoid dtype mismatch...")
-            dtype_conversion_count = 0
-            # SAM3 predictor has a 'model' attribute that contains the actual model
-            if hasattr(self.predictor, 'model') and self.predictor.model is not None:
-                # Convert model to float32
-                self.predictor.model = self.predictor.model.float()
-                # Ensure all parameters are float32
-                for name, param in self.predictor.model.named_parameters():
                     if param.dtype != torch.float32:
                         param.data = param.data.float()
-                        dtype_conversion_count += 1
-                # Convert buffers to float32 (important for batch norm, etc.)
-                for buffer_name, buffer in self.predictor.model.named_buffers():
                     if buffer.dtype != torch.float32 and buffer.dtype in [torch.float16, torch.bfloat16]:
-                        self.predictor.model.register_buffer(buffer_name, buffer.float())
-                        dtype_conversion_count += 1
-                logger.info(f"✓ Model converted to float32 ({dtype_conversion_count} tensors converted)")
             else:
-                logger.warning("⚠ Could not find model attribute in predictor - dtype fix may not have been applied")
             elapsed = time.time() - start_time
             logger.info(f"✓ SAM3 video predictor loaded successfully in {elapsed:.2f}s")

             logger.info(f"CUDA Version: {torch.version.cuda}")
             logger.info(f"Total GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
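+        # CRITICAL FIX: Patch torch.autocast BEFORE building the predictor.
+        # SAM3 has @torch.autocast decorators hardcoded to use BFloat16, so we
+        # replace the autocast context manager with a no-op.
+        # NOTE(review): a decorator is evaluated when its module is imported, so any
+        # SAM3 module imported before this point keeps the original autocast - confirm import order.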
+        logger.info("Patching torch.autocast to disable BFloat16 (before model loading)...")
+        # Store the original autocast
+        self._original_autocast = torch.autocast
+        # Create a no-op autocast that always disables mixed precision
+        class Float32Autocast:
+            """No-op replacement for ``torch.autocast`` that keeps mixed precision off.
+
+            Usable both as a context manager (``with Float32Autocast(...):``) and
+            as a decorator (``@Float32Autocast(...)``). Every constructor argument
+            is accepted for call compatibility only: ``enabled`` is always stored
+            as False and ``dtype`` as ``torch.float32``, whatever was requested.
+            """
+            # device_type defaults to "cuda" so that call sites written for
+            # torch.cuda.amp.autocast, which takes no device_type, still construct.
+            def __init__(self, device_type="cuda", dtype=None, enabled=True, cache_enabled=None):
+                # Completely disable autocast
+                self.device_type = device_type
+                self.dtype = torch.float32
+                self.enabled = False
+            def __enter__(self):
+                return self
+            def __exit__(self, *args):
+                # Falsy return: exceptions raised inside the block still propagate.
+                return False
+            def __call__(self, func):
+                # Decorator form: the context does nothing, so return the function unwrapped.
+                return func
+        # Monkey-patch torch.autocast globally BEFORE importing/building
+        torch.autocast = Float32Autocast
+        if hasattr(torch.cuda.amp, 'autocast'):
+            torch.cuda.amp.autocast = Float32Autocast
+        if hasattr(torch.amp, 'autocast'):
+            torch.amp.autocast = Float32Autocast
+        logger.info("✓ Patched torch.autocast to be a no-op (forces float32)")
         # Build SAM3 video predictor
         try:
             logger.info("Building SAM3 video predictor...")
             start_time = time.time()
             # Ensure BPE tokenizer file exists
             bpe_path = self._ensure_bpe_file()
             logger.info(f"BPE tokenizer path: {bpe_path}")
             # Build predictor with explicit bpe_path
             self.predictor = build_sam3_video_predictor(
                 gpus_to_use=[0],
             # This fixes: "Input type (c10::BFloat16) and bias type (float) should be the same"
             logger.info("Converting model to float32 to avoid dtype mismatch...")
+            def convert_model_to_float32(model):
+                """Cast every floating-point parameter and buffer of ``model`` to float32.
+
+                Args:
+                    model: object exposing ``named_parameters()``, ``named_buffers()``
+                        and ``float()`` (presumably a ``torch.nn.Module`` - confirm
+                        against the predictor).
+
+                Returns:
+                    Number of parameters and buffers that were not float32 before
+                    the cast.
+                """
+                # Take the inventory BEFORE casting: once model.float() has run,
+                # nothing is left in a non-float32 floating dtype to count.
+                pending = [
+                    (kind, name)
+                    for kind, named in (
+                        ("parameter", model.named_parameters()),
+                        ("buffer", model.named_buffers()),
+                    )
+                    for name, tensor in named
+                    if tensor.is_floating_point() and tensor.dtype != torch.float32
+                ]
+                # Module.float() casts parameters and buffers of all submodules in
+                # place, so no per-submodule pass or register_buffer call (which
+                # rejects dotted names) is needed.
+                model.float()
+                for kind, name in pending:
+                    logger.debug(f"  Converted {kind}: {name}")
+                return len(pending)
+            total_conversions = 0
+            # Convert the main model
+            if hasattr(self.predictor, 'model') and self.predictor.model is not None:
+                logger.info("  Converting main model...")
+                total_conversions += convert_model_to_float32(self.predictor.model)
+            # SAM3 may have additional models (detector, tracker, etc.)
+            # Check for other potential model attributes
+            for attr_name in ['detector', 'tracker', 'image_encoder', 'text_encoder']:
+                if hasattr(self.predictor, attr_name):
+                    attr = getattr(self.predictor, attr_name)
+                    if attr is not None and hasattr(attr, 'float'):
+                        logger.info(f"  Converting {attr_name}...")
+                        try:
+                            total_conversions += convert_model_to_float32(attr)
+                        except Exception as e:
+                            logger.warning(f"  Could not convert {attr_name}: {e}")
+            # Check if model has nested models
+            if hasattr(self.predictor, 'model') and self.predictor.model is not None:
+                model = self.predictor.model
+                for attr_name in dir(model):
+                    if not attr_name.startswith('_'):
+                        try:
+                            attr = getattr(model, attr_name)
+                            if hasattr(attr, 'parameters') and hasattr(attr, 'float'):
+                                # This looks like a submodel
+                                if attr_name not in ['model', 'detector', 'tracker']:
+                                    logger.debug(f"  Found submodel: {attr_name}")
+                                    try:
+                                        convert_model_to_float32(attr)
+                                    except Exception:
+                                        pass
+                        except Exception:
+                            pass
+            if total_conversions > 0:
+                logger.info(f"✓ Model converted to float32 ({total_conversions} tensors converted)")
             else:
+                logger.warning("⚠ No tensors were converted - dtype fix may not have been applied correctly")
+            # Additional safety: Wrap handle_request to ensure inputs are float32
+            original_handle_request = self.predictor.handle_request
+            def float32_handle_request(request):
+                """Cast half-precision tensors in ``request`` to float32, then forward it.
+
+                Dicts, lists and tuples are rebuilt recursively; float16 and
+                bfloat16 tensors found inside them are replaced by float32 copies.
+                Everything else is passed through untouched. Returns whatever the
+                wrapped ``handle_request`` returns.
+                """
+                def ensure_float32(obj):
+                    if isinstance(obj, torch.Tensor):
+                        return obj.float() if obj.dtype in (torch.float16, torch.bfloat16) else obj
+                    if isinstance(obj, dict):
+                        return {k: ensure_float32(v) for k, v in obj.items()}
+                    if isinstance(obj, (list, tuple)):
+                        items = [ensure_float32(item) for item in obj]
+                        # Namedtuples take one positional argument per field, so
+                        # rebuild them with _make rather than from a single iterable.
+                        if hasattr(obj, "_fields"):
+                            return type(obj)._make(items)
+                        return type(obj)(items)
+                    return obj
+                return original_handle_request(ensure_float32(request))
+            self.predictor.handle_request = float32_handle_request
+            # Also wrap handle_stream_request if it exists
+            if hasattr(self.predictor, 'handle_stream_request'):
+                original_handle_stream_request = self.predictor.handle_stream_request
+                def float32_handle_stream_request(request):
+                    """Cast half-precision tensors in ``request`` to float32 and stream the responses.
+
+                    This is a generator: the request is converted and the wrapped
+                    ``handle_stream_request`` is called only once iteration starts.
+                    Each response is yielded unchanged.
+                    """
+                    def ensure_float32(obj):
+                        if isinstance(obj, torch.Tensor):
+                            return obj.float() if obj.dtype in (torch.float16, torch.bfloat16) else obj
+                        if isinstance(obj, dict):
+                            return {k: ensure_float32(v) for k, v in obj.items()}
+                        if isinstance(obj, (list, tuple)):
+                            items = [ensure_float32(item) for item in obj]
+                            # Namedtuples take one positional argument per field, so
+                            # rebuild them with _make rather than from a single iterable.
+                            if hasattr(obj, "_fields"):
+                                return type(obj)._make(items)
+                            return type(obj)(items)
+                        return obj
+                    yield from original_handle_stream_request(ensure_float32(request))
+                self.predictor.handle_stream_request = float32_handle_stream_request
+            logger.info("✓ Added float32 enforcement wrappers to predictor methods")
             elapsed = time.time() - start_time
             logger.info(f"✓ SAM3 video predictor loaded successfully in {elapsed:.2f}s")