Justin331
/

sam3

@@ -82,7 +82,34 @@ class EndpointHandler:
                 gpus_to_use=[0],
                 bpe_path=bpe_path
             )
             elapsed = time.time() - start_time
             logger.info(f"✓ SAM3 video predictor loaded successfully in {elapsed:.2f}s")
@@ -453,15 +480,26 @@ class EndpointHandler:
         Returns path to the BPE file.
         """
         logger.info("Checking for BPE tokenizer file...")
-        # Default expected path
         assets_dir = Path("/repository/assets")
         bpe_file = assets_dir / "bpe_simple_vocab_16e6.txt.gz"
-        if bpe_file.exists():
-            logger.info(f"  ✓ BPE file found: {bpe_file}")
-            return str(bpe_file)
         logger.warning(f"  BPE file not found at {bpe_file}")
         logger.info("  Downloading from HuggingFace...")

                 gpus_to_use=[0],
                 bpe_path=bpe_path
             )
+            # Fix dtype mismatch: Convert all model parameters and buffers to float32
+            # This fixes: "Input type (c10::BFloat16) and bias type (float) should be the same"
+            logger.info("Converting model to float32 to avoid dtype mismatch...")
+            dtype_conversion_count = 0
+            # SAM3 predictor has a 'model' attribute that contains the actual model
+            if hasattr(self.predictor, 'model') and self.predictor.model is not None:
+                # Convert model to float32
+                self.predictor.model = self.predictor.model.float()
+                # Ensure all parameters are float32
+                for name, param in self.predictor.model.named_parameters():
+                    if param.dtype != torch.float32:
+                        param.data = param.data.float()
+                        dtype_conversion_count += 1
+                # Convert buffers to float32 (important for batch norm, etc.)
+                for buffer_name, buffer in self.predictor.model.named_buffers():
+                    if buffer.dtype != torch.float32 and buffer.dtype in [torch.float16, torch.bfloat16]:
+                        self.predictor.model.register_buffer(buffer_name, buffer.float())
+                        dtype_conversion_count += 1
+                logger.info(f"✓ Model converted to float32 ({dtype_conversion_count} tensors converted)")
+            else:
+                logger.warning("⚠ Could not find model attribute in predictor - dtype fix may not have been applied")
             elapsed = time.time() - start_time
             logger.info(f"✓ SAM3 video predictor loaded successfully in {elapsed:.2f}s")
         Returns path to the BPE file.
         """
         logger.info("Checking for BPE tokenizer file...")
+        # Try multiple possible paths
+        possible_paths = [
+            Path("/repository/assets/bpe_simple_vocab_16e6.txt.gz"),
+            Path("./assets/bpe_simple_vocab_16e6.txt.gz"),
+            Path("../assets/bpe_simple_vocab_16e6.txt.gz"),
+            Path("/app/assets/bpe_simple_vocab_16e6.txt.gz"),
+        ]
+        for bpe_file in possible_paths:
+            if bpe_file.exists():
+                logger.info(f"  ✓ BPE file found: {bpe_file}")
+                return str(bpe_file)
+        logger.warning("  BPE file not found in any expected location")
+        # Use first path as default for download
         assets_dir = Path("/repository/assets")
         bpe_file = assets_dir / "bpe_simple_vocab_16e6.txt.gz"
         logger.warning(f"  BPE file not found at {bpe_file}")
         logger.info("  Downloading from HuggingFace...")