sam-vit-base-with-handler

@@ -64,54 +64,25 @@ class EndpointHandler():
         # 4. Process and select the best mask
         try:
-            # Get the original and reshaped sizes
-            original_sizes = inputs["original_sizes"][0].tolist()  # [H, W]
-            reshaped_input_sizes = inputs["reshaped_input_sizes"][0].tolist()  # [H, W]
-            # Get predicted masks and scores
-            pred_masks = outputs.pred_masks.cpu()  # Shape: (batch, num_masks, H, W)
-            iou_scores = outputs.iou_scores.cpu()[0]  # Shape: (num_masks,)
-            # Handle different tensor dimensions
-            if pred_masks.ndim == 5:
-                pred_masks = pred_masks.squeeze(1)  # Remove extra dimension if present
-            # Select the best mask
             best_mask_idx = torch.argmax(iou_scores)
-            best_mask = pred_masks[0, best_mask_idx, :, :]  # Shape: (H, W)
-            # The mask is currently at the model's internal resolution
-            # We need to resize it to the reshaped input size first, then crop/pad to original size
-            # Step 1: Resize to reshaped input size
-            resized_mask = F.interpolate(
-                best_mask.unsqueeze(0).unsqueeze(0).float(),
-                size=reshaped_input_sizes,
-                mode='bilinear',
-                align_corners=False
-            ).squeeze()
-            # Step 2: Handle padding/cropping to get back to original size
-            original_h, original_w = original_sizes
-            reshaped_h, reshaped_w = reshaped_input_sizes
-            # Calculate padding that was added during preprocessing
-            if reshaped_h > original_h or reshaped_w > original_w:
-                # There was padding, we need to crop
-                start_h = (reshaped_h - original_h) // 2
-                start_w = (reshaped_w - original_w) // 2
-                final_mask = resized_mask[start_h:start_h + original_h, start_w:start_w + original_w]
-            else:
-                # No padding or different scaling, just resize directly
-                final_mask = F.interpolate(
-                    resized_mask.unsqueeze(0).unsqueeze(0),
-                    size=original_sizes,
-                    mode='bilinear',
-                    align_corners=False
-                ).squeeze()
-            # Convert to binary mask
-            mask_binary = (final_mask > 0.0).numpy().astype(np.uint8) * 255
         except Exception as e:
             print("Error processing masks: {}".format(e))

         # 4. Process and select the best mask
         try:
+            # Use the processor's post_process_masks method correctly
+            # This method expects the raw model outputs and the input metadata
+            post_processed_masks = self.processor.post_process_masks(
+                outputs.pred_masks,
+                inputs["original_sizes"],
+                inputs["reshaped_input_sizes"]
+            )
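+            # post_processed_masks is a list with one entry per input image (a single entry for batch size 1)
+            # NOTE(review): entries are assumed to be (num_masks, original_height, original_width); the transformers SAM docs describe a 4-D (point_batch, num_masks, H, W) tensor per image - confirm before indexing by best_mask_idx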
+            masks = post_processed_masks[0]  # Shape: (num_masks, H, W)
+            # Get IoU scores and select the best mask
+            iou_scores = outputs.iou_scores[0]  # Shape: (num_masks,)
             best_mask_idx = torch.argmax(iou_scores)
+            best_mask = masks[best_mask_idx]  # Shape: (H, W)
+            # Convert to numpy and create binary mask
+            mask_binary = (best_mask > 0.0).cpu().numpy().astype(np.uint8) * 255
         except Exception as e:
             print("Error processing masks: {}".format(e))