CodeJackR committed on
Commit
d816a26
·
1 Parent(s): d05bd8d

Fix errors

Browse files
Files changed (2) hide show
  1. handler.py +32 -30
  2. requirements.txt +1 -1
handler.py CHANGED
@@ -4,35 +4,26 @@ import io
4
  import base64
5
  import numpy as np
6
  from PIL import Image
7
- from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
 
8
  from typing import Dict, List, Any
9
 
10
  class EndpointHandler():
11
  def __init__(self, path=""):
12
  """
13
  Called once at startup.
14
- The model files are mounted under /opt/ml/model by default in Inference Endpoints.
15
  """
16
- # Try different possible checkpoint paths
17
- import os
18
- possible_paths = [
19
- os.path.join(path, "pytorch_model.bin"),
20
- os.path.join(path, "model.safetensors"),
21
- "/opt/ml/model/pytorch_model.bin",
22
- "/opt/ml/model/model.safetensors"
23
- ]
24
-
25
- checkpoint = None
26
- for p in possible_paths:
27
- if os.path.exists(p):
28
- checkpoint = p
29
- break
30
-
31
- if checkpoint is None:
32
- raise FileNotFoundError("Could not find model checkpoint in any of the expected locations")
33
-
34
- sam = sam_model_registry["vit_b"](checkpoint=checkpoint)
35
- self.mask_generator = SamAutomaticMaskGenerator(sam)
36
 
37
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
38
  """
@@ -57,17 +48,28 @@ class EndpointHandler():
57
 
58
  # Process the image
59
  img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
60
- img_np = np.array(img)
61
 
62
- # Generate masks
63
- masks = self.mask_generator.generate(img_np)
64
- combined = np.zeros(img_np.shape[:2], dtype=np.uint8)
65
- for m in masks:
66
- combined[m["segmentation"]] = 255
67
-
 
 
 
 
 
 
 
 
 
 
 
 
68
  # Convert result to base64
69
  out = io.BytesIO()
70
- Image.fromarray(combined).save(out, format="PNG")
71
  out.seek(0)
72
  mask_base64 = base64.b64encode(out.getvalue()).decode('utf-8')
73
 
 
4
  import base64
5
  import numpy as np
6
  from PIL import Image
7
+ import torch
8
+ from transformers import SamModel, SamProcessor
9
  from typing import Dict, List, Any
10
 
11
class EndpointHandler():
    def __init__(self, path=""):
        """
        Called once at startup.
        Load the SAM model using Hugging Face Transformers.
        """
        try:
            # Prefer the checkpoint files mounted at the local path.
            self.model = SamModel.from_pretrained(path)
            self.processor = SamProcessor.from_pretrained(path)
        except Exception as load_err:
            # Local load failed; fall back to the public SAM checkpoint on the Hub.
            print(f"Failed to load from local path: {load_err}")
            print("Attempting to load from facebook/sam-vit-base")
            fallback_repo = "facebook/sam-vit-base"
            self.model = SamModel.from_pretrained(fallback_repo)
            self.processor = SamProcessor.from_pretrained(fallback_repo)
 
 
 
 
 
 
 
 
 
 
27
 
28
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
29
  """
 
48
 
49
  # Process the image
50
  img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
51
 
52
+ # Prepare inputs for the model
53
+ inputs = self.processor(img, return_tensors="pt")
54
+
55
+ # Generate masks using the model
56
+ with torch.no_grad():
57
+ outputs = self.model(**inputs)
58
+
59
+ # Process the outputs to get masks
60
+ masks = self.processor.image_processor.post_process_masks(
61
+ outputs.pred_masks.cpu(),
62
+ inputs["original_sizes"].cpu(),
63
+ inputs["reshaped_input_sizes"].cpu()
64
+ )[0]
65
+
66
+ # Convert the first mask to a binary mask
67
+ mask = masks[0].squeeze().numpy()
68
+ mask_binary = (mask > 0.0).astype(np.uint8) * 255
69
+
70
  # Convert result to base64
71
  out = io.BytesIO()
72
+ Image.fromarray(mask_binary).save(out, format="PNG")
73
  out.seek(0)
74
  mask_base64 = base64.b64encode(out.getvalue()).decode('utf-8')
75
 
requirements.txt CHANGED
@@ -2,4 +2,4 @@
2
  torch
3
  numpy
4
  Pillow
5
- segment-anything
 
2
  torch
3
  numpy
4
  Pillow
5
+ transformers[vision]