caball21
/

glove_labelling

Image Segmentation

sports-analytics

computer-vision

Model card · Files and versions

caball21 committed on Aug 2, 2025

Commit

fc03d28

·

verified ·

1 Parent(s): 6a79cbc

Update handler.py

Files changed (1) hide show

handler.py +24 -11

handler.py CHANGED Viewed

@@ -6,6 +6,8 @@ from PIL import Image
 import io
 import json
 # Define class labels (same order as training)
 CLASS_LABELS = [
     "glove_outline",
@@ -16,39 +18,50 @@ CLASS_LABELS = [
     "glove_exterior"
 ]
-# Load model from disk
 def load_model():
-    model = torch.load("pytorch_model.bin", map_location="cpu")
     model.eval()
     return model
 model = load_model()
-# Preprocessing transform
 transform = T.Compose([
-    T.Resize((720, 1280)),   # or whatever input size model expects
     T.ToTensor()
 ])
-# Input: raw image bytes
 def preprocess(input_bytes):
     image = Image.open(io.BytesIO(input_bytes)).convert("RGB")
     tensor = transform(image).unsqueeze(0)  # [1, 3, H, W]
     return tensor
-# Postprocess output: convert logits to mask
 def postprocess(output_tensor):
-    # Argmax over channel dimension (assumes shape [1, C, H, W])
     pred = torch.argmax(output_tensor, dim=1)[0].cpu().numpy()
-    return pred.tolist()  # List of H x W values from 0 to 5
-# TorchServe/HF entrypoint
 def infer(payload):
-    # If input is multipart/form-data, raw bytes
     if isinstance(payload, bytes):
         image_tensor = preprocess(payload)
     elif isinstance(payload, dict) and "inputs" in payload:
-        # Hugging Face Inference API passes {"inputs": "base64 image data"}
         from base64 import b64decode
         image_tensor = preprocess(b64decode(payload["inputs"]))
     else:

 import io
 import json
+from sam2_model_stub import SAM2Hierarchical  # 👈 stub class we define separately
 # Define class labels (same order as training)
 CLASS_LABELS = [
     "glove_outline",
     "glove_exterior"
 ]
+# ----------------------------
+# Load model weights + class
+# ----------------------------
 def load_model():
+    model = SAM2Hierarchical(
+        num_classes=len(CLASS_LABELS),
+        in_channels=3,
+        backbone="vit_b",              # <-- match your config.yaml
+        freeze_backbone=True,
+        use_cls_head=True
+    )
+    model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
     model.eval()
     return model
 model = load_model()
+# ----------------------------
+# Preprocessing
+# ----------------------------
 transform = T.Compose([
+    T.Resize((720, 1280)),
     T.ToTensor()
 ])
 def preprocess(input_bytes):
     image = Image.open(io.BytesIO(input_bytes)).convert("RGB")
     tensor = transform(image).unsqueeze(0)  # [1, 3, H, W]
     return tensor
+# ----------------------------
+# Postprocessing
+# ----------------------------
 def postprocess(output_tensor):
     pred = torch.argmax(output_tensor, dim=1)[0].cpu().numpy()
+    return pred.tolist()
+# ----------------------------
+# Inference Entry Point
+# ----------------------------
 def infer(payload):
     if isinstance(payload, bytes):
         image_tensor = preprocess(payload)
     elif isinstance(payload, dict) and "inputs" in payload:
         from base64 import b64decode
         image_tensor = preprocess(b64decode(payload["inputs"]))
     else: