Spaces:

sajabdoli
/

SAM

Sleeping

App Files Files Community

sajabdoli committed on Apr 7, 2025

Commit

cb317f0

verified ·

1 Parent(s): fd54065

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -19

app.py CHANGED Viewed

@@ -1,14 +1,26 @@
-from fastapi import FastAPI, File, UploadFile
 from segment_anything import sam_model_registry, SamPredictor
 from PIL import Image
 import numpy as np
 import torch
 import io
 app = FastAPI()
 # Load SAM Model
-sam_checkpoint = "sam_vit_b.pth"  # Add the weights file manually in the Space
 model_type = "vit_b"
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -21,22 +33,101 @@ def read_root():
 @app.post("/segment")
 async def segment_image(file: UploadFile = File(...)):
-    image_bytes = await file.read()
-    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-    image_np = np.array(image)
-    predictor.set_image(image_np)
-    input_point = np.array([[100, 100]])
-    input_label = np.array([1])
-    masks, scores, _ = predictor.predict(
-        point_coords=input_point,
-        point_labels=input_label,
-        multimask_output=False
-    )
-    return {
-        "score": float(scores[0]),
-        "mask": masks[0].tolist()
-    }

+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from segment_anything import sam_model_registry, SamPredictor
 from PIL import Image
 import numpy as np
 import torch
 import io
+import base64
+import json
 app = FastAPI()
+# Add CORS middleware for CVAT
+# NOTE(review): every origin, method and header is allowed here while
+# allow_credentials=True. FastAPI's CORS documentation says wildcards
+# should not be combined with credentials -- confirm which origins the
+# CVAT deployment really needs and list them explicitly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 # Load SAM Model
+# Checkpoint file name and the registry key of the matching architecture.
+sam_checkpoint = "sam_vit_b.pth"
 model_type = "vit_b"
+# Use the GPU when torch reports CUDA as available, otherwise the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 @app.post("/segment")
 async def segment_image(file: UploadFile = File(...)):
+    """Segment the object under the centre pixel of an uploaded image.
+
+    The upload is decoded to RGB, the predictor is prompted with a single
+    point (labelled 1) at the image centre, and the candidate mask with
+    the highest score is returned.
+
+    Args:
+        file: The uploaded image file.
+
+    Returns:
+        A dict with ``score`` (float) and ``mask`` (the boolean mask
+        converted to nested Python lists).
+
+    Raises:
+        HTTPException: 400 if the upload cannot be decoded as an image,
+            500 for any other failure.
+    """
+    try:
+        image_bytes = await file.read()
+        try:
+            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        except OSError as e:
+            # Pillow signals unreadable or truncated image data with
+            # OSError subclasses; that is a bad request, not a server fault.
+            raise HTTPException(
+                status_code=400, detail=f"Invalid image: {e}"
+            ) from e
+        image_np = np.array(image)
+        # Get image dimensions
+        height, width = image_np.shape[:2]
+        # Use the centre pixel, given as (x, y), instead of a fixed point
+        center_point = np.array([[width // 2, height // 2]])
+        input_label = np.array([1])
+        predictor.set_image(image_np)
+        masks, scores, _ = predictor.predict(
+            point_coords=center_point,
+            point_labels=input_label,
+            multimask_output=True,  # Return multiple masks
+        )
+        # Return the best mask
+        best_mask_idx = int(np.argmax(scores))
+        mask = masks[best_mask_idx].astype(bool)
+        return {
+            "score": float(scores[best_mask_idx]),
+            "mask": mask.tolist(),
+        }
+    except HTTPException:
+        # Keep the status code chosen above instead of rewrapping it as 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# CVAT-specific endpoint
+@app.post("/predict")
+async def predict_for_cvat(body: str = Form(...)):
+    """Run point-prompted segmentation for a CVAT request.
+
+    ``body`` is a JSON document sent as a form field. Its ``image`` key
+    holds the base64-encoded image; the optional ``points`` key holds the
+    prompt points (presumably ``[x, y]`` pixel pairs, like the centre
+    fallback used when no points are given -- confirm against the CVAT
+    client). Every point is given label 1.
+
+    Returns:
+        A dict whose ``annotations`` list contains one entry with the best
+        mask's score, its run-length encoding and the mask width/height.
+
+    Raises:
+        HTTPException: 400 if ``body`` is not a JSON object or its image
+            is missing or cannot be decoded, 500 for any other failure.
+    """
+    try:
+        try:
+            data = json.loads(body)
+        except ValueError as e:
+            # json.JSONDecodeError is a ValueError subclass.
+            raise HTTPException(
+                status_code=400, detail=f"Invalid JSON body: {e}"
+            ) from e
+        if not isinstance(data, dict):
+            raise HTTPException(
+                status_code=400, detail="Body must be a JSON object"
+            )
+        image_data = data.get('image', '')
+        if not image_data:
+            raise HTTPException(status_code=400, detail="Missing 'image'")
+        # Decode base64 image
+        try:
+            image_bytes = base64.b64decode(image_data)
+            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        except (ValueError, TypeError, OSError) as e:
+            # binascii.Error (a ValueError) covers malformed base64; Pillow
+            # raises OSError subclasses for data it cannot decode.
+            raise HTTPException(
+                status_code=400, detail=f"Invalid image: {e}"
+            ) from e
+        image_np = np.array(image)
+        # Get points from CVAT request
+        points = data.get('points', [])
+        if not points:
+            # If no points, use center of image
+            height, width = image_np.shape[:2]
+            points = [[width // 2, height // 2]]
+        input_points = np.array(points)
+        input_labels = np.ones(len(points))
+        predictor.set_image(image_np)
+        masks, scores, _ = predictor.predict(
+            point_coords=input_points,
+            point_labels=input_labels,
+            multimask_output=True,
+        )
+        # Get best mask
+        best_mask_idx = int(np.argmax(scores))
+        mask = masks[best_mask_idx].astype(bool)
+        # Convert mask to CVAT format
+        height, width = mask.shape
+        rle = mask_to_rle(mask)
+        return {
+            "annotations": [{
+                "ObjectID": 1,
+                "ObjectScore": float(scores[best_mask_idx]),
+                "RLE": rle,
+                "PredictionType": "mask",
+                "width": width,
+                "height": height,
+            }]
+        }
+    except HTTPException:
+        # Keep the 400 responses raised above instead of rewrapping as 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# Helper function to convert mask to RLE (Run-Length Encoding)
+def mask_to_rle(mask):
+    """Run-length encode a mask in row-major order.
+
+    The mask is flattened row by row and returned as the lengths of its
+    consecutive runs of equal values. The first entry always counts
+    leading zeros, so it is 0 when the mask starts with a non-zero pixel;
+    an empty mask yields ``[0]``.
+
+    NOTE(review): the /predict endpoint treats this as the RLE layout CVAT
+    expects -- confirm against the CVAT version in use.
+
+    Args:
+        mask: Array-like mask, e.g. a 2-D boolean NumPy array.
+
+    Returns:
+        A list of ints holding the run lengths.
+    """
+    flat = np.asarray(mask).ravel()
+    if flat.size == 0:
+        return [0]
+    # Positions where a pixel differs from its predecessor; together with
+    # both ends they delimit the runs, avoiding a per-pixel Python loop.
+    starts = np.flatnonzero(flat[1:] != flat[:-1]) + 1
+    edges = np.concatenate(([0], starts, [flat.size]))
+    rle = np.diff(edges).tolist()
+    if flat[0] != 0:
+        # Runs are counted starting from background (0).
+        rle.insert(0, 0)
+    return rle