Spaces:

Tremick
/

SAM

Sleeping

App Files Files Community

Tremick committed on Feb 3

Commit

7ea3f76

verified ·

1 Parent(s): 4c19a9f

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -191

app.py DELETED Viewed

@@ -1,191 +0,0 @@
-import os
-import io
-import base64
-import torch
-import numpy as np
-import cv2
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from typing import List, Optional, Union
-from PIL import Image
-from transformers import Sam3Processor, Sam3Model
-app = FastAPI(title="SAM 3 API", description="Segment Anything Model 3 API for HF Spaces")
-# CORS Setup - Allow all for simplicity in this demo, restrict in production
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# --- Global Model Variables ---
-# Compute device: "cuda" when torch reports an available GPU, otherwise "cpu".
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Both stay None until startup_event() assigns them; /predict answers 503
-# while either one is still unset.
-model = None
-processor = None
-# --- Startup Event ---
-@app.on_event("startup")
-async def startup_event():
-    """Load the SAM 3 processor and model into the module-level globals.
-
-    Any exception raised while loading is caught and printed, so this handler
-    never raises; whichever global was not assigned yet keeps its previous value.
-    """
-    global model, processor
-    print(f"Loading SAM 3 Model on {device}...")
-    try:
-        # The processor is assigned first, so a failure while loading the model
-        # leaves `processor` set and `model` unchanged.
-        processor = Sam3Processor.from_pretrained("facebook/sam3")
-        model = Sam3Model.from_pretrained("facebook/sam3").to(device)
-        print("Model loaded successfully!")
-    except Exception as e:
-        print(f"Error loading model: {e}")
-        # In a real deployed environment, we might want to crash or retry.
-        # For now, we print error.
-# --- Data Models ---
-class Point(BaseModel):
-    """A point prompt: an integer (x, y) coordinate plus a label."""
-    x: int
-    y: int
-    label: int  # 1 for positive, 0 for negative
-class Box(BaseModel):
-    """A box prompt: four integer coordinates plus a label that defaults to 1."""
-    # /predict forwards the coordinates to the processor in the order [x1, y1, x2, y2].
-    x1: int
-    y1: int
-    x2: int
-    y2: int
-    label: int = 1 # 1 for positive, 0 for negative
-class InferenceRequest(BaseModel):
-    """JSON body accepted by the /predict endpoint."""
-    image: str  # Base64 encoded image
-    prompt_type: str  # 'point', 'box', 'text', 'everything'
-    # Only the field matching prompt_type is read by /predict; 'everything'
-    # reads none of them.
-    points: Optional[List[Point]] = None
-    boxes: Optional[List[Box]] = None
-    text_prompt: Optional[str] = None
-# --- Helper Functions ---
-def decode_image(base64_string):
-    if "," in base64_string:
-        base64_string = base64_string.split(",")[1]
-    image_data = base64.b64decode(base64_string)
-    image = Image.open(io.BytesIO(image_data)).convert("RGB")
-    return image
-def encode_image(image: Image.Image):
-    buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
-    return base64.b64encode(buffered.getvalue()).decode("utf-8")
-def numpy_to_base64_mask(mask_np):
-    # mask_np is bool or uint8 (0/1)
-    mask_img = Image.fromarray((mask_np * 255).astype(np.uint8))
-    return encode_image(mask_img)
-# --- Endpoints ---
-@app.get("/")
-def home():
-    return {"status": "running", "device": device}
-@app.post("/predict")
-async def predict(request: InferenceRequest):
-    global model, processor
-    if not model or not processor:
-        raise HTTPException(status_code=503, detail="Model not loaded yet")
-    try:
-        image = decode_image(request.image)
-        inputs = None
-        # Prepare inputs based on prompt type
-        if request.prompt_type == "text":
-            if not request.text_prompt:
-                raise HTTPException(status_code=400, detail="Text prompt required")
-            inputs = processor(images=image, text=request.text_prompt, return_tensors="pt").to(device)
-        elif request.prompt_type == "box":
-            if not request.boxes:
-                 raise HTTPException(status_code=400, detail="Box prompt required")
-            # Format: [[ [x1, y1, x2, y2], ... ]] - Batch size 1
-            input_boxes = [[[b.x1, b.y1, b.x2, b.y2] for b in request.boxes]]
-            input_labels = [[[b.label] for b in request.boxes]]
-            inputs = processor(
-                images=image,
-                input_boxes=input_boxes,
-                input_boxes_labels=input_labels,
-                return_tensors="pt"
-            ).to(device)
-        elif request.prompt_type == "point":
-             if not request.points:
-                 raise HTTPException(status_code=400, detail="Point prompt required")
-             # Format: [[ [x, y], ... ]] - Batch size 1
-             input_points = [[[p.x, p.y] for p in request.points]]
-             input_labels = [[[p.label] for p in request.points]]
-             inputs = processor(
-                 images=image,
-                 input_points=input_points,
-                 input_labels=input_labels,
-                 return_tensors="pt"
-             ).to(device)
-        elif request.prompt_type == "everything":
-            # For "everything", we might need a different strategy or just use grid points
-            # SAM 3 doesn't have a built-in "everything" function in the same way SAM 1 did (AutomaticMaskGenerator)
-            # but we can simulate it or check if transformers supports it.
-            # For this MVP, let's just return an error or implement a simple grid if possible.
-            # Transformers Sam3 integration is new. Let's stick to prompts for now or try a grid of points.
-            # We'll use a simple grid of points for now.
-             width, height = image.size
-             grid_size = 32
-             x = np.linspace(0, width, grid_size)
-             y = np.linspace(0, height, grid_size)
-             xv, yv = np.meshgrid(x, y)
-             grid_points = list(zip(xv.flatten(), yv.flatten()))
-             input_points = [[list(p) for p in grid_points]]
-             input_labels = [[1] * len(grid_points)] # All positive
-             # This might just get one big mask or many. Let's try it.
-             # Actually, simpler to just say feature not fully supported in this snippet without more complex logic.
-             # But let's try sending a generic text prompt "object" or "everything" :D
-             # Let's fallback to text "objects".
-             inputs = processor(images=image, text="objects", return_tensors="pt").to(device)
-        else:
-             raise HTTPException(status_code=400, detail="Invalid prompt type")
-        # Inference
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Post-process
-        results = processor.post_process_instance_segmentation(
-            outputs,
-            threshold=0.5,
-            mask_threshold=0.5,
-            target_sizes=[image.size[::-1]] # (height, width)
-        )[0]
-        # Convert results to JSON-serializable format
-        # results['masks'] is a boolean tensor of shape (num_masks, H, W)
-        masks = results['masks'].cpu().numpy()
-        scores = results['scores'].cpu().numpy().tolist()
-        boxes_out = results['boxes'].cpu().numpy().tolist() # [x1, y1, x2, y2]
-        encoded_masks = []
-        for mask in masks:
-            encoded_masks.append(numpy_to_base64_mask(mask))
-        return {
-            "masks": encoded_masks,
-            "scores": scores,
-            "boxes": boxes_out,
-            "count": len(scores)
-        }
-    except Exception as e:
-        import traceback
-        traceback.print_exc()
-        raise HTTPException(status_code=500, detail=str(e))
-# Script entry point: when run directly, serve the app with uvicorn on all
-# interfaces (0.0.0.0), port 7860.
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)