sumitsingh830 committed on
Commit
39d0c75
·
verified ·
1 Parent(s): 5f1bb4e

yes commit for file

Browse files
Files changed (4) hide show
  1. .dockerignore +83 -0
  2. Dockerfile +56 -0
  3. requirements_hf.txt +26 -0
  4. server.py +594 -0
.dockerignore ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE: .dockerignore patterns are matched relative to the build-context root.
# A bare "*.pyc" or "__pycache__/" therefore only matches at the top level;
# the "**/" prefix is required to match at any depth (including the root).

# Python
**/__pycache__/
**/*.py[cod]
**/*$py.class
*.so
.Python
env/
venv/
ENV/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
**/*.egg-info/
.installed.cfg
*.egg

# Models (don't copy large model files, they'll be downloaded at runtime)
models/*.pth
models/*.pt
**/*.pth
**/*.pt

# IDE
.vscode/
.idea/
**/*.swp
**/*.swo
**/*~

# OS
**/.DS_Store
**/Thumbs.db

# Logs
**/*.log

# Environment (never ship secrets into the image, at any depth)
**/.env
**/.env.local

# Git
.git/
.gitignore

# Documentation (optional, can be included if needed)
# README*.md
# *.md

# Test files
test/
tests/
**/*_test.py
**/*_tests.py

# Jupyter notebooks (optional)
**/*.ipynb
notebooks/

# Demo files (optional, can be included if needed)
sam2/demo/
sam2/notebooks/
sam2/sav_dataset/

# Training files (not needed for inference)
sam2/training/

# Assets (optional)
sam2/assets/

# Build artifacts
sam2/SAM_2.egg-info/
sam2/build/
sam2/dist/
83
+
Dockerfile ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile for Hugging Face Spaces deployment
# Uses the python:3.10-slim base image. The base image itself ships no CUDA
# toolkit; server.py falls back to CPU when torch.cuda.is_available() is False.
# NOTE(review): GPU acceleration depends on the installed PyTorch build and on
# the host exposing a GPU to the container - confirm for the target hardware.

FROM python:3.10-slim

# Install system dependencies
# - libgl1 provides libGL.so.1 for OpenCV. It replaces libgl1-mesa-glx, a
#   transitional package that newer Debian releases no longer provide, which
#   makes the build fail on current python:3.10-slim tags.
# - --no-install-recommends keeps optional packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements_hf.txt requirements.txt

# Install Python dependencies (excluding sam2, which will be installed from local directory)
RUN pip install --no-cache-dir -r requirements.txt

# Copy and install sam2 package from local directory
# This must be done before copying app code since app imports from sam2
COPY sam2/ ./sam2/
WORKDIR /app/sam2
# Install sam2 in editable mode, skip CUDA extension build for faster deployment
# (CUDA extension is optional and doesn't affect core functionality)
RUN SAM2_BUILD_CUDA=0 pip install --no-cache-dir -e .

# Return to app directory
WORKDIR /app

# Copy application code
COPY app/ ./app/
COPY server.py ./

# Create necessary directories
RUN mkdir -p /app/models

# Expose port (Hugging Face Spaces will map this automatically)
EXPOSE 7860

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV SAM2_BUILD_CUDA=0

# Run the FastAPI application
# The PORT environment variable is honoured when set; otherwise 7860 is used.
# The application listens on 0.0.0.0 to accept external connections.
CMD python -c "import os; port = int(os.environ.get('PORT', 7860)); import uvicorn; uvicorn.run('server:app', host='0.0.0.0', port=port, log_level='info')"
56
+
requirements_hf.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Requirements for Hugging Face Spaces deployment with GPU support
2
+ # FastAPI and server
3
+ fastapi>=0.104.1
4
+ uvicorn[standard]>=0.24.0
5
+
6
+ # Image processing
7
+ numpy>=1.26.0
8
+ opencv-python>=4.8.0
9
+ Pillow>=10.0.0
10
+ scikit-image>=0.21.0
11
+
12
+ # Deep learning and SAM2
13
+ # Note: sam2 package will be installed from local directory in Dockerfile
14
+ torch>=2.0.0
15
+ torchvision>=0.15.0
16
+ huggingface_hub>=0.20.0
17
+
18
+ # SAM2 dependencies (required for sam2 package)
19
+ hydra-core>=1.3.2
20
+ iopath>=0.1.10
21
+ tqdm>=4.66.1
22
+
23
+ # Utilities
24
+ requests>=2.31.0
25
+ psutil>=5.9.0
26
+
server.py ADDED
@@ -0,0 +1,594 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Spaces deployment for SAM2 Auto Annotation API.
3
+ This file serves as the entry point for the FastAPI application on Hugging Face Spaces.
4
+ """
5
+ import sys
6
+ import os
7
+
8
# Make the bundled "sam2" checkout (a sibling of this file) importable by
# putting its absolute path at the front of sys.path, at most once.
_current_dir = os.path.split(os.path.abspath(__file__))[0]
_sam2_dir = os.path.join(_current_dir, "sam2")
abs_sam2_dir = os.path.abspath(_sam2_dir)
if sys.path.count(abs_sam2_dir) == 0:
    sys.path.insert(0, abs_sam2_dir)
15
+
16
+ from fastapi import FastAPI, HTTPException
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ import cv2
19
+ import numpy as np
20
+ import torch
21
+ import psutil
22
+ import PIL.Image
23
+
24
+ # Import sam2 from local folder
25
+ from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
26
+ from app.sam_model import predict_polygon, predict_polygon_from_point
27
+ from app.utils import load_image_from_url, mask_to_polygon
28
+ from app.sam2_detection_function import SAM2AutoAnnotation, create_sam2_auto_annotation
29
+
30
# Hugging Face model ID for SAM2.1 Hiera Large model
HUGGINGFACE_MODEL_ID = "facebook/sam2.1-hiera-large"
# Use the GPU when PyTorch can see one, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Global SAM2 auto annotation (initialized once, lazily, by /auto-annotate)
sam2_auto_annotation_global = None

app = FastAPI(
    title="SAM Auto Annotation API (BBox ➜ Polygon)",
    description="AI-powered auto-annotation API using Meta's Segment Anything Model (SAM)",
    version="1.0.0"
)

# CORS configuration.
# A wildcard origin must not be combined with credentialed requests: doing so
# would let any website issue cookie-bearing requests against this API.
# By default every origin is allowed WITHOUT credentials. To allow credentials,
# list the trusted origins explicitly (comma-separated) in CORS_ALLOW_ORIGINS.
_cors_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "").split(",")
    if origin.strip()
]

# Add CORS middleware to handle preflight OPTIONS requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allowed_origins or ["*"],  # Explicit list, or all origins
    allow_credentials=bool(_cors_allowed_origins),  # Only with explicit origins
    allow_methods=["*"],  # Allows all methods including OPTIONS
    allow_headers=["*"],  # Allows all headers
)
51
+
52
+
53
@app.get("/")
def root():
    """Return a static description of the service (status, name, version)."""
    service_info = dict(
        status="Service is up and running!",
        message="Backend service is active",
        api="SAM Auto Annotation API",
        version="1.0.0",
    )
    return service_info
62
+
63
+
64
@app.get("/health")
def health_check():
    """Liveness probe: always reports a static "healthy" payload."""
    payload = {}
    payload["status"] = "healthy"
    payload["service"] = "same model segmenticAPI"
    return payload
68
+
69
+
70
@app.post("/segment")
def segment(data: dict):
    """
    Segment image using SAM2 model to convert bounding box to polygon (CVAT-style).
    Bbox is used as a prompt to identify the object, not as a constraint.

    **Input:**
    ```json
    {
        "imageUrl": "https://example.com/image.jpg",
        "bbox": {"x": 494.97, "y": 187.22, "width": 137.99, "height": 98.00, "label": "Object"},
        "imageSize": {"width": 663.07, "height": 442}
    }
    ```

    OR

    ```json
    {
        "imageUrl": "https://example.com/image.jpg",
        "bbox": [494.97, 187.22, 137.99, 98.00],  // [x, y, width, height]
        "imageSize": [663.07, 442]  // [width, height]
    }
    ```

    **Output:**
    ```json
    {
        "polygon": [x1, y1, x2, y2, x3, y3, ...],  // CVAT format: flattened coordinates
        "confidence": 0.96
    }
    ```

    Malformed input (missing fields, wrong container type, non-numeric
    coordinates) is answered with HTTP 400; unexpected failures with HTTP 500.
    """

    def _all_numeric(values):
        """Return True when every value can be interpreted as a float."""
        try:
            for value in values:
                float(value)
        except (TypeError, ValueError):
            return False
        return True

    try:
        # Validate input
        if "imageUrl" not in data:
            raise HTTPException(status_code=400, detail="Missing required field: imageUrl")
        if "bbox" not in data:
            raise HTTPException(status_code=400, detail="Missing required field: bbox")

        image_url = data["imageUrl"]
        bbox = data["bbox"]
        image_size = data.get("imageSize")  # Optional: for coordinate scaling

        # Reject null/non-string URLs here instead of failing inside the loader
        if not isinstance(image_url, str) or not image_url.strip():
            raise HTTPException(status_code=400, detail="imageUrl must be a non-empty string")

        # Validate bbox format
        if isinstance(bbox, dict):
            required_keys = ["x", "y", "width", "height"]
            if not all(key in bbox for key in required_keys):
                raise HTTPException(
                    status_code=400,
                    detail=f"bbox dict must contain: {required_keys}"
                )
            bbox_values = [bbox[key] for key in required_keys]
        elif isinstance(bbox, list):
            if len(bbox) != 4:
                raise HTTPException(
                    status_code=400,
                    detail="bbox list must contain exactly 4 values: [x, y, width, height]"
                )
            bbox_values = bbox
        else:
            raise HTTPException(
                status_code=400,
                detail="bbox must be either a dict or a list"
            )
        # Non-numeric coordinates are a client error, not a server error
        if not _all_numeric(bbox_values):
            raise HTTPException(status_code=400, detail="bbox values must be numeric")

        # Validate imageSize format if provided
        if image_size is not None:
            if isinstance(image_size, dict):
                if not ("width" in image_size and "height" in image_size):
                    raise HTTPException(
                        status_code=400,
                        detail="imageSize dict must contain 'width' and 'height'"
                    )
                size_values = [image_size["width"], image_size["height"]]
            elif isinstance(image_size, list):
                if len(image_size) != 2:
                    raise HTTPException(
                        status_code=400,
                        detail="imageSize list must contain exactly 2 values: [width, height]"
                    )
                size_values = image_size
            else:
                raise HTTPException(
                    status_code=400,
                    detail="imageSize must be either a dict or a list"
                )
            if not _all_numeric(size_values):
                raise HTTPException(status_code=400, detail="imageSize values must be numeric")

        # Load image from URL (converted from BGR to the RGB order passed to the predictor)
        img_bgr = load_image_from_url(image_url)
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Predict polygon using SAM2 (bbox as prompt, CVAT-style)
        mask, confidence, scale_factors = predict_polygon(img_rgb, bbox, image_size)

        # Convert mask to polygon (CVAT-style)
        polygon = mask_to_polygon(mask, scale_factors)

        if not polygon:
            raise HTTPException(status_code=400, detail="No polygon found in mask")

        return {
            "polygon": polygon,  # CVAT format: flattened coordinates
            "confidence": confidence
        }
    except HTTPException:
        # Already a well-formed API error: pass it through untouched
        raise
    except KeyError as e:
        raise HTTPException(status_code=400, detail=f"Missing required field: {str(e)}") from e
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except FileNotFoundError as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    except ImportError as e:
        raise HTTPException(
            status_code=500,
            detail="SAM2 library not installed. Please run: pip install -e . in the sam2 directory"
        ) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
186
+
187
+
188
@app.post("/segment/point")
def segment_from_point(data: dict):
    """
    Segment image using SAM2 model with a point click to select object.
    The point identifies which object to segment.

    **Input:**
    ```json
    {
        "imageUrl": "https://example.com/image.jpg",
        "point": {"x": 494.97, "y": 187.22},
        "imageSize": {"width": 663.07, "height": 442}
    }
    ```

    OR

    ```json
    {
        "imageUrl": "https://example.com/image.jpg",
        "point": [494.97, 187.22],  // [x, y]
        "imageSize": [663.07, 442]  // [width, height]
    }
    ```

    **Output:**
    ```json
    {
        "polygon": [x1, y1, x2, y2, x3, y3, ...],  // CVAT format: flattened coordinates
        "confidence": 0.96
    }
    ```

    Malformed input (missing fields, wrong container type, non-numeric
    coordinates) is answered with HTTP 400; unexpected failures with HTTP 500.
    """

    def _all_numeric(values):
        """Return True when every value can be interpreted as a float."""
        try:
            for value in values:
                float(value)
        except (TypeError, ValueError):
            return False
        return True

    try:
        # Validate input
        if "imageUrl" not in data:
            raise HTTPException(status_code=400, detail="Missing required field: imageUrl")
        if "point" not in data:
            raise HTTPException(status_code=400, detail="Missing required field: point")

        image_url = data["imageUrl"]
        point = data["point"]
        image_size = data.get("imageSize")  # Optional: for coordinate scaling

        # Reject null/non-string URLs here instead of failing inside the loader
        if not isinstance(image_url, str) or not image_url.strip():
            raise HTTPException(status_code=400, detail="imageUrl must be a non-empty string")

        # Validate point format
        if isinstance(point, dict):
            required_keys = ["x", "y"]
            if not all(key in point for key in required_keys):
                raise HTTPException(
                    status_code=400,
                    detail=f"point dict must contain: {required_keys}"
                )
            point_values = [point[key] for key in required_keys]
        elif isinstance(point, list):
            if len(point) != 2:
                raise HTTPException(
                    status_code=400,
                    detail="point list must contain exactly 2 values: [x, y]"
                )
            point_values = point
        else:
            raise HTTPException(
                status_code=400,
                detail="point must be either a dict or a list"
            )
        # Non-numeric coordinates are a client error, not a server error
        if not _all_numeric(point_values):
            raise HTTPException(status_code=400, detail="point values must be numeric")

        # Validate imageSize format if provided
        if image_size is not None:
            if isinstance(image_size, dict):
                if not ("width" in image_size and "height" in image_size):
                    raise HTTPException(
                        status_code=400,
                        detail="imageSize dict must contain 'width' and 'height'"
                    )
                size_values = [image_size["width"], image_size["height"]]
            elif isinstance(image_size, list):
                if len(image_size) != 2:
                    raise HTTPException(
                        status_code=400,
                        detail="imageSize list must contain exactly 2 values: [width, height]"
                    )
                size_values = image_size
            else:
                raise HTTPException(
                    status_code=400,
                    detail="imageSize must be either a dict or a list"
                )
            if not _all_numeric(size_values):
                raise HTTPException(status_code=400, detail="imageSize values must be numeric")

        # Load image from URL (converted from BGR to the RGB order passed to the predictor)
        img_bgr = load_image_from_url(image_url)
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Predict polygon using SAM2 (point click as prompt)
        mask, confidence, scale_factors = predict_polygon_from_point(img_rgb, point, image_size)

        # Convert mask to polygon (CVAT-style)
        polygon = mask_to_polygon(mask, scale_factors)

        if not polygon:
            raise HTTPException(status_code=400, detail="No polygon found in mask. Try clicking on a different point.")

        return {
            "polygon": polygon,  # CVAT format: flattened coordinates
            "confidence": confidence
        }
    except HTTPException:
        # Already a well-formed API error: pass it through untouched
        raise
    except KeyError as e:
        raise HTTPException(status_code=400, detail=f"Missing required field: {str(e)}") from e
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except FileNotFoundError as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    except ImportError as e:
        raise HTTPException(
            status_code=500,
            detail="SAM2 library not installed. Please run: pip install -e . in the sam2 directory"
        ) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
304
+
305
+
306
def _parse_int_option(value, name, minimum, maximum=None):
    """Coerce a request option to int and enforce its bounds, raising HTTP 400 on failure."""
    try:
        parsed = int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers int(float("inf")), which would otherwise become a 500
        if maximum is None:
            detail = f"{name} must be an integer"
        else:
            detail = f"{name} must be an integer between {minimum} and {maximum}"
        raise HTTPException(status_code=400, detail=detail) from None
    if parsed < minimum:
        raise HTTPException(status_code=400, detail=f"{name} must be >= {minimum}")
    if maximum is not None and parsed > maximum:
        raise HTTPException(status_code=400, detail=f"{name} must be <= {maximum}")
    return parsed


@app.post("/auto-annotate")
def auto_annotate(data: dict):
    """
    Automatically detect and segment all objects in an image using SAM2 from Hugging Face.
    Uses SAM2AutomaticMaskGenerator (facebook/sam2.1-hiera-large) to detect all objects without requiring prompts (bbox or points).

    **Input:**
    ```json
    {
        "imageUrl": "https://example.com/image.jpg",
        "imageSize": {"width": 663.07, "height": 442},
        "minArea": 100,
        "minConfidence": 0.5,
        "maxImageDimension": 1024,
        "pointsPerSide": 32,
        "pointsPerBatch": 64,
        "filterObjectsOnly": true
    }
    ```

    **Output:**
    ```json
    {
        "masks": [
            {
                "polygon": [x1, y1, x2, y2, x3, y3, ...],
                "confidence": 0.93,
                "area": 12345
            },
            ...
        ],
        "count": 10,
        "memoryInfo": {
            "before_mb": 512.5,
            "after_mb": 1024.3,
            "peak_mb": 1024.3,
            "estimated_mb": 800.0,
            "memory_used_mb": 511.8
        },
        "imageInfo": {
            "wasResized": true,
            "originalSize": [1920, 1080],
            "processedSize": [1024, 576],
            "resizeScale": [1.875, 1.875]
        }
    }
    ```

    Malformed input is answered with HTTP 400; unexpected failures with HTTP 500.
    """
    try:
        # Validate input
        if "imageUrl" not in data:
            raise HTTPException(status_code=400, detail="Missing required field: imageUrl")

        image_url = data["imageUrl"]
        image_size = data.get("imageSize")  # Optional: for coordinate scaling
        filter_objects_only = data.get("filterObjectsOnly", False)  # Optional: filter out background masks

        # Validate imageSize format if provided and convert it up front, so a
        # bad value is rejected with a 400 before the image is downloaded.
        display_size = None
        if image_size is not None:
            if isinstance(image_size, dict):
                if not ("width" in image_size and "height" in image_size):
                    raise HTTPException(
                        status_code=400,
                        detail="imageSize dict must contain 'width' and 'height'"
                    )
                raw_size = (image_size["width"], image_size["height"])
            elif isinstance(image_size, list):
                if len(image_size) != 2:
                    raise HTTPException(
                        status_code=400,
                        detail="imageSize list must contain exactly 2 values: [width, height]"
                    )
                raw_size = (image_size[0], image_size[1])
            else:
                raise HTTPException(
                    status_code=400,
                    detail="imageSize must be either a dict or a list"
                )
            try:
                display_size = (float(raw_size[0]), float(raw_size[1]))
            except (TypeError, ValueError):
                raise HTTPException(status_code=400, detail="imageSize values must be numeric") from None

        # Validate minArea and minConfidence
        min_area = _parse_int_option(data.get("minArea", 100), "minArea", 0)

        try:
            min_confidence = float(data.get("minConfidence", 0.5))
        except (ValueError, TypeError):
            raise HTTPException(
                status_code=400,
                detail="minConfidence must be a float between 0.0 and 1.0"
            ) from None
        # Written as "not (lo <= x <= hi)" so that NaN is rejected as well
        if not (0.0 <= min_confidence <= 1.0):
            raise HTTPException(status_code=400, detail="minConfidence must be between 0.0 and 1.0")

        # Validate maxImageDimension, pointsPerSide and pointsPerBatch
        # (lower point counts = faster processing)
        max_image_dimension = _parse_int_option(
            data.get("maxImageDimension", 1024), "maxImageDimension", 256, 4096
        )
        points_per_side = _parse_int_option(data.get("pointsPerSide", 32), "pointsPerSide", 8, 128)
        points_per_batch = _parse_int_option(data.get("pointsPerBatch", 64), "pointsPerBatch", 16, 256)

        # Get memory before processing
        process = psutil.Process(os.getpid())
        memory_before = process.memory_info().rss / (1024 * 1024)  # MB

        # Load image from URL
        img_bgr = load_image_from_url(image_url)
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        # Resize image if needed to reduce memory usage
        original_h, original_w = img_rgb.shape[:2]
        original_size = [original_w, original_h]

        processed_image = img_rgb
        resize_scale = [1.0, 1.0]
        was_resized = False

        if max(original_h, original_w) > max_image_dimension:
            was_resized = True
            # Scale the longer side down to max_image_dimension, keeping the
            # aspect ratio. max(1, ...) stops a very thin image from collapsing
            # to a zero-sized dimension, which cv2.resize rejects.
            if original_h > original_w:
                new_h = max_image_dimension
                new_w = max(1, int(original_w * (max_image_dimension / original_h)))
            else:
                new_w = max_image_dimension
                new_h = max(1, int(original_h * (max_image_dimension / original_w)))
            processed_image = cv2.resize(img_rgb, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
            resize_scale = [original_w / new_w, original_h / new_h]

        processed_h, processed_w = processed_image.shape[:2]
        processed_size = [processed_w, processed_h]

        # Estimate memory requirements (rough heuristic: float32 image +
        # 256 float32 values per pixel + 100 bytes per pixel)
        pixel_count = processed_w * processed_h
        estimated_mb = ((pixel_count * 3 * 4) + (pixel_count * 256 * 4) + (pixel_count * 100 * 1)) / (1024 * 1024)

        # Scale factors map FROM the processed image TO the display size (imageSize).
        # They are passed as (processed_w/display_w, processed_h/display_h).
        scale_factor_x, scale_factor_y = 1.0, 1.0

        if display_size is not None:
            display_w, display_h = display_size
            scale_factor_x = processed_w / display_w if display_w > 0 else 1.0
            scale_factor_y = processed_h / display_h if display_h > 0 else 1.0

        # Get image dimensions for filtering
        total_image_area = processed_w * processed_h

        # Initialize SAM2 Auto Annotation
        # This uses facebook/sam2.1-hiera-large model from Hugging Face
        # Cache the annotation instance globally to avoid reloading on every request
        # NOTE(review): the cached instance keeps the pointsPerSide /
        # pointsPerBatch / minArea of the FIRST request; later requests with
        # different values reuse it unchanged. Rebuilding per configuration
        # would fix that, but its cost depends on create_sam2_auto_annotation - confirm.
        global sam2_auto_annotation_global

        if sam2_auto_annotation_global is None:
            try:
                sam2_auto_annotation_global = create_sam2_auto_annotation(
                    points_per_side=points_per_side,
                    points_per_batch=points_per_batch,
                    pred_iou_thresh=0.88,
                    stability_score_thresh=0.95,
                    min_mask_region_area=min_area,
                )
            except ImportError as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to import required modules. Please ensure 'sam2' and 'huggingface_hub' are installed. Error: {str(e)}"
                ) from e
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to load SAM2 Auto Annotation from Hugging Face ({HUGGINGFACE_MODEL_ID}). Error: {str(e)}"
                ) from e

        # Generate masks, passing scale_factors so results are expressed in display coordinates
        mask_results = sam2_auto_annotation_global.generate_masks(
            image=processed_image,
            min_confidence=min_confidence,
            min_area=min_area,
            filter_blank_regions=True,
            scale_factors=(scale_factor_x, scale_factor_y)
        )

        # Get memory after processing
        memory_after = process.memory_info().rss / (1024 * 1024)  # MB
        memory_used = memory_after - memory_before

        # Process mask results
        results = []

        for mask_result in mask_results:
            # Extract mask information; any of these may be missing (None)
            polygon = mask_result.get("polygon")
            score = mask_result.get("confidence")
            area = mask_result.get("area")

            # Early filtering: skip masks that don't meet basic criteria.
            # A missing value is not compared (None < number raises TypeError).
            if area is not None and area < min_area:
                continue
            if score is not None and score < min_confidence:
                continue

            # Filter out background masks if filterObjectsOnly is True
            if filter_objects_only and area is not None:
                coverage_ratio = area / total_image_area if total_image_area > 0 else 0
                if coverage_ratio >= 0.8:  # Skip masks covering >=80% (likely background)
                    continue

            # Return polygon in flattened format [x1, y1, x2, y2, ...]
            if polygon and len(polygon) >= 6:  # At least 3 points
                mask_obj = {
                    "polygon": polygon
                }
                if score is not None:
                    mask_obj["confidence"] = score
                if area is not None:
                    mask_obj["area"] = area
                results.append(mask_obj)

        # Build response with all required fields
        response = {
            "masks": results,
            "count": len(results),
            "memoryInfo": {
                "before_mb": round(memory_before, 2),
                "after_mb": round(memory_after, 2),
                # No true peak is tracked; the post-processing RSS is reported
                "peak_mb": round(memory_after, 2),
                "estimated_mb": round(estimated_mb, 2),
                "memory_used_mb": round(memory_used, 2)
            },
            "imageInfo": {
                "wasResized": was_resized,
                "originalSize": original_size,
                "processedSize": processed_size,
                "resizeScale": resize_scale
            }
        }

        return response

    except HTTPException:
        # Already a well-formed API error: pass it through untouched
        raise
    except KeyError as e:
        raise HTTPException(status_code=400, detail=f"Missing required field: {str(e)}") from e
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except FileNotFoundError as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    except ImportError as e:
        raise HTTPException(
            status_code=500,
            detail="SAM2 library not installed. Please ensure 'sam2' and 'huggingface_hub' are installed."
        ) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
594
+