yusef committed on
Commit
df64c50
ยท
0 Parent(s):

Initial commit - V5.1 API

Browse files
Files changed (6) hide show
  1. Dockerfile +53 -0
  2. README.md +39 -0
  3. app.py +131 -0
  4. inference.py +447 -0
  5. model_manager.py +94 -0
  6. post_processor.py +333 -0
Dockerfile ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# System dependencies (libgl1/libglib are required by OpenCV at import time)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

# Create app directory
WORKDIR /app

# Step 1: Install PyTorch FIRST (CPU-only to save space)
RUN pip install --no-cache-dir \
    torch torchvision --index-url https://download.pytorch.org/whl/cpu

# Step 2: Install Detectron2 (needs torch already installed)
RUN pip install --no-cache-dir \
    'git+https://github.com/facebookresearch/detectron2.git'

# Step 3: Install remaining dependencies
# NOTE: every version specifier is quoted — an unquoted `>=` is parsed by the
# shell as output redirection, so e.g. `fastapi>=0.104.0` would install an
# unpinned fastapi and create a stray file named `=0.104.0`.
RUN pip install --no-cache-dir \
    "fastapi>=0.104.0" \
    "uvicorn[standard]>=0.24.0" \
    "opencv-python-headless>=4.8.0" \
    "numpy>=1.24.0" \
    "Pillow>=10.0.0" \
    "requests>=2.31.0" \
    "huggingface_hub>=0.19.0" \
    "python-multipart>=0.0.6"

# Step 4: V5.1 Pipeline — MobileSAM + SigLIP
RUN pip install --no-cache-dir \
    "transformers>=4.37.0" \
    "timm>=0.9.0" \
    'git+https://github.com/ChaoningZhang/MobileSAM.git'

# Copy app code
COPY . .

# Create a non-root user (HF Spaces requirement)
RUN useradd -m -u 1000 user
USER user

# Expose port (HF Spaces uses 7860)
EXPOSE 7860

# Start the server
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Building Detection API
3
+ emoji: ๐Ÿ—๏ธ
4
+ colorFrom: orange
5
+ colorTo: red
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: true
9
+ ---
10
+
11
+ # ๐Ÿ—๏ธ Building Detection API
12
+
13
+ Detect buildings from satellite imagery using Mask R-CNN V5.
14
+
15
+ ## API Endpoints
16
+
17
+ - `GET /` โ€” Health check + model info
18
+ - `GET /health` โ€” Health check
19
+ - `POST /detect` โ€” Detect buildings in a polygon area
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ curl -X POST https://your-space.hf.space/detect \
25
+ -H "Content-Type: application/json" \
26
+ -d '{
27
+ "coordinates": [[31.24, 30.04], [31.25, 30.04], [31.25, 30.05], [31.24, 30.05]],
28
+ "threshold": 0.3
29
+ }'
30
+ ```
31
+
32
+ ## Environment Variables
33
+
34
+ | Variable | Default | Description |
35
+ |----------|---------|-------------|
36
+ | `MODEL_REPO` | `yusef75/building-detection-models` | HF model repository |
37
+ | `MODEL_VERSION` | `v5` | Model version folder |
38
+ | `MODEL_FILENAME` | `model_final.pth` | Model file name |
39
+ | `SCORE_THRESHOLD` | `0.3` | Default detection threshold |
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Building Detection API โ€” FastAPI Backend for Hugging Face Spaces.
3
+
4
+ Endpoints:
5
+ GET / โ†’ Health check + model info
6
+ GET /health โ†’ Health check
7
+ POST /detect โ†’ Detect buildings in a polygon area
8
+ """
9
+
10
+ from fastapi import FastAPI, HTTPException
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from pydantic import BaseModel, Field
13
+ from typing import List, Optional
14
+ import uvicorn
15
+
16
+ from model_manager import load_model, get_model_info
17
+ from inference import detect_buildings
18
+
19
+ # ==========================================
20
+ # === App Setup ===
21
+ # ==========================================
22
+ app = FastAPI(
23
+ title="๐Ÿ—๏ธ Building Detection API",
24
+ description="Detect buildings from satellite imagery using Mask R-CNN V5",
25
+ version="1.0.0",
26
+ )
27
+
28
+ # Allow CORS for Vercel frontend
29
+ app.add_middleware(
30
+ CORSMiddleware,
31
+ allow_origins=["*"], # In production, restrict to your Vercel domain
32
+ allow_credentials=True,
33
+ allow_methods=["*"],
34
+ allow_headers=["*"],
35
+ )
36
+
37
+
38
+ # ==========================================
39
+ # === Request / Response Models ===
40
+ # ==========================================
41
class DetectRequest(BaseModel):
    """Request body for POST /detect."""

    # Polygon ring in GeoJSON axis order: [[lng, lat], ...].
    coordinates: List[List[float]] = Field(
        ...,
        description="Polygon coordinates as [[lng, lat], ...] in GeoJSON format",
        example=[[31.24, 30.04], [31.25, 30.04], [31.25, 30.05], [31.24, 30.05]],
    )
    # Confidence cutoff applied to raw detections; clamped to [0.1, 0.95].
    threshold: Optional[float] = Field(
        default=0.5,
        ge=0.1,
        le=0.95,
        description="Detection confidence threshold",
    )
    # When True, detections are refined by the V5.1 pipeline (MobileSAM + SigLIP).
    use_v51: Optional[bool] = Field(
        default=True,
        description="Enable V5.1 pipeline (MobileSAM + SigLIP) for better accuracy",
    )
57
+
58
+
59
class DetectResponse(BaseModel):
    """Response body for POST /detect."""

    geojson: dict  # GeoJSON FeatureCollection of detected building polygons
    stats: dict  # processing statistics (counts, timing, bounds, threshold)
62
+
63
+
64
class HealthResponse(BaseModel):
    """Response body for GET / and GET /health."""

    status: str  # human-readable status string (e.g. "🟢 online")
    model: dict  # metadata about the loaded model (see get_model_info)
67
+
68
+
69
+ # ==========================================
70
+ # === Startup Event ===
71
+ # ==========================================
72
@app.on_event("startup")
async def startup():
    """Load the detection model once when the server starts.

    NOTE(review): ``@app.on_event`` is deprecated in recent FastAPI versions
    in favour of lifespan handlers; kept as-is to avoid a behavior change.
    """
    print("🚀 Starting Building Detection API...")
    # Blocks until the model is downloaded from HF Hub and loaded.
    load_model()
    print("✅ API ready!")
78
+
79
+
80
+ # ==========================================
81
+ # === Endpoints ===
82
+ # ==========================================
83
@app.get("/", response_model=HealthResponse)
async def root():
    """Health check and model info (served at the Space's root URL)."""
    payload = {"status": "🟢 online", "model": get_model_info()}
    return payload
90
+
91
+
92
@app.get("/health", response_model=HealthResponse)
async def health():
    """Health check endpoint (same payload as the root endpoint)."""
    info = get_model_info()
    return {"status": "🟢 online", "model": info}
99
+
100
+
101
@app.post("/detect", response_model=DetectResponse)
async def detect(request: DetectRequest):
    """
    Detect buildings in the specified polygon area.

    Send polygon coordinates in GeoJSON format [[lng, lat], ...].
    Returns a GeoJSON FeatureCollection with detected building polygons.
    """
    try:
        result = detect_buildings(
            coordinates=request.coordinates,
            threshold=request.threshold,
            use_v51=request.use_v51,
        )
    except HTTPException:
        # Propagate explicit HTTP errors untouched.
        raise
    except Exception as e:
        # Anything unexpected becomes a 500 with the error message attached.
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")

    # detect_buildings signals user errors (bad polygon, area too large)
    # via an "error" key rather than raising.
    if "error" in result:
        raise HTTPException(status_code=400, detail=result["error"])

    return result
125
+
126
+
127
+ # ==========================================
128
+ # === Run ===
129
+ # ==========================================
130
+ if __name__ == "__main__":
131
+ uvicorn.run(app, host="0.0.0.0", port=7860)
inference.py ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Inference Engine โ€” Tile downloading + Building detection + Deduplication.
3
+ Adapted from MaskRCNN_V5_MapFlow.py for server deployment.
4
+ """
5
+
6
+ import math
7
+ import time
8
+ import numpy as np
9
+ import cv2
10
+ import requests
11
+ from PIL import Image
12
+ from io import BytesIO
13
+ from model_manager import get_predictor, set_threshold
14
+ from post_processor import run_v51_pipeline
15
+
16
+ # ==========================================
17
+ # === Constants ===
18
+ # ==========================================
19
+ ZOOM = 18
20
+ TILE_SIZE = 256
21
+ TILES_PER_IMG = 2
22
+ IMG_SIZE = 512
23
+ MAX_TILES = 60 # Safety limit
24
+ MIN_BUILDING_AREA = 200 # Min contour area in pixels (filters tiny false positives)
25
+
26
+
27
+ # ==========================================
28
+ # === Coordinate Utils ===
29
+ # ==========================================
30
def lon_to_tile_x(lon):
    """Longitude (degrees) → fractional XYZ tile x at the fixed ZOOM level."""
    world = 2 ** ZOOM  # number of tiles across the world at this zoom
    return (lon + 180) / 360 * world
32
+
33
+
34
def lat_to_tile_y(lat):
    """Latitude (degrees) → fractional XYZ tile y (Web-Mercator) at ZOOM."""
    phi = math.radians(lat)
    mercator = math.log(math.tan(phi) + 1 / math.cos(phi))
    return (1 - mercator / math.pi) / 2 * (2 ** ZOOM)
37
+
38
+
39
def tile_x_to_lon(tx):
    """Fractional XYZ tile x → longitude in degrees."""
    world = 2 ** ZOOM
    return tx / world * 360 - 180
41
+
42
+
43
def tile_y_to_lat(ty):
    """Fractional XYZ tile y → latitude in degrees (inverse Web-Mercator)."""
    merc = math.pi - 2 * math.pi * ty / (2 ** ZOOM)
    return math.degrees(math.atan(math.sinh(merc)))
46
+
47
+
48
def pixel_to_geo(px, py, grid_x, grid_y):
    """Pixel (px, py) inside the 512×512 image of grid cell (grid_x, grid_y) → (lon, lat)."""
    frac_x = grid_x * TILES_PER_IMG + px / TILE_SIZE
    frac_y = grid_y * TILES_PER_IMG + py / TILE_SIZE
    return tile_x_to_lon(frac_x), tile_y_to_lat(frac_y)
52
+
53
+
54
+ # ==========================================
55
+ # === Tile Downloading ===
56
+ # ==========================================
57
+ session = requests.Session()
58
+ session.headers.update({"User-Agent": "Mozilla/5.0"})
59
+
60
+
61
def download_tile_512(grid_x, grid_y):
    """Download 2×2 tiles to create a 512×512 satellite image.

    Tiles that fail to download are left black (zeros); the caller skips
    mostly-dark images, so a fully failed grid cell is effectively dropped.
    """
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    base_tx = grid_x * TILES_PER_IMG
    base_ty = grid_y * TILES_PER_IMG

    for dy in range(TILES_PER_IMG):
        for dx in range(TILES_PER_IMG):
            tx, ty = base_tx + dx, base_ty + dy
            # Spread requests across the mt0–mt3 mirror servers.
            s = (tx + ty) % 4
            url = f"https://mt{s}.google.com/vt/lyrs=s&x={tx}&y={ty}&z={ZOOM}"
            try:
                r = session.get(url, timeout=15)
                tile = np.array(Image.open(BytesIO(r.content)).convert("RGB"))
                # Paste the 256×256 tile into its quadrant of the mosaic.
                img[dy * TILE_SIZE:(dy + 1) * TILE_SIZE,
                    dx * TILE_SIZE:(dx + 1) * TILE_SIZE] = tile
            except Exception:
                # Best-effort: a failed tile stays black instead of aborting.
                pass
    return img
80
+
81
+
82
+ # ==========================================
83
+ # === Polygon โ†’ Tiles ===
84
+ # ==========================================
85
def get_tiles_for_polygon(polygon_coords):
    """
    Convert polygon coordinates to grid tile indices.
    Input: list of [lat, lon] pairs.
    Returns: list of (grid_x, grid_y) tuples and bounds (min_lat, max_lat, min_lon, max_lon).
    """
    lats = [p[0] for p in polygon_coords]
    lons = [p[1] for p in polygon_coords]

    min_lat, max_lat = min(lats), max(lats)
    min_lon, max_lon = min(lons), max(lons)

    # Tile y grows southward, so the northernmost latitude (max_lat)
    # maps to the smallest tile y.
    gx_lo = int(lon_to_tile_x(min_lon)) // TILES_PER_IMG
    gx_hi = int(lon_to_tile_x(max_lon)) // TILES_PER_IMG
    gy_lo = int(lat_to_tile_y(max_lat)) // TILES_PER_IMG
    gy_hi = int(lat_to_tile_y(min_lat)) // TILES_PER_IMG

    # Row-major enumeration of every grid cell covering the bounding box.
    tiles = [
        (gx, gy)
        for gy in range(gy_lo, gy_hi + 1)
        for gx in range(gx_lo, gx_hi + 1)
    ]

    return tiles, (min_lat, max_lat, min_lon, max_lon)
113
+
114
+
115
+ # ==========================================
116
+ # === Polygon Regularization ===
117
+ # ==========================================
118
+ def regularize_polygon(contour, rect):
119
+ """
120
+ Regularize polygon edges by snapping to the building's dominant direction.
121
+
122
+ 1. Get dominant angle from minAreaRect
123
+ 2. Rotate polygon so dominant direction = horizontal
124
+ 3. Snap nearly-horizontal edges โ†’ exact horizontal
125
+ Snap nearly-vertical edges โ†’ exact vertical
126
+ 4. Rotate back
127
+ """
128
+ points = contour.reshape(-1, 2).astype(float)
129
+ n = len(points)
130
+ if n < 4:
131
+ return contour
132
+
133
+ angle = rect[2]
134
+ angle_rad = math.radians(angle)
135
+ cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)
136
+
137
+ center = np.mean(points, axis=0)
138
+
139
+ # Rotate to align dominant direction with horizontal axis
140
+ rotated = np.zeros_like(points)
141
+ for i, p in enumerate(points):
142
+ dx, dy = p[0] - center[0], p[1] - center[1]
143
+ rotated[i] = [dx * cos_a + dy * sin_a, -dx * sin_a + dy * cos_a]
144
+
145
+ # Snap edges within 15ยฐ of horizontal/vertical
146
+ SNAP_ANGLE = 15
147
+ for i in range(n):
148
+ j = (i + 1) % n
149
+ dx = rotated[j][0] - rotated[i][0]
150
+ dy = rotated[j][1] - rotated[i][1]
151
+ if abs(dx) < 1e-6 and abs(dy) < 1e-6:
152
+ continue
153
+ edge_angle = abs(math.degrees(math.atan2(abs(dy), abs(dx))))
154
+
155
+ if edge_angle < SNAP_ANGLE: # Nearly horizontal
156
+ rotated[j][1] = rotated[i][1]
157
+ elif edge_angle > (90 - SNAP_ANGLE): # Nearly vertical
158
+ rotated[j][0] = rotated[i][0]
159
+
160
+ # Rotate back
161
+ result = np.zeros_like(points)
162
+ for i, p in enumerate(rotated):
163
+ rx = p[0] * cos_a - p[1] * sin_a + center[0]
164
+ ry = p[0] * sin_a + p[1] * cos_a + center[1]
165
+ result[i] = [round(rx), round(ry)]
166
+
167
+ return result.astype(int)
168
+
169
+
170
+ # ==========================================
171
+ # === Mask โ†’ GeoJSON (with regularization) ===
172
+ # ==========================================
173
def mask_to_geo_polygon(mask, grid_x, grid_y, score):
    """Convert a binary mask to a GeoJSON Feature with angle regularization.

    Returns None when the mask has no contour, its largest contour is
    smaller than MIN_BUILDING_AREA, or it simplifies to < 3 vertices.
    """
    contours, _ = cv2.findContours(
        mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return None

    # Keep only the largest connected region of the mask.
    contour = max(contours, key=cv2.contourArea)
    if cv2.contourArea(contour) < MIN_BUILDING_AREA:
        return None

    # Simplify the contour; epsilon is proportional to the perimeter.
    epsilon = 0.008 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    if len(approx) < 3:
        return None

    # Regularize angles (snap edges toward 90°) using the full contour's
    # dominant orientation; triangles are left as-is.
    rect = cv2.minAreaRect(contour)
    if len(approx) >= 4:
        pixel_points = regularize_polygon(approx, rect)
    else:
        pixel_points = approx.reshape(-1, 2)

    # Convert pixel coordinates to geographic coordinates
    geo_coords = []
    for pt in pixel_points:
        px, py = int(pt[0]), int(pt[1])
        lon, lat = pixel_to_geo(px, py, grid_x, grid_y)
        geo_coords.append([lon, lat])
    geo_coords.append(geo_coords[0])  # Close polygon (GeoJSON ring rule)

    return {
        "type": "Feature",
        "properties": {"confidence": round(float(score), 3)},
        "geometry": {"type": "Polygon", "coordinates": [geo_coords]},
    }
211
+
212
+
213
def polygon_area(coords):
    """Area of a polygon via the Shoelace formula (in the coords' own units)."""
    count = len(coords)
    if count < 3:
        return 0  # degenerate: a point or segment has no area
    total = 0
    for i in range(count):
        nxt = (i + 1) % count
        total += coords[i][0] * coords[nxt][1] - coords[nxt][0] * coords[i][1]
    return abs(total) / 2
224
+
225
+
226
def bboxes_overlap(coords1, coords2):
    """Check whether the axis-aligned bounding boxes of two polygons overlap."""
    x1_lo = min(c[0] for c in coords1)
    x1_hi = max(c[0] for c in coords1)
    y1_lo = min(c[1] for c in coords1)
    y1_hi = max(c[1] for c in coords1)
    x2_lo = min(c[0] for c in coords2)
    x2_hi = max(c[0] for c in coords2)
    y2_lo = min(c[1] for c in coords2)
    y2_hi = max(c[1] for c in coords2)

    # Boxes are disjoint iff one lies strictly to one side of the other.
    return x1_hi >= x2_lo and x2_hi >= x1_lo and y1_hi >= y2_lo and y2_hi >= y1_lo
235
+
236
+
237
def deduplicate_buildings(features, distance_threshold=0.0003):
    """
    Remove duplicate buildings detected from overlapping tiles.

    Two features count as duplicates when their centroids are within
    ``distance_threshold`` (≈30 m at the equator, in degrees), their areas
    are within 2x of each other, and their bounding boxes overlap. The
    higher-confidence feature of each duplicate pair is kept.

    Args:
        features: GeoJSON Feature dicts with a "confidence" property.
        distance_threshold: max centroid distance (degrees) for duplicates.

    Returns:
        Filtered feature list, ordered by descending confidence.
    """
    if not features:
        return features

    # Pre-compute centroids and areas once.
    centroids = []
    areas = []
    for f in features:
        coords = f["geometry"]["coordinates"][0]
        cx = np.mean([c[0] for c in coords])
        cy = np.mean([c[1] for c in coords])
        centroids.append((cx, cy))
        areas.append(polygon_area(coords))

    # Visit features in order of descending confidence so the stronger
    # detection of each duplicate pair survives.
    indices = sorted(
        range(len(features)),
        key=lambda i: features[i]["properties"]["confidence"],
        reverse=True,
    )

    keep = []
    kept = set()  # mirrors `keep` for O(1) membership (was rebuilt per inner iteration)
    removed = set()

    for i in indices:
        if i in removed:
            continue
        keep.append(i)
        kept.add(i)
        cx1, cy1 = centroids[i]
        area1 = areas[i]
        coords1 = features[i]["geometry"]["coordinates"][0]

        for j in indices:
            if j in removed or j == i or j in kept:
                continue
            cx2, cy2 = centroids[j]
            area2 = areas[j]

            # Cheap centroid-distance gate first.
            dist = math.sqrt((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2)
            if dist > distance_threshold:
                continue

            # Area similarity check (within 2x of each other).
            if area1 > 0 and area2 > 0:
                ratio = max(area1, area2) / min(area1, area2)
                if ratio > 2.0:
                    continue  # Very different sizes — probably different buildings

            # Final confirmation: bounding boxes must actually overlap.
            coords2 = features[j]["geometry"]["coordinates"][0]
            if bboxes_overlap(coords1, coords2):
                removed.add(j)

    return [features[i] for i in keep]
298
+
299
+
300
+ # ==========================================
301
+ # === Point-in-Polygon Test ===
302
+ # ==========================================
303
def point_in_polygon(px, py, polygon):
    """
    Ray-casting test: True when point (px, py) lies inside `polygon`.
    polygon: list of [x, y] pairs (the ring need not repeat its first vertex).
    """
    inside = False
    prev = len(polygon) - 1
    for cur in range(len(polygon)):
        x_c, y_c = polygon[cur]
        x_p, y_p = polygon[prev]
        # Edge crosses the horizontal line through py, and the crossing
        # point is to the right of px → toggle parity.
        if (y_c > py) != (y_p > py):
            if px < (x_p - x_c) * (py - y_c) / (y_p - y_c) + x_c:
                inside = not inside
        prev = cur
    return inside
318
+
319
+
320
+ # ==========================================
321
+ # === Main Processing Function ===
322
+ # ==========================================
323
def detect_buildings(coordinates, threshold=0.5, use_v51=False):
    """
    Process a polygon area and detect buildings.

    Args:
        coordinates: list of [lng, lat] pairs (GeoJSON axis order)
        threshold: detection confidence threshold applied to V5 scores
        use_v51: when True, refine detections with the V5.1 pipeline
            (MobileSAM splitting + SigLIP zero-shot filtering)

    Returns:
        dict with GeoJSON FeatureCollection + stats, or a dict containing
        an "error" key for invalid input / oversized areas.
    """
    # Convert from GeoJSON [lng, lat] to [lat, lng]; malformed points are
    # silently dropped.
    coords = []
    for point in coordinates:
        if isinstance(point, list) and len(point) == 2:
            coords.append([point[1], point[0]])

    if len(coords) < 3:
        return {"error": "Need at least 3 points to form a polygon"}

    # Build user polygon in [lng, lat] format for clipping
    user_polygon = [[c[1], c[0]] for c in coords]  # [lng, lat]

    predictor = get_predictor()

    # Map the polygon's bounding box onto 512×512 grid cells.
    tiles, bounds = get_tiles_for_polygon(coords)
    n_tiles = len(tiles)

    if n_tiles > MAX_TILES:
        return {
            "error": f"Area too large! {n_tiles} tiles needed, max is {MAX_TILES}. Draw a smaller polygon.",
            "tiles_needed": n_tiles,
            "max_tiles": MAX_TILES,
        }

    # Process tiles
    all_features = []
    start_time = time.time()

    for idx, (gx, gy) in enumerate(tiles):
        img = download_tile_512(gx, gy)

        # Skip dark/empty tiles (failed downloads come back black).
        if np.mean(img) < 10:
            continue

        # Detectron2's DefaultPredictor expects BGR input.
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        outputs = predictor(img_bgr)
        instances = outputs["instances"].to("cpu")

        if len(instances) == 0:
            continue

        raw_masks = instances.pred_masks.numpy()
        raw_scores = instances.scores.numpy()

        # ── V5.1 Pipeline (optional) ──────────────────────────
        if use_v51:
            # Pre-filter by confidence first (faster)
            conf_masks = [m for m, s in zip(raw_masks, raw_scores) if float(s) >= threshold]
            conf_scores = [float(s) for s in raw_scores if float(s) >= threshold]

            if conf_masks:
                print(f" [V5.1] Tile {idx+1}/{len(tiles)}: {len(conf_masks)} masks → pipeline...")
                v51_results = run_v51_pipeline(
                    image_rgb=img,
                    v5_masks=conf_masks,
                    v5_scores=conf_scores,
                    use_sam=True,
                    use_siglip=True,
                )
                for res in v51_results:
                    feature = mask_to_geo_polygon(res["mask"], gx, gy, res["score"])
                    if feature:
                        # V5.1 also estimates a real-world footprint area.
                        feature["properties"]["area_m2"] = res["area_m2"]
                        all_features.append(feature)

        # ── V5 Original Pipeline ────────────────────────────────
        else:
            for mask, score in zip(raw_masks, raw_scores):
                if float(score) < threshold:
                    continue
                feature = mask_to_geo_polygon(mask, gx, gy, score)
                if feature:
                    all_features.append(feature)

    # Clip to user polygon — only keep buildings whose centroid is inside
    clipped_features = []
    for f in all_features:
        poly_coords = f["geometry"]["coordinates"][0]
        cx = np.mean([c[0] for c in poly_coords])  # lng
        cy = np.mean([c[1] for c in poly_coords])  # lat
        if point_in_polygon(cx, cy, user_polygon):
            clipped_features.append(f)

    all_features = clipped_features

    # Deduplicate buildings detected in more than one overlapping tile.
    before_dedup = len(all_features)
    all_features = deduplicate_buildings(all_features)
    after_dedup = len(all_features)
    elapsed = time.time() - start_time

    # Build response
    geojson = {
        "type": "FeatureCollection",
        "features": all_features,
    }

    stats = {
        "buildings_detected": after_dedup,
        "duplicates_removed": before_dedup - after_dedup,
        # NOTE(review): this is the number of *scheduled* tiles; skipped
        # dark/empty tiles are still counted — confirm that's intended.
        "tiles_processed": n_tiles,
        "processing_time_seconds": round(elapsed, 1),
        "threshold": threshold,
        "bounds": {
            "min_lat": bounds[0],
            "max_lat": bounds[1],
            "min_lon": bounds[2],
            "max_lon": bounds[3],
        },
    }

    return {"geojson": geojson, "stats": stats}
model_manager.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model Manager โ€” Easy version swapping for Building Detection models.
3
+
4
+ To swap models:
5
+ 1. Upload new model to HF repo (e.g., v6/model_final.pth)
6
+ 2. Set MODEL_VERSION env var to "v6"
7
+ 3. Restart the Space
8
+ """
9
+
10
+ import os
11
+ import torch
12
+ from detectron2.config import get_cfg
13
+ from detectron2 import model_zoo
14
+ from detectron2.engine import DefaultPredictor
15
+ from huggingface_hub import hf_hub_download
16
+
17
+ # ==========================================
18
+ # === Configuration ===
19
+ # ==========================================
20
+ MODEL_REPO = os.environ.get("MODEL_REPO", "yusef75/building-detection-models")
21
+ MODEL_VERSION = os.environ.get("MODEL_VERSION", "v5")
22
+ MODEL_FILENAME = os.environ.get("MODEL_FILENAME", "model_final.pth")
23
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
24
+ SCORE_THRESHOLD = float(os.environ.get("SCORE_THRESHOLD", "0.3"))
25
+
26
+ # Global predictor
27
+ _predictor = None
28
+ _model_info = {}
29
+
30
+
31
def load_model():
    """Load model from Hugging Face Hub. Called once at startup.

    Downloads MODEL_VERSION/MODEL_FILENAME from MODEL_REPO, builds a
    Detectron2 DefaultPredictor configured for single-class (building)
    instance segmentation, and stores it in the module-level `_predictor`.

    Returns:
        The initialized DefaultPredictor.
    """
    global _predictor, _model_info

    print(f"🔍 Loading model: {MODEL_REPO} / {MODEL_VERSION} / {MODEL_FILENAME}")
    print(f"🖥️ Device: {DEVICE}")

    # Download model from HF Hub (cached under /tmp/models between calls).
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=f"{MODEL_VERSION}/{MODEL_FILENAME}",
        cache_dir="/tmp/models",
    )
    print(f"✅ Model downloaded to: {model_path}")

    # Configure Detectron2 starting from the standard Mask R-CNN R50-FPN config.
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single "building" class
    cfg.MODEL.WEIGHTS = model_path
    cfg.MODEL.DEVICE = DEVICE
    # Input tiles are 512×512, so pin test-time resizing to that size.
    cfg.INPUT.MIN_SIZE_TEST = 512
    cfg.INPUT.MAX_SIZE_TEST = 512

    # === Detection quality settings ===
    # Low base threshold — actual filtering happens in inference.py
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1

    # NMS: Aggressively remove overlapping detections (lower = stricter)
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3

    # Max detections per image (fewer = less overlap)
    cfg.TEST.DETECTIONS_PER_IMAGE = 200

    _predictor = DefaultPredictor(cfg)
    _model_info = {
        "version": MODEL_VERSION,
        "repo": MODEL_REPO,
        "device": DEVICE,
        "threshold": SCORE_THRESHOLD,
    }
    print(f"🚀 Model {MODEL_VERSION} loaded on {DEVICE}!")
    return _predictor
75
+
76
+
77
def get_predictor():
    """Return the shared predictor, lazily loading the model on first use."""
    global _predictor
    if _predictor is not None:
        return _predictor
    load_model()  # populates the module-level _predictor
    return _predictor
83
+
84
+
85
def get_model_info():
    """Return metadata (version / repo / device / threshold) for the loaded model."""
    return _model_info
88
+
89
+
90
def set_threshold(threshold: float):
    """Update the predictor's score threshold in place (no-op when not loaded)."""
    global _predictor
    if _predictor is None:
        return
    _predictor.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
post_processor.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ V5.1 Post-Processing Pipeline โ€” "The Hybrid Eye"
3
+ =================================================
4
+ ูŠุดุชุบู„ ุจุนุฏ V5 ู…ุจุงุดุฑุฉ ุจุฏูˆู† ุฃูŠ ุชุฏุฑูŠุจ ุฌุฏูŠุฏ.
5
+
6
+ Pipeline:
7
+ 1. V5 (Hunter) โ†’ masks ุฃูˆู„ูŠุฉ
8
+ 2. MobileSAM โ†’ ูŠู‚ุทุน ุงู„ูƒุชู„ ุงู„ู…ุชู„ุงุตู‚ุฉ ู„ู€ sub-masks
9
+ 3. SigLIP โ†’ Zero-Shot: building vs non-building
10
+ 4. Geometric Rules โ†’ area + shape filter + area_m2
11
+
12
+ ุงู„ุชุซุจูŠุช:
13
+ pip install git+https://github.com/ChaoningZhang/MobileSAM.git
14
+ pip install transformers torch
15
+ """
16
+
17
+ import math
18
+ import numpy as np
19
+ import cv2
20
+ import torch
21
+ from PIL import Image
22
+
23
+ # ============================================================
24
+ # === ุชุญู…ูŠู„ ุงู„ู…ูˆุฏูŠู„ุงุช (ู…ุฑุฉ ูˆุงุญุฏุฉ) ===
25
+ # ============================================================
26
+
27
+ _mobile_sam = None
28
+ _sam_predictor = None
29
+ _siglip_model = None
30
+ _siglip_processor = None
31
+
32
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
33
+
34
+
35
def load_mobile_sam():
    """Lazily load MobileSAM (tiny ~5.8 MB checkpoint) and cache its predictor.

    Returns the cached SamPredictor, or None if MobileSAM or its checkpoint
    is unavailable — callers treat None as "skip SAM refinement".
    """
    global _mobile_sam, _sam_predictor
    if _sam_predictor is not None:
        return _sam_predictor  # already loaded

    try:
        from mobile_sam import sam_model_registry, SamPredictor
        from huggingface_hub import hf_hub_download

        print("📥 تحميل MobileSAM...")
        # Fetch the checkpoint from the community MobileSAM mirror repo.
        ckpt = hf_hub_download(
            repo_id="dhkim2810/MobileSAM",
            filename="mobile_sam.pt",
        )
        _mobile_sam = sam_model_registry["vit_t"](checkpoint=ckpt)
        _mobile_sam.to(DEVICE).eval()
        _sam_predictor = SamPredictor(_mobile_sam)
        print("✅ MobileSAM جاهز!")
        return _sam_predictor

    except Exception as e:
        # Best-effort: SAM is optional, so report and fall back gracefully.
        print(f"⚠️ MobileSAM مش متاح: {e}")
        return None
59
+
60
+
61
def load_siglip():
    """Lazily load SigLIP for zero-shot building / non-building classification.

    Returns (model, processor), or (None, None) if transformers / SigLIP is
    unavailable — callers treat None as "skip the SigLIP check".
    """
    global _siglip_model, _siglip_processor
    if _siglip_model is not None:
        return _siglip_model, _siglip_processor  # already loaded

    try:
        from transformers import SiglipProcessor, SiglipModel

        print("📥 تحميل SigLIP...")
        model_id = "google/siglip-base-patch16-224"
        _siglip_processor = SiglipProcessor.from_pretrained(model_id)
        _siglip_model = SiglipModel.from_pretrained(
            model_id,
            torch_dtype=torch.float32,  # always float32 (safe on CPU)
        ).to(DEVICE).eval()
        print("✅ SigLIP جاهز!")
        return _siglip_model, _siglip_processor

    except Exception as e:
        # Best-effort: SigLIP is optional, so report and disable the check.
        print(f"⚠️ SigLIP مش متاح: {e}")
        return None, None
83
+
84
+
85
+ # ============================================================
86
+ # === STEP 1: MobileSAM โ€” Surgical Cutting ===
87
+ # ============================================================
88
+
89
def split_mask_with_sam(image_rgb: np.ndarray, mask: np.ndarray, predictor) -> list:
    """
    Take a single V5 mask (which may merge several adjacent buildings) and
    split it into per-building sub-masks using MobileSAM.

    Args:
        image_rgb: full image (H, W, 3)
        mask: binary mask (H, W) from V5
        predictor: SamPredictor instance (or None to skip splitting)

    Returns:
        list of binary masks — ideally one per building; falls back to
        [mask] when SAM is unavailable, unnecessary, or raises.
    """
    if predictor is None:
        return [mask]  # fallback: return the original mask unchanged

    # Small masks almost certainly cover a single building — no cutting needed.
    area = mask.sum()
    if area < 2000:  # ~45×45 pixels → most likely one building
        return [mask]

    try:
        # Feed the full image to SAM.
        predictor.set_image(image_rgb)

        # Use the mask's bounding box as the SAM prompt.
        ys, xs = np.where(mask)
        x1, x2 = xs.min(), xs.max()
        y1, y2 = ys.min(), ys.max()
        box = np.array([x1, y1, x2, y2])

        # Ask SAM for multiple candidate segmentations.
        masks_out, scores, _ = predictor.predict(
            box=box,
            multimask_output=True,  # request several proposals
        )

        # Keep only sub-masks that genuinely overlap the original V5 mask.
        valid_masks = []
        for sub_mask in masks_out:
            overlap = (sub_mask & mask.astype(bool)).sum()
            if overlap > 200:  # require at least 200 shared pixels
                valid_masks.append(sub_mask.astype(np.uint8))

        return valid_masks if valid_masks else [mask]

    except Exception as e:
        # Fail soft: SAM errors should never lose the original detection.
        print(f"⚠️ SAM splitter error: {e}")
        return [mask]
138
+
139
+
140
# ============================================================
# === STEP 2: SigLIP — Zero-Shot Material Check ===
# ============================================================

# Zero-shot comparison prompts — no training required.
# Prompts describing building rooftops as seen in satellite imagery.
BUILDING_TEXTS = [
    "a satellite view of a building rooftop",
    "concrete roof of a building seen from above",
    "residential building viewed from satellite",
    "rooftop of a house or apartment building",
]

# Prompts describing common false-positive surfaces
# (farmland, vegetation, water, bare soil, roads, pools).
NON_BUILDING_TEXTS = [
    "farmland and agricultural fields from above",
    "green vegetation and trees from satellite",
    "water surface river or lake from above",
    "empty desert or bare soil from satellite",
    "road or highway seen from above",
    "swimming pool seen from satellite",
]

# Combined prompt list fed to SigLIP. Building prompts MUST come first:
# is_building_siglip slices probs[:NUM_BUILDING] to get the building mass.
ALL_TEXTS = BUILDING_TEXTS + NON_BUILDING_TEXTS
NUM_BUILDING = len(BUILDING_TEXTS)
163
+
164
+
165
@torch.no_grad()
def is_building_siglip(
    image_rgb: np.ndarray,
    mask: np.ndarray,
    model,
    processor,
    threshold: float = 0.4,
) -> bool:
    """
    Zero-shot check with SigLIP that a mask really covers a building.

    Crops the mask's bounding box (with a 5-pixel margin) out of the
    image, scores the crop against ALL_TEXTS, and accepts the mask when
    the summed probability of the building prompts exceeds ``threshold``.

    Args:
        image_rgb: Full image as an (H, W, 3) RGB uint8 array.
        mask: Binary mask with the same H, W as the image.
        model: Loaded SigLIP model, or None to skip the check entirely.
        processor: The matching SigLIP processor.
        threshold: Minimum summed building-prompt probability.

    Returns:
        True if the crop looks like a building (also on fallback paths:
        model unavailable or any runtime error), False if it should be
        discarded (including an empty mask / empty crop).
    """
    if model is None:
        return True  # fallback: accept everything when SigLIP is unavailable

    try:
        # Crop the mask's bounding box from the image, padded by 5 px
        # so SigLIP sees a little surrounding context.
        ys, xs = np.where(mask)
        if len(ys) == 0:
            return False
        x1, x2 = max(0, xs.min() - 5), min(image_rgb.shape[1], xs.max() + 5)
        y1, y2 = max(0, ys.min() - 5), min(image_rgb.shape[0], ys.max() + 5)
        crop = image_rgb[y1:y2, x1:x2]

        if crop.size == 0:
            return False

        pil_crop = Image.fromarray(crop)

        # Build model inputs; SigLIP expects max_length text padding.
        inputs = processor(
            text=ALL_TEXTS,
            images=[pil_crop],
            return_tensors="pt",
            padding="max_length",
        )
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        if DEVICE == "cuda":
            # Model weights are presumably fp16 on GPU — keep dtypes aligned.
            inputs["pixel_values"] = inputs["pixel_values"].half()

        # Image-vs-text similarity -> probability over all prompts.
        outputs = model(**inputs)
        logits = outputs.logits_per_image[0]  # (num_texts,)
        probs = torch.softmax(logits, dim=0).cpu().float().numpy()

        # Summed probability mass of the building prompts (they occupy
        # the first NUM_BUILDING slots of ALL_TEXTS). The complementary
        # non-building mass is 1 - building_score after softmax, so it
        # does not need to be computed separately.
        building_score = probs[:NUM_BUILDING].sum()

        return building_score > threshold

    except Exception as e:
        print(f"⚠️ SigLIP check error: {e}")
        return True  # fallback: accept on error rather than dropping buildings
220
+
221
+
222
+ # ============================================================
223
+ # === STEP 3: Geometric Rules ===
224
+ # ============================================================
225
+
226
+ def _mask_area_m2(mask, pixel_size_m=0.597):
227
+ """
228
+ ุชุญูˆูŠู„ ุนุฏุฏ pixels ู„ู€ ู…ุชุฑ ู…ุฑุจุน.
229
+ pixel_size_m = ุญุฌู… ุงู„ุจูŠูƒุณู„ ุนู†ุฏ Zoom 18 (~0.6 ู…ุชุฑ)
230
+ """
231
+ return mask.sum() * (pixel_size_m ** 2)
232
+
233
+
234
+ def _aspect_ratio(mask):
235
+ """ู†ุณุจุฉ ุงู„ุทูˆู„ ู„ู„ุนุฑุถ โ€” ู„ูˆ > 10 ูุงู„ุดูƒู„ ุบุฑูŠุจ ุฌุฏุงู‹."""
236
+ ys, xs = np.where(mask)
237
+ if len(ys) == 0:
238
+ return 1.0
239
+ h = ys.max() - ys.min() + 1
240
+ w = xs.max() - xs.min() + 1
241
+ return max(h, w) / max(min(h, w), 1)
242
+
243
+
244
def apply_geometric_rules(masks: list, min_area_m2=20.0, max_area_m2=15000.0, max_aspect=10.0):
    """
    Filter masks with simple geometric sanity rules:

    - area < min_area_m2 (20 m²)     -> drop (noise)
    - area > max_area_m2 (15,000 m²) -> keep, but print a warning (probably wrong)
    - aspect ratio > max_aspect (10) -> drop (implausible building shape)

    Args:
        masks: List of binary masks.
        min_area_m2: Minimum plausible building footprint in m².
        max_area_m2: Footprint above which a warning is printed.
        max_aspect: Maximum allowed bounding-box aspect ratio.

    Returns:
        List of (mask, area_m2) tuples; area rounded to one decimal.
    """
    result = []
    for mask in masks:
        area = _mask_area_m2(mask)
        if area < min_area_m2:
            continue
        if _aspect_ratio(mask) > max_aspect:
            continue
        if area > max_area_m2:
            # The docstring promised a warning for oversized masks, but
            # max_area_m2 was previously accepted and never used — warn
            # while still keeping the mask (non-destructive check).
            print(f"⚠️ Geometric: mask area {area:.0f} m² exceeds {max_area_m2} m²")
        result.append((mask, round(area, 1)))
    return result
263
+
264
+
265
+ # ============================================================
266
+ # === MAIN: run_v51_pipeline ===
267
+ # ============================================================
268
+
269
def run_v51_pipeline(
    image_rgb: np.ndarray,
    v5_masks: list,
    v5_scores: list,
    use_sam: bool = True,
    use_siglip: bool = True,
    siglip_threshold: float = 0.4,
) -> list:
    """
    Full V5.1 refinement pipeline.

    Args:
        image_rgb: Image as a numpy array (H, W, 3).
        v5_masks: List of binary masks from V5.
        v5_scores: List of confidence scores from V5 (same order as masks).
        use_sam: Enable MobileSAM mask splitting.
        use_siglip: Enable the SigLIP material check.
        siglip_threshold: Threshold forwarded to is_building_siglip.

    Returns:
        List of dicts: [{"mask": np.array, "score": float, "area_m2": float}]
    """
    # Load the models (loaders handle their own failure fallbacks).
    sam_predictor = load_mobile_sam() if use_sam else None
    siglip_model, siglip_proc = load_siglip() if use_siglip else (None, None)

    all_masks = []
    all_scores = []

    # ── STEP 1: MobileSAM Splitting ─────────────────────────
    # A V5 mask may contain several merged buildings; each resulting
    # sub-mask inherits its parent mask's confidence score.
    for mask, score in zip(v5_masks, v5_scores):
        sub_masks = split_mask_with_sam(image_rgb, mask, sam_predictor)
        all_masks.extend(sub_masks)
        all_scores.extend([score] * len(sub_masks))

    print(f"  SAM: {len(v5_masks)} → {len(all_masks)} masks")

    # ── STEP 2: SigLIP Material Check ───────────────────────
    if use_siglip and siglip_model is not None:
        filtered_masks = []
        filtered_scores = []
        removed = 0
        for mask, score in zip(all_masks, all_scores):
            if is_building_siglip(image_rgb, mask, siglip_model, siglip_proc, siglip_threshold):
                filtered_masks.append(mask)
                filtered_scores.append(score)
            else:
                removed += 1
        print(f"  SigLIP: حذف {removed} غير مباني")
        all_masks, all_scores = filtered_masks, filtered_scores

    # ── STEP 3: Geometric Rules ─────────────────────────────
    # Filter one mask at a time so every surviving mask keeps its OWN
    # score. (Filtering the whole list at once desynchronized the
    # mask/score indices whenever a mask was dropped, silently pairing
    # masks with the wrong confidence — the old `all_scores[i]` bug.)
    kept = []
    for mask, score in zip(all_masks, all_scores):
        passed = apply_geometric_rules([mask])
        if passed:
            geo_mask, area_m2 = passed[0]
            kept.append((geo_mask, score, area_m2))
    print(f"  Geometric: {len(all_masks)} → {len(kept)} masks")

    # ── Build result ────────────────────────────────────────
    return [
        {"mask": geo_mask, "score": score, "area_m2": area_m2}
        for geo_mask, score, area_m2 in kept
    ]