Spaces:
Sleeping
Sleeping
yusef commited on
Commit ยท
df64c50
0
Parent(s):
Initial commit - V5.1 API
Browse files- Dockerfile +53 -0
- README.md +39 -0
- app.py +131 -0
- inference.py +447 -0
- model_manager.py +94 -0
- post_processor.py +333 -0
Dockerfile
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

# System dependencies: build tools for source installs (detectron2, MobileSAM)
# and the X/GL runtime libraries OpenCV needs even in headless environments.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

# Create app directory
WORKDIR /app

# Step 1: Install PyTorch FIRST (CPU-only to save space)
RUN pip install --no-cache-dir \
    torch torchvision --index-url https://download.pytorch.org/whl/cpu

# Step 2: Install Detectron2 (needs torch already installed)
RUN pip install --no-cache-dir \
    'git+https://github.com/facebookresearch/detectron2.git'

# Step 3: Install remaining dependencies.
# NOTE: every ">=" specifier must be quoted — unquoted, the shell treats ">"
# as output redirection, so the version pin is silently dropped and a junk
# file (e.g. "=0.104.0") is created in the image.
RUN pip install --no-cache-dir \
    "fastapi>=0.104.0" \
    "uvicorn[standard]>=0.24.0" \
    "opencv-python-headless>=4.8.0" \
    "numpy>=1.24.0" \
    "Pillow>=10.0.0" \
    "requests>=2.31.0" \
    "huggingface_hub>=0.19.0" \
    "python-multipart>=0.0.6"

# Step 4: V5.1 Pipeline — MobileSAM + SigLIP
RUN pip install --no-cache-dir \
    "transformers>=4.37.0" \
    "timm>=0.9.0" \
    'git+https://github.com/ChaoningZhang/MobileSAM.git'

# Copy app code
COPY . .

# Create a non-root user (HF Spaces requirement)
RUN useradd -m -u 1000 user
USER user

# Expose port (HF Spaces uses 7860)
EXPOSE 7860

# Start the server
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
title: Building Detection API
emoji: 🏗️
colorFrom: orange
colorTo: red
sdk: docker
app_port: 7860
pinned: true
---

# 🏗️ Building Detection API

Detect buildings from satellite imagery using Mask R-CNN V5.

## API Endpoints

- `GET /` — Health check + model info
- `GET /health` — Health check
- `POST /detect` — Detect buildings in a polygon area

## Usage

```bash
curl -X POST https://your-space.hf.space/detect \
  -H "Content-Type: application/json" \
  -d '{
    "coordinates": [[31.24, 30.04], [31.25, 30.04], [31.25, 30.05], [31.24, 30.05]],
    "threshold": 0.3
  }'
```

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `MODEL_REPO` | `yusef75/building-detection-models` | HF model repository |
| `MODEL_VERSION` | `v5` | Model version folder |
| `MODEL_FILENAME` | `model_final.pth` | Model file name |
| `SCORE_THRESHOLD` | `0.3` | Default detection threshold |
app.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Building Detection API โ FastAPI Backend for Hugging Face Spaces.
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
GET / โ Health check + model info
|
| 6 |
+
GET /health โ Health check
|
| 7 |
+
POST /detect โ Detect buildings in a polygon area
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from fastapi import FastAPI, HTTPException
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
import uvicorn
|
| 15 |
+
|
| 16 |
+
from model_manager import load_model, get_model_info
|
| 17 |
+
from inference import detect_buildings
|
| 18 |
+
|
| 19 |
+
# ==========================================
# === App Setup ===
# ==========================================
app = FastAPI(
    title="🏗️ Building Detection API",
    description="Detect buildings from satellite imagery using Mask R-CNN V5",
    version="1.0.0",
)

# Allow CORS for Vercel frontend
app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is a
    # known CORS footgun (browsers reject "*" with credentials). In production,
    # restrict to the actual Vercel domain.
    allow_origins=["*"],  # In production, restrict to your Vercel domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ==========================================
# === Request / Response Models ===
# ==========================================
class DetectRequest(BaseModel):
    """Request body for POST /detect."""

    # Polygon ring in GeoJSON axis order: [[lng, lat], ...]
    coordinates: List[List[float]] = Field(
        ...,
        description="Polygon coordinates as [[lng, lat], ...] in GeoJSON format",
        example=[[31.24, 30.04], [31.25, 30.04], [31.25, 30.05], [31.24, 30.05]],
    )
    # Confidence cutoff; the actual filtering happens in inference.detect_buildings.
    threshold: Optional[float] = Field(
        default=0.5,
        ge=0.1,
        le=0.95,
        description="Detection confidence threshold",
    )
    use_v51: Optional[bool] = Field(
        default=True,
        description="Enable V5.1 pipeline (MobileSAM + SigLIP) for better accuracy",
    )
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class DetectResponse(BaseModel):
    """Response body for POST /detect."""

    geojson: dict  # GeoJSON FeatureCollection of detected building polygons
    stats: dict    # processing statistics (counts, timing, bounds)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class HealthResponse(BaseModel):
    """Response body for GET / and GET /health."""

    status: str  # human-readable status string
    model: dict  # info dict from model_manager.get_model_info()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# ==========================================
|
| 70 |
+
# === Startup Event ===
|
| 71 |
+
# ==========================================
|
| 72 |
+
@app.on_event("startup")
async def startup():
    """Load model when the server starts."""
    # NOTE(review): @app.on_event is deprecated in recent FastAPI versions in
    # favor of lifespan handlers — confirm the installed version before migrating.
    print("🚀 Starting Building Detection API...")
    # Blocks until the model is downloaded from HF Hub and built.
    load_model()
    print("✅ API ready!")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ==========================================
|
| 81 |
+
# === Endpoints ===
|
| 82 |
+
# ==========================================
|
| 83 |
+
@app.get("/", response_model=HealthResponse)
async def root():
    """Health check and model info."""
    return {
        "status": "🟢 online",
        "model": get_model_info(),
    }
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@app.get("/health", response_model=HealthResponse)
async def health():
    """Health check endpoint (same payload as GET /)."""
    return {
        "status": "🟢 online",
        "model": get_model_info(),
    }
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
@app.post("/detect", response_model=DetectResponse)
async def detect(request: DetectRequest):
    """
    Detect buildings in the specified polygon area.

    Send polygon coordinates in GeoJSON format [[lng, lat], ...].
    Returns a GeoJSON FeatureCollection with detected building polygons.

    Raises:
        HTTPException 400: user-correctable problems reported by
            detect_buildings via its "error" key (bad polygon, area too large).
        HTTPException 500: any unexpected processing failure.
    """
    try:
        result = detect_buildings(
            coordinates=request.coordinates,
            threshold=request.threshold,
            use_v51=request.use_v51,
        )

        # detect_buildings signals user errors via an "error" key, not by raising.
        if "error" in result:
            raise HTTPException(status_code=400, detail=result["error"])

        return result

    except HTTPException:
        # Re-raise the 400 above untouched so it isn't wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# ==========================================
# === Run ===
# ==========================================
if __name__ == "__main__":
    # Local development entry point; in the container uvicorn is started by CMD.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
inference.py
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference Engine โ Tile downloading + Building detection + Deduplication.
|
| 3 |
+
Adapted from MaskRCNN_V5_MapFlow.py for server deployment.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import math
|
| 7 |
+
import time
|
| 8 |
+
import numpy as np
|
| 9 |
+
import cv2
|
| 10 |
+
import requests
|
| 11 |
+
from PIL import Image
|
| 12 |
+
from io import BytesIO
|
| 13 |
+
from model_manager import get_predictor, set_threshold
|
| 14 |
+
from post_processor import run_v51_pipeline
|
| 15 |
+
|
| 16 |
+
# ==========================================
# === Constants ===
# ==========================================
ZOOM = 18              # Web-Mercator zoom level used by all tile math below
TILE_SIZE = 256        # pixels per map tile
TILES_PER_IMG = 2      # 2x2 tiles are stitched into one model input image
IMG_SIZE = 512         # TILE_SIZE * TILES_PER_IMG
MAX_TILES = 60  # Safety limit
MIN_BUILDING_AREA = 200  # Min contour area in pixels (filters tiny false positives)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ==========================================
# === Coordinate Utils ===
# ==========================================
def lon_to_tile_x(lon):
    """Map a longitude in degrees to a fractional Web-Mercator tile-x at ZOOM."""
    scale = 2 ** ZOOM
    return (lon + 180) / 360 * scale
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def lat_to_tile_y(lat):
    """Map a latitude in degrees to a fractional Web-Mercator tile-y at ZOOM."""
    phi = math.radians(lat)
    # Standard Mercator projection: ln(tan(phi) + sec(phi))
    mercator = math.log(math.tan(phi) + 1 / math.cos(phi))
    return (1 - mercator / math.pi) / 2 * (2 ** ZOOM)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def tile_x_to_lon(tx):
    """Inverse of lon_to_tile_x: fractional tile-x back to degrees longitude."""
    n_tiles = 2 ** ZOOM
    return tx / n_tiles * 360 - 180
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def tile_y_to_lat(ty):
    """Inverse of lat_to_tile_y: fractional tile-y back to degrees latitude."""
    merc = math.pi - 2 * math.pi * ty / (2 ** ZOOM)
    return math.degrees(math.atan(math.sinh(merc)))
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def pixel_to_geo(px, py, grid_x, grid_y):
    """Convert a pixel position inside grid cell (grid_x, grid_y) to (lon, lat)."""
    # Each grid cell covers TILES_PER_IMG tiles; the pixel offset contributes
    # a fractional tile index.
    tile_x = grid_x * TILES_PER_IMG + px / TILE_SIZE
    tile_y = grid_y * TILES_PER_IMG + py / TILE_SIZE
    return tile_x_to_lon(tile_x), tile_y_to_lat(tile_y)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ==========================================
# === Tile Downloading ===
# ==========================================
# Shared HTTP session so tile downloads reuse TCP connections.
session = requests.Session()
# A browser-like User-Agent; the tile server may reject default python-requests UAs.
session.headers.update({"User-Agent": "Mozilla/5.0"})
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def download_tile_512(grid_x, grid_y):
    """Download a 2x2 block of satellite tiles stitched into one 512x512 RGB image.

    Tiles that fail to download are silently left black (zeros); callers treat
    mostly-dark images as missing data (see detect_buildings).
    """
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    base_tx = grid_x * TILES_PER_IMG
    base_ty = grid_y * TILES_PER_IMG

    for dy in range(TILES_PER_IMG):
        for dx in range(TILES_PER_IMG):
            tx, ty = base_tx + dx, base_ty + dy
            # Spread requests over the mt0..mt3 mirror hosts.
            s = (tx + ty) % 4
            url = f"https://mt{s}.google.com/vt/lyrs=s&x={tx}&y={ty}&z={ZOOM}"
            try:
                r = session.get(url, timeout=15)
                tile = np.array(Image.open(BytesIO(r.content)).convert("RGB"))
                # Paste the 256x256 tile into its quadrant of the output image.
                img[dy * TILE_SIZE:(dy + 1) * TILE_SIZE,
                    dx * TILE_SIZE:(dx + 1) * TILE_SIZE] = tile
            except Exception:
                # Best-effort: a failed tile leaves its quadrant black.
                pass
    return img
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ==========================================
# === Polygon -> Tiles ===
# ==========================================
def get_tiles_for_polygon(polygon_coords):
    """
    Convert polygon coordinates to grid tile indices.

    Input: list of [lat, lon] pairs.
    Returns: list of (grid_x, grid_y) tuples and bounds
        as (min_lat, max_lat, min_lon, max_lon).
    """
    lats = [c[0] for c in polygon_coords]
    lons = [c[1] for c in polygon_coords]

    min_lat, max_lat = min(lats), max(lats)
    min_lon, max_lon = min(lons), max(lons)

    # Tile-y grows southward, so max_lat maps to the minimum tile-y.
    min_tx = lon_to_tile_x(min_lon)
    max_tx = lon_to_tile_x(max_lon)
    min_ty = lat_to_tile_y(max_lat)
    max_ty = lat_to_tile_y(min_lat)

    # Collapse raw tile indices into the 2x2 "grid" cells used by download_tile_512.
    min_gx = int(min_tx) // TILES_PER_IMG
    max_gx = int(max_tx) // TILES_PER_IMG
    min_gy = int(min_ty) // TILES_PER_IMG
    max_gy = int(max_ty) // TILES_PER_IMG

    tiles = []
    for gy in range(min_gy, max_gy + 1):
        for gx in range(min_gx, max_gx + 1):
            tiles.append((gx, gy))

    return tiles, (min_lat, max_lat, min_lon, max_lon)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# ==========================================
# === Polygon Regularization ===
# ==========================================
def regularize_polygon(contour, rect):
    """
    Regularize polygon edges by snapping to the building's dominant direction.

    1. Get dominant angle from minAreaRect
    2. Rotate polygon so dominant direction = horizontal
    3. Snap nearly-horizontal edges -> exact horizontal
       Snap nearly-vertical edges -> exact vertical
    4. Rotate back

    Args:
        contour: OpenCV contour array of shape (N, 1, 2).
        rect: cv2.minAreaRect result ((cx, cy), (w, h), angle) for this contour.

    Returns:
        Integer vertex array of shape (N, 2); the original contour is returned
        unchanged when it has fewer than 4 points.
    """
    points = contour.reshape(-1, 2).astype(float)
    n = len(points)
    if n < 4:
        # Too few vertices for right-angle snapping to be meaningful.
        return contour

    angle = rect[2]  # dominant orientation (degrees) from the min-area rectangle
    angle_rad = math.radians(angle)
    cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)

    center = np.mean(points, axis=0)

    # Rotate to align dominant direction with horizontal axis
    rotated = np.zeros_like(points)
    for i, p in enumerate(points):
        dx, dy = p[0] - center[0], p[1] - center[1]
        rotated[i] = [dx * cos_a + dy * sin_a, -dx * sin_a + dy * cos_a]

    # Snap edges within 15 degrees of horizontal/vertical.
    # Snapping mutates `rotated` in place, so each edge sees the already-snapped
    # position of its start vertex — the iteration order matters.
    SNAP_ANGLE = 15
    for i in range(n):
        j = (i + 1) % n
        dx = rotated[j][0] - rotated[i][0]
        dy = rotated[j][1] - rotated[i][1]
        if abs(dx) < 1e-6 and abs(dy) < 1e-6:
            continue  # degenerate edge (duplicate vertex)
        edge_angle = abs(math.degrees(math.atan2(abs(dy), abs(dx))))

        if edge_angle < SNAP_ANGLE:  # Nearly horizontal
            rotated[j][1] = rotated[i][1]
        elif edge_angle > (90 - SNAP_ANGLE):  # Nearly vertical
            rotated[j][0] = rotated[i][0]

    # Rotate back
    result = np.zeros_like(points)
    for i, p in enumerate(rotated):
        rx = p[0] * cos_a - p[1] * sin_a + center[0]
        ry = p[0] * sin_a + p[1] * cos_a + center[1]
        result[i] = [round(rx), round(ry)]

    return result.astype(int)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# ==========================================
# === Mask -> GeoJSON (with regularization) ===
# ==========================================
def mask_to_geo_polygon(mask, grid_x, grid_y, score):
    """Convert a binary mask to a GeoJSON Feature with angle regularization.

    Returns None when the mask has no contour, its largest contour is smaller
    than MIN_BUILDING_AREA, or it simplifies to fewer than 3 vertices.
    """
    contours, _ = cv2.findContours(
        mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return None

    # Keep only the largest connected component of the mask.
    contour = max(contours, key=cv2.contourArea)
    if cv2.contourArea(contour) < MIN_BUILDING_AREA:
        return None

    # Simplify the contour (epsilon proportional to the perimeter)
    epsilon = 0.008 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    if len(approx) < 3:
        return None

    # Regularize angles (snap edges toward 90 degrees)
    rect = cv2.minAreaRect(contour)
    if len(approx) >= 4:
        pixel_points = regularize_polygon(approx, rect)
    else:
        # Triangles have no rectilinear structure to snap.
        pixel_points = approx.reshape(-1, 2)

    # Convert pixel coordinates to geographic coordinates
    geo_coords = []
    for pt in pixel_points:
        px, py = int(pt[0]), int(pt[1])
        lon, lat = pixel_to_geo(px, py, grid_x, grid_y)
        geo_coords.append([lon, lat])
    geo_coords.append(geo_coords[0])  # Close polygon (GeoJSON rings must close)

    return {
        "type": "Feature",
        "properties": {"confidence": round(float(score), 3)},
        "geometry": {"type": "Polygon", "coordinates": [geo_coords]},
    }
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def polygon_area(coords):
    """Absolute area of a 2-D polygon via the Shoelace formula.

    Degenerate inputs (fewer than 3 vertices) yield 0.
    """
    if len(coords) < 3:
        return 0
    total = 0
    # Pair each vertex with its successor, wrapping the last back to the first.
    for (x1, y1), (x2, y2) in zip(coords, coords[1:] + coords[:1]):
        total += x1 * y2 - x2 * y1
    return abs(total) / 2
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def bboxes_overlap(coords1, coords2):
    """Return True when the axis-aligned bounding boxes of two polygons intersect."""
    min_x1, max_x1 = min(c[0] for c in coords1), max(c[0] for c in coords1)
    min_y1, max_y1 = min(c[1] for c in coords1), max(c[1] for c in coords1)
    min_x2, max_x2 = min(c[0] for c in coords2), max(c[0] for c in coords2)
    min_y2, max_y2 = min(c[1] for c in coords2), max(c[1] for c in coords2)

    # Boxes are disjoint when one lies entirely left/right or above/below the other.
    disjoint = (max_x1 < min_x2 or max_x2 < min_x1 or
                max_y1 < min_y2 or max_y2 < min_y1)
    return not disjoint
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def deduplicate_buildings(features, distance_threshold=0.0003):
    """
    Remove duplicate buildings from overlapping tiles.

    Of any pair whose centroids lie within ``distance_threshold`` degrees
    (~30 m at the equator), whose areas are within 2x of each other, and whose
    bounding boxes overlap, only the higher-confidence feature is kept.

    Args:
        features: list of GeoJSON Feature dicts (Polygon geometry,
            "confidence" property).
        distance_threshold: max centroid distance in degrees for two features
            to be considered duplicates.

    Returns:
        Filtered feature list, ordered by descending confidence.
    """
    if not features:
        return features

    # Pre-compute centroids and areas once per feature.
    centroids = []
    areas = []
    for f in features:
        coords = f["geometry"]["coordinates"][0]
        cx = np.mean([c[0] for c in coords])
        cy = np.mean([c[1] for c in coords])
        centroids.append((cx, cy))
        areas.append(polygon_area(coords))

    # Process in descending confidence so duplicates lose to stronger detections.
    indices = sorted(
        range(len(features)),
        key=lambda i: features[i]["properties"]["confidence"],
        reverse=True,
    )

    keep = []
    kept = set()     # same contents as `keep`; O(1) membership.
                     # (Previously `set(keep)` was rebuilt on EVERY inner-loop
                     # iteration — an O(k) construction inside an O(n^2) loop.)
    removed = set()

    for i in indices:
        if i in removed:
            continue
        keep.append(i)
        kept.add(i)
        cx1, cy1 = centroids[i]
        area1 = areas[i]
        coords1 = features[i]["geometry"]["coordinates"][0]

        for j in indices:
            if j in removed or j == i or j in kept:
                continue
            cx2, cy2 = centroids[j]
            area2 = areas[j]

            # Quick centroid distance check
            dist = math.sqrt((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2)
            if dist > distance_threshold:
                continue

            # Area similarity check: sizes differing by more than 2x are
            # probably distinct buildings, not duplicates.
            if area1 > 0 and area2 > 0:
                ratio = max(area1, area2) / min(area1, area2)
                if ratio > 2.0:
                    continue

            # Bounding box overlap check
            coords2 = features[j]["geometry"]["coordinates"][0]
            if bboxes_overlap(coords1, coords2):
                removed.add(j)

    return [features[i] for i in keep]
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
# ==========================================
# === Point-in-Polygon Test ===
# ==========================================
def point_in_polygon(px, py, polygon):
    """
    Ray-casting test: True when point (px, py) lies inside `polygon`.

    polygon: list of [x, y] vertex pairs (open ring; closure is implicit).
    """
    inside = False
    count = len(polygon)
    prev = count - 1
    for cur in range(count):
        x1, y1 = polygon[cur]
        x2, y2 = polygon[prev]
        # Does a horizontal ray from the point cross edge (prev -> cur)?
        crosses = (y1 > py) != (y2 > py)
        if crosses and px < (x2 - x1) * (py - y1) / (y2 - y1) + x1:
            inside = not inside
        prev = cur
    return inside
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
# ==========================================
# === Main Processing Function ===
# ==========================================
def detect_buildings(coordinates, threshold=0.5, use_v51=False):
    """
    Process a polygon area and detect buildings.

    Args:
        coordinates: list of [lng, lat] pairs (GeoJSON format)
        threshold: detection confidence threshold
        use_v51: when True, refine masks with the V5.1 pipeline
            (post_processor.run_v51_pipeline) instead of using raw V5 masks

    Returns:
        dict with GeoJSON FeatureCollection + stats, or a dict with an
        "error" key for user-correctable problems (too few points,
        area too large).
    """
    # Convert from GeoJSON [lng, lat] to [lat, lng]; malformed points are skipped.
    coords = []
    for point in coordinates:
        if isinstance(point, list) and len(point) == 2:
            coords.append([point[1], point[0]])

    if len(coords) < 3:
        return {"error": "Need at least 3 points to form a polygon"}

    # Build user polygon in [lng, lat] format for clipping
    user_polygon = [[c[1], c[0]] for c in coords]  # [lng, lat]

    predictor = get_predictor()

    # Get tiles covering the polygon's bounding box
    tiles, bounds = get_tiles_for_polygon(coords)
    n_tiles = len(tiles)

    if n_tiles > MAX_TILES:
        return {
            "error": f"Area too large! {n_tiles} tiles needed, max is {MAX_TILES}. Draw a smaller polygon.",
            "tiles_needed": n_tiles,
            "max_tiles": MAX_TILES,
        }

    # Process tiles
    all_features = []
    start_time = time.time()

    for idx, (gx, gy) in enumerate(tiles):
        img = download_tile_512(gx, gy)

        # Skip dark/empty tiles (failed downloads come back black)
        if np.mean(img) < 10:
            continue

        # Detectron2's DefaultPredictor expects BGR input
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        outputs = predictor(img_bgr)
        instances = outputs["instances"].to("cpu")

        if len(instances) == 0:
            continue

        raw_masks = instances.pred_masks.numpy()
        raw_scores = instances.scores.numpy()

        # -- V5.1 Pipeline (optional) --------------------------------------
        if use_v51:
            # Pre-filter by confidence first (faster)
            conf_masks = [m for m, s in zip(raw_masks, raw_scores) if float(s) >= threshold]
            conf_scores = [float(s) for s in raw_scores if float(s) >= threshold]

            if conf_masks:
                print(f"  [V5.1] Tile {idx+1}/{len(tiles)}: {len(conf_masks)} masks → pipeline...")
                v51_results = run_v51_pipeline(
                    image_rgb=img,
                    v5_masks=conf_masks,
                    v5_scores=conf_scores,
                    use_sam=True,
                    use_siglip=True,
                )
                for res in v51_results:
                    feature = mask_to_geo_polygon(res["mask"], gx, gy, res["score"])
                    if feature:
                        feature["properties"]["area_m2"] = res["area_m2"]
                        all_features.append(feature)

        # -- V5 Original Pipeline ------------------------------------------
        else:
            for mask, score in zip(raw_masks, raw_scores):
                if float(score) < threshold:
                    continue
                feature = mask_to_geo_polygon(mask, gx, gy, score)
                if feature:
                    all_features.append(feature)

    # Clip to user polygon — only keep buildings whose centroid is inside
    clipped_features = []
    for f in all_features:
        poly_coords = f["geometry"]["coordinates"][0]
        cx = np.mean([c[0] for c in poly_coords])  # lng
        cy = np.mean([c[1] for c in poly_coords])  # lat
        if point_in_polygon(cx, cy, user_polygon):
            clipped_features.append(f)

    all_features = clipped_features

    # Deduplicate detections repeated across overlapping tile boundaries
    before_dedup = len(all_features)
    all_features = deduplicate_buildings(all_features)
    after_dedup = len(all_features)
    elapsed = time.time() - start_time

    # Build response
    geojson = {
        "type": "FeatureCollection",
        "features": all_features,
    }

    stats = {
        "buildings_detected": after_dedup,
        "duplicates_removed": before_dedup - after_dedup,
        "tiles_processed": n_tiles,
        "processing_time_seconds": round(elapsed, 1),
        "threshold": threshold,
        "bounds": {
            "min_lat": bounds[0],
            "max_lat": bounds[1],
            "min_lon": bounds[2],
            "max_lon": bounds[3],
        },
    }

    return {"geojson": geojson, "stats": stats}
|
model_manager.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Model Manager โ Easy version swapping for Building Detection models.
|
| 3 |
+
|
| 4 |
+
To swap models:
|
| 5 |
+
1. Upload new model to HF repo (e.g., v6/model_final.pth)
|
| 6 |
+
2. Set MODEL_VERSION env var to "v6"
|
| 7 |
+
3. Restart the Space
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import torch
|
| 12 |
+
from detectron2.config import get_cfg
|
| 13 |
+
from detectron2 import model_zoo
|
| 14 |
+
from detectron2.engine import DefaultPredictor
|
| 15 |
+
from huggingface_hub import hf_hub_download
|
| 16 |
+
|
| 17 |
+
# ==========================================
# === Configuration ===
# ==========================================
# All settings are overridable via environment variables (see module docstring).
MODEL_REPO = os.environ.get("MODEL_REPO", "yusef75/building-detection-models")
MODEL_VERSION = os.environ.get("MODEL_VERSION", "v5")
MODEL_FILENAME = os.environ.get("MODEL_FILENAME", "model_final.pth")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SCORE_THRESHOLD = float(os.environ.get("SCORE_THRESHOLD", "0.3"))

# Global predictor (populated lazily by load_model / get_predictor)
_predictor = None
_model_info = {}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_model():
    """Load model from Hugging Face Hub. Called once at startup.

    Downloads the weights for MODEL_VERSION from MODEL_REPO, builds a
    Detectron2 DefaultPredictor configured for single-class (building)
    instance segmentation on 512x512 inputs, and stores it in the module
    globals ``_predictor`` / ``_model_info``.

    Returns:
        The constructed DefaultPredictor.
    """
    global _predictor, _model_info

    print(f"🔄 Loading model: {MODEL_REPO} / {MODEL_VERSION} / {MODEL_FILENAME}")
    print(f"🖥️ Device: {DEVICE}")

    # Download model from HF Hub (cached across restarts within the same container)
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=f"{MODEL_VERSION}/{MODEL_FILENAME}",
        cache_dir="/tmp/models",
    )
    print(f"✅ Model downloaded to: {model_path}")

    # Configure Detectron2: base Mask R-CNN R50-FPN config, custom weights.
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single "building" class
    cfg.MODEL.WEIGHTS = model_path
    cfg.MODEL.DEVICE = DEVICE
    # Inputs are always 512x512 stitched tiles (see inference.IMG_SIZE).
    cfg.INPUT.MIN_SIZE_TEST = 512
    cfg.INPUT.MAX_SIZE_TEST = 512

    # === Detection quality settings ===
    # Low base threshold — actual filtering happens in inference.py
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1

    # NMS: Aggressively remove overlapping detections (lower = stricter)
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3

    # Max detections per image (fewer = less overlap)
    cfg.TEST.DETECTIONS_PER_IMAGE = 200

    _predictor = DefaultPredictor(cfg)
    _model_info = {
        "version": MODEL_VERSION,
        "repo": MODEL_REPO,
        "device": DEVICE,
        "threshold": SCORE_THRESHOLD,
    }
    print(f"🎉 Model {MODEL_VERSION} loaded on {DEVICE}!")
    return _predictor
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def get_predictor():
    """Return the cached predictor, initialising it on first use."""
    global _predictor
    if _predictor is not None:
        return _predictor
    # First call: populate the module-level singleton.
    load_model()
    return _predictor
def get_model_info():
    """Get info about the currently loaded model.

    Returns:
        dict with keys "version", "repo", "device" and "threshold" once
        load_model() has run; an empty dict before that. The caller gets
        the live module-level dict, not a copy.
    """
    return _model_info
def set_threshold(threshold: float):
    """Update the detection threshold dynamically.

    No-op when the model has not been loaded yet.
    """
    global _predictor
    if _predictor is not None:
        # NOTE(review): DefaultPredictor builds its model at construction
        # time; mutating cfg afterwards may not propagate to the already
        # built ROI heads — confirm this actually changes live behaviour.
        _predictor.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
post_processor.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
V5.1 Post-Processing Pipeline โ "The Hybrid Eye"
|
| 3 |
+
=================================================
|
| 4 |
+
ูุดุชุบู ุจุนุฏ V5 ู
ุจุงุดุฑุฉ ุจุฏูู ุฃู ุชุฏุฑูุจ ุฌุฏูุฏ.
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. V5 (Hunter) โ masks ุฃูููุฉ
|
| 8 |
+
2. MobileSAM โ ููุทุน ุงููุชู ุงูู
ุชูุงุตูุฉ ูู sub-masks
|
| 9 |
+
3. SigLIP โ Zero-Shot: building vs non-building
|
| 10 |
+
4. Geometric Rules โ area + shape filter + area_m2
|
| 11 |
+
|
| 12 |
+
ุงูุชุซุจูุช:
|
| 13 |
+
pip install git+https://github.com/ChaoningZhang/MobileSAM.git
|
| 14 |
+
pip install transformers torch
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import math
|
| 18 |
+
import numpy as np
|
| 19 |
+
import cv2
|
| 20 |
+
import torch
|
| 21 |
+
from PIL import Image
|
| 22 |
+
|
| 23 |
+
# ============================================================
|
| 24 |
+
# === ุชุญู
ูู ุงูู
ูุฏููุงุช (ู
ุฑุฉ ูุงุญุฏุฉ) ===
|
| 25 |
+
# ============================================================
|
| 26 |
+
|
| 27 |
+
_mobile_sam = None
|
| 28 |
+
_sam_predictor = None
|
| 29 |
+
_siglip_model = None
|
| 30 |
+
_siglip_processor = None
|
| 31 |
+
|
| 32 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_mobile_sam():
    """Load MobileSAM (only ~5.78 MB — very lightweight).

    Returns a cached SamPredictor on subsequent calls, or None when the
    mobile_sam package (or the checkpoint download) is unavailable.
    """
    global _mobile_sam, _sam_predictor
    if _sam_predictor is not None:
        return _sam_predictor

    try:
        from mobile_sam import sam_model_registry, SamPredictor
        from huggingface_hub import hf_hub_download

        print("📥 تحميل MobileSAM...")
        ckpt = hf_hub_download(
            repo_id="dhkim2810/MobileSAM",
            filename="mobile_sam.pt",
        )
        _mobile_sam = sam_model_registry["vit_t"](checkpoint=ckpt)
        _mobile_sam.to(DEVICE).eval()
        _sam_predictor = SamPredictor(_mobile_sam)
        print("✅ MobileSAM جاهز!")
        return _sam_predictor

    except Exception as e:
        # Degrade gracefully: callers treat None as "skip SAM splitting".
        print(f"⚠️ MobileSAM مش متاح: {e}")
        return None
def load_siglip():
    """Load SigLIP for zero-shot material classification.

    Returns (model, processor), cached after the first successful call,
    or (None, None) when transformers / the weights are unavailable.
    """
    global _siglip_model, _siglip_processor
    if _siglip_model is not None:
        return _siglip_model, _siglip_processor

    try:
        from transformers import SiglipProcessor, SiglipModel

        print("📥 تحميل SigLIP...")
        model_id = "google/siglip-base-patch16-224"
        _siglip_processor = SiglipProcessor.from_pretrained(model_id)
        _siglip_model = SiglipModel.from_pretrained(
            model_id,
            torch_dtype=torch.float32,  # CPU → always float32
        ).to(DEVICE).eval()
        print("✅ SigLIP جاهز!")
        return _siglip_model, _siglip_processor

    except Exception as e:
        # Degrade gracefully: callers treat (None, None) as "skip SigLIP".
        print(f"⚠️ SigLIP مش متاح: {e}")
        return None, None
+
# ============================================================
# === STEP 1: MobileSAM — Surgical Cutting ===
# ============================================================

def split_mask_with_sam(image_rgb: np.ndarray, mask: np.ndarray, predictor) -> list:
    """Take one mask (possibly covering e.g. 4 merged buildings) and split it
    into sub-masks.

    Args:
        image_rgb: full image as (H, W, 3) array
        mask: binary mask (H, W) produced by V5
        predictor: SamPredictor instance (or None to skip splitting)

    Returns:
        list of binary masks — ideally one per individual building
    """
    if predictor is None:
        return [mask]  # fallback: return the original mask unchanged

    # A small mask is most likely a single building — no splitting needed.
    area = mask.sum()
    if area < 2000:  # ~45x45 pixels → probably one building
        return [mask]

    try:
        # Embed the image for SAM.
        # NOTE(review): set_image() recomputes the image embedding on every
        # call; when splitting many masks of the same image this repeats
        # work — consider hoisting to the caller.
        predictor.set_image(image_rgb)

        # Use the mask's bounding box as the prompt.
        ys, xs = np.where(mask)
        x1, x2 = xs.min(), xs.max()
        y1, y2 = ys.min(), ys.max()
        box = np.array([x1, y1, x2, y2])

        # Ask SAM for multiple segmentation proposals.
        masks_out, scores, _ = predictor.predict(
            box=box,
            multimask_output=True,  # <-- request several hypotheses
        )

        # Keep only sub-masks that actually overlap the original mask.
        # NOTE(review): multimask_output returns alternative hypotheses of
        # the same region, so survivors may be near-duplicates rather than
        # disjoint pieces — confirm downstream handles the overlap.
        valid_masks = []
        for sub_mask in masks_out:
            # The sub-mask must intersect the original V5 mask.
            overlap = (sub_mask & mask.astype(bool)).sum()
            if overlap > 200:  # at least 200 shared pixels
                valid_masks.append(sub_mask.astype(np.uint8))

        return valid_masks if valid_masks else [mask]

    except Exception as e:
        # Any SAM failure falls back to the unsplit mask.
        print(f"⚠️ SAM splitter error: {e}")
        return [mask]
# ============================================================
# === STEP 2: SigLIP — Zero-Shot Material Check ===
# ============================================================

# Comparison prompts — zero-shot, no training required.
BUILDING_TEXTS = [
    "a satellite view of a building rooftop",
    "concrete roof of a building seen from above",
    "residential building viewed from satellite",
    "rooftop of a house or apartment building",
]

NON_BUILDING_TEXTS = [
    "farmland and agricultural fields from above",
    "green vegetation and trees from satellite",
    "water surface river or lake from above",
    "empty desert or bare soil from satellite",
    "road or highway seen from above",
    "swimming pool seen from satellite",
]

# Building prompts come first so probs[:NUM_BUILDING] selects exactly them.
ALL_TEXTS = BUILDING_TEXTS + NON_BUILDING_TEXTS
NUM_BUILDING = len(BUILDING_TEXTS)
@torch.no_grad()
def is_building_siglip(
    image_rgb: np.ndarray,
    mask: np.ndarray,
    model,
    processor,
    threshold: float = 0.4,
) -> bool:
    """Zero-shot check with SigLIP that a mask really covers a building.

    Crops the mask's (5 px padded) bounding box, scores it against the
    BUILDING_TEXTS / NON_BUILDING_TEXTS prompts, and accepts the mask when
    the summed probability of the building prompts exceeds ``threshold``.

    Args:
        image_rgb: full image as (H, W, 3) array
        mask: binary mask (H, W)
        model: SigLIP model (None → accept everything)
        processor: matching SigLIP processor
        threshold: minimum summed building-prompt probability

    Returns:
        True to keep the mask, False to discard it.
    """
    if model is None:
        return True  # fallback: accept everything when SigLIP is unavailable

    try:
        # Crop the bounding box (padded by 5 px) from the image.
        ys, xs = np.where(mask)
        if len(ys) == 0:
            return False
        x1, x2 = max(0, xs.min() - 5), min(image_rgb.shape[1], xs.max() + 5)
        y1, y2 = max(0, ys.min() - 5), min(image_rgb.shape[0], ys.max() + 5)
        crop = image_rgb[y1:y2, x1:x2]

        if crop.size == 0:
            return False

        pil_crop = Image.fromarray(crop)

        # Prepare inputs (padding="max_length" is required by SigLIP).
        inputs = processor(
            text=ALL_TEXTS,
            images=[pil_crop],
            return_tensors="pt",
            padding="max_length",
        )
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        # BUGFIX: pixel_values used to be unconditionally cast to half on
        # CUDA, but load_siglip() always loads the model in float32, so the
        # forward pass raised a dtype mismatch that the broad `except`
        # swallowed — silently disabling this filter on GPU. Match the
        # model's actual parameter dtype instead.
        inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)

        # Similarity scores: softmax over all candidate prompts.
        outputs = model(**inputs)
        logits = outputs.logits_per_image[0]  # (num_texts,)
        probs = torch.softmax(logits, dim=0).cpu().float().numpy()

        # Total probability mass assigned to the building prompts.
        building_score = probs[:NUM_BUILDING].sum()

        return building_score > threshold

    except Exception as e:
        print(f"⚠️ SigLIP check error: {e}")
        return True  # fallback: accept on error
# ============================================================
|
| 223 |
+
# === STEP 3: Geometric Rules ===
|
| 224 |
+
# ============================================================
|
| 225 |
+
|
| 226 |
+
def _mask_area_m2(mask, pixel_size_m=0.597):
|
| 227 |
+
"""
|
| 228 |
+
ุชุญููู ุนุฏุฏ pixels ูู ู
ุชุฑ ู
ุฑุจุน.
|
| 229 |
+
pixel_size_m = ุญุฌู
ุงูุจููุณู ุนูุฏ Zoom 18 (~0.6 ู
ุชุฑ)
|
| 230 |
+
"""
|
| 231 |
+
return mask.sum() * (pixel_size_m ** 2)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def _aspect_ratio(mask):
|
| 235 |
+
"""ูุณุจุฉ ุงูุทูู ููุนุฑุถ โ ูู > 10 ูุงูุดูู ุบุฑูุจ ุฌุฏุงู."""
|
| 236 |
+
ys, xs = np.where(mask)
|
| 237 |
+
if len(ys) == 0:
|
| 238 |
+
return 1.0
|
| 239 |
+
h = ys.max() - ys.min() + 1
|
| 240 |
+
w = xs.max() - xs.min() + 1
|
| 241 |
+
return max(h, w) / max(min(h, w), 1)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def apply_geometric_rules(masks: list, min_area_m2=20.0, max_area_m2=15000.0, max_aspect=10.0):
|
| 245 |
+
"""
|
| 246 |
+
ูููุชุฑ ุงูู masks ุจููุงุนุฏ ููุฏุณูุฉ:
|
| 247 |
+
|
| 248 |
+
- ู
ุณุงุญุฉ < 20 ู
ยฒ โ ุงุญุฐู (noise)
|
| 249 |
+
- ู
ุณุงุญุฉ > 15,000 ู
ยฒ โ ุญุฐูุฑ (probably wrong)
|
| 250 |
+
- aspect ratio > 10 โ ุงุญุฐู (ุดูู ุบุฑูุจ ู
ุด ู
ุจูู)
|
| 251 |
+
|
| 252 |
+
Returns: list of (mask, area_m2) tuples
|
| 253 |
+
"""
|
| 254 |
+
result = []
|
| 255 |
+
for mask in masks:
|
| 256 |
+
area = _mask_area_m2(mask)
|
| 257 |
+
if area < min_area_m2:
|
| 258 |
+
continue
|
| 259 |
+
if _aspect_ratio(mask) > max_aspect:
|
| 260 |
+
continue
|
| 261 |
+
result.append((mask, round(area, 1)))
|
| 262 |
+
return result
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
# ============================================================
# === MAIN: run_v51_pipeline ===
# ============================================================

def run_v51_pipeline(
    image_rgb: np.ndarray,
    v5_masks: list,
    v5_scores: list,
    use_sam: bool = True,
    use_siglip: bool = True,
    siglip_threshold: float = 0.4,
) -> list:
    """Full V5.1 post-processing pipeline.

    Args:
        image_rgb: image as numpy array (H, W, 3)
        v5_masks: list of binary masks from V5
        v5_scores: list of confidence scores from V5 (parallel to v5_masks)
        use_sam: enable MobileSAM splitting
        use_siglip: enable the SigLIP material check
        siglip_threshold: building-probability cutoff passed to SigLIP

    Returns:
        list of dicts: [{"mask": np.array, "score": float, "area_m2": float}]
    """
    # Load the auxiliary models lazily (no-ops when disabled).
    sam_predictor = load_mobile_sam() if use_sam else None
    siglip_model, siglip_proc = load_siglip() if use_siglip else (None, None)

    all_masks = []
    all_scores = []

    # ── STEP 1: MobileSAM Splitting ─────────────────────────
    # Each sub-mask inherits the score of the V5 mask it came from.
    for mask, score in zip(v5_masks, v5_scores):
        sub_masks = split_mask_with_sam(image_rgb, mask, sam_predictor)
        all_masks.extend(sub_masks)
        all_scores.extend([score] * len(sub_masks))

    print(f" SAM: {len(v5_masks)} → {len(all_masks)} masks")

    # ── STEP 2: SigLIP Material Check ───────────────────────
    if use_siglip and siglip_model is not None:
        filtered_masks = []
        filtered_scores = []
        removed = 0
        for mask, score in zip(all_masks, all_scores):
            if is_building_siglip(image_rgb, mask, siglip_model, siglip_proc, siglip_threshold):
                filtered_masks.append(mask)
                filtered_scores.append(score)
            else:
                removed += 1
        print(f" SigLIP: حذف {removed} غير مباني")
        all_masks, all_scores = filtered_masks, filtered_scores

    # ── STEP 3: Geometric Rules ──────────────────────────────
    # BUGFIX: the geometric filter used to run on the mask list alone and
    # the surviving masks were re-paired with scores by index (with a bogus
    # 0.5 fallback), which misaligned mask/score as soon as any mask was
    # dropped. Filter each (mask, score) pair together so scores stay
    # attached to their masks. The per-mask rules are independent, so the
    # surviving mask set is identical to before.
    results = []
    for mask, score in zip(all_masks, all_scores):
        kept = apply_geometric_rules([mask])
        if kept:
            kept_mask, area_m2 = kept[0]
            results.append({
                "mask": kept_mask,
                "score": score,
                "area_m2": area_m2,
            })
    print(f" Geometric: {len(all_masks)} → {len(results)} masks")

    return results