Spaces:
Sleeping
Sleeping
yusef commited on
Commit ยท
df64c50
0
Parent(s):
Initial commit - V5.1 API
Browse files- Dockerfile +53 -0
- README.md +39 -0
- app.py +131 -0
- inference.py +447 -0
- model_manager.py +94 -0
- post_processor.py +333 -0
Dockerfile
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

# System dependencies: build tools for source installs (detectron2, MobileSAM)
# and the X/GL runtime libraries OpenCV needs even in headless environments.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

# Create app directory
WORKDIR /app

# Step 1: Install PyTorch FIRST (CPU-only to save space)
RUN pip install --no-cache-dir \
    torch torchvision --index-url https://download.pytorch.org/whl/cpu

# Step 2: Install Detectron2 (needs torch already installed)
RUN pip install --no-cache-dir \
    'git+https://github.com/facebookresearch/detectron2.git'

# Step 3: Install remaining dependencies.
# NOTE: every ">=" specifier must be quoted — unquoted, the shell treats ">"
# as output redirection, so the version pin is silently dropped and a junk
# file (e.g. "=0.104.0") is created in the image.
RUN pip install --no-cache-dir \
    "fastapi>=0.104.0" \
    "uvicorn[standard]>=0.24.0" \
    "opencv-python-headless>=4.8.0" \
    "numpy>=1.24.0" \
    "Pillow>=10.0.0" \
    "requests>=2.31.0" \
    "huggingface_hub>=0.19.0" \
    "python-multipart>=0.0.6"

# Step 4: V5.1 Pipeline — MobileSAM + SigLIP
RUN pip install --no-cache-dir \
    "transformers>=4.37.0" \
    "timm>=0.9.0" \
    'git+https://github.com/ChaoningZhang/MobileSAM.git'

# Copy app code
COPY . .

# Create a non-root user (HF Spaces requirement)
RUN useradd -m -u 1000 user
USER user

# Expose port (HF Spaces uses 7860)
EXPOSE 7860

# Start the server
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
title: Building Detection API
emoji: 🏗️
colorFrom: orange
colorTo: red
sdk: docker
app_port: 7860
pinned: true
---

# 🏗️ Building Detection API

Detect buildings from satellite imagery using Mask R-CNN V5.

## API Endpoints

- `GET /` — Health check + model info
- `GET /health` — Health check
- `POST /detect` — Detect buildings in a polygon area

## Usage

```bash
curl -X POST https://your-space.hf.space/detect \
  -H "Content-Type: application/json" \
  -d '{
    "coordinates": [[31.24, 30.04], [31.25, 30.04], [31.25, 30.05], [31.24, 30.05]],
    "threshold": 0.3
  }'
```

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `MODEL_REPO` | `yusef75/building-detection-models` | HF model repository |
| `MODEL_VERSION` | `v5` | Model version folder |
| `MODEL_FILENAME` | `model_final.pth` | Model file name |
| `SCORE_THRESHOLD` | `0.3` | Default detection threshold |
app.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Building Detection API โ FastAPI Backend for Hugging Face Spaces.
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
GET / โ Health check + model info
|
| 6 |
+
GET /health โ Health check
|
| 7 |
+
POST /detect โ Detect buildings in a polygon area
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from fastapi import FastAPI, HTTPException
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
from typing import List, Optional
|
| 14 |
+
import uvicorn
|
| 15 |
+
|
| 16 |
+
from model_manager import load_model, get_model_info
|
| 17 |
+
from inference import detect_buildings
|
| 18 |
+
|
| 19 |
+
# ==========================================
# === App Setup ===
# ==========================================
app = FastAPI(
    title="🏗️ Building Detection API",
    description="Detect buildings from satellite imagery using Mask R-CNN V5",
    version="1.0.0",
)

# Allow CORS for Vercel frontend
app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is a
    # known CORS footgun (browsers reject "*" with credentials). In production,
    # restrict to the actual Vercel domain.
    allow_origins=["*"],  # In production, restrict to your Vercel domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# ==========================================
# === Request / Response Models ===
# ==========================================
class DetectRequest(BaseModel):
    """Request body for POST /detect."""

    # Polygon ring in GeoJSON axis order: [[lng, lat], ...]
    coordinates: List[List[float]] = Field(
        ...,
        description="Polygon coordinates as [[lng, lat], ...] in GeoJSON format",
        example=[[31.24, 30.04], [31.25, 30.04], [31.25, 30.05], [31.24, 30.05]],
    )
    # Confidence cutoff; the actual filtering happens in inference.detect_buildings.
    threshold: Optional[float] = Field(
        default=0.5,
        ge=0.1,
        le=0.95,
        description="Detection confidence threshold",
    )
    use_v51: Optional[bool] = Field(
        default=True,
        description="Enable V5.1 pipeline (MobileSAM + SigLIP) for better accuracy",
    )
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class DetectResponse(BaseModel):
    """Response body for POST /detect."""

    geojson: dict  # GeoJSON FeatureCollection of detected building polygons
    stats: dict    # processing statistics (counts, timing, bounds)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class HealthResponse(BaseModel):
    """Response body for GET / and GET /health."""

    status: str  # human-readable status string
    model: dict  # info dict from model_manager.get_model_info()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# ==========================================
|
| 70 |
+
# === Startup Event ===
|
| 71 |
+
# ==========================================
|
| 72 |
+
@app.on_event("startup")
async def startup():
    """Load model when the server starts."""
    # NOTE(review): @app.on_event is deprecated in recent FastAPI versions in
    # favor of lifespan handlers — confirm the installed version before migrating.
    print("🚀 Starting Building Detection API...")
    # Blocks until the model is downloaded from HF Hub and built.
    load_model()
    print("✅ API ready!")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ==========================================
|
| 81 |
+
# === Endpoints ===
|
| 82 |
+
# ==========================================
|
| 83 |
+
@app.get("/", response_model=HealthResponse)
async def root():
    """Health check and model info."""
    return {
        "status": "🟢 online",
        "model": get_model_info(),
    }
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@app.get("/health", response_model=HealthResponse)
async def health():
    """Health check endpoint (same payload as GET /)."""
    return {
        "status": "🟢 online",
        "model": get_model_info(),
    }
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
@app.post("/detect", response_model=DetectResponse)
async def detect(request: DetectRequest):
    """
    Detect buildings in the specified polygon area.

    Send polygon coordinates in GeoJSON format [[lng, lat], ...].
    Returns a GeoJSON FeatureCollection with detected building polygons.

    Raises:
        HTTPException 400: user-correctable problems reported by
            detect_buildings via its "error" key (bad polygon, area too large).
        HTTPException 500: any unexpected processing failure.
    """
    try:
        result = detect_buildings(
            coordinates=request.coordinates,
            threshold=request.threshold,
            use_v51=request.use_v51,
        )

        # detect_buildings signals user errors via an "error" key, not by raising.
        if "error" in result:
            raise HTTPException(status_code=400, detail=result["error"])

        return result

    except HTTPException:
        # Re-raise the 400 above untouched so it isn't wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# ==========================================
# === Run ===
# ==========================================
if __name__ == "__main__":
    # Local development entry point; in the container uvicorn is started by CMD.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
inference.py
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference Engine โ Tile downloading + Building detection + Deduplication.
|
| 3 |
+
Adapted from MaskRCNN_V5_MapFlow.py for server deployment.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import math
|
| 7 |
+
import time
|
| 8 |
+
import numpy as np
|
| 9 |
+
import cv2
|
| 10 |
+
import requests
|
| 11 |
+
from PIL import Image
|
| 12 |
+
from io import BytesIO
|
| 13 |
+
from model_manager import get_predictor, set_threshold
|
| 14 |
+
from post_processor import run_v51_pipeline
|
| 15 |
+
|
| 16 |
+
# ==========================================
# === Constants ===
# ==========================================
ZOOM = 18              # Web-Mercator zoom level used by all tile math below
TILE_SIZE = 256        # pixels per map tile
TILES_PER_IMG = 2      # 2x2 tiles are stitched into one model input image
IMG_SIZE = 512         # TILE_SIZE * TILES_PER_IMG
MAX_TILES = 60  # Safety limit
MIN_BUILDING_AREA = 200  # Min contour area in pixels (filters tiny false positives)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ==========================================
# === Coordinate Utils ===
# ==========================================
def lon_to_tile_x(lon):
    """Map a longitude in degrees to a fractional Web-Mercator tile-x at ZOOM."""
    scale = 2 ** ZOOM
    return (lon + 180) / 360 * scale
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def lat_to_tile_y(lat):
    """Map a latitude in degrees to a fractional Web-Mercator tile-y at ZOOM."""
    phi = math.radians(lat)
    # Standard Mercator projection: ln(tan(phi) + sec(phi))
    mercator = math.log(math.tan(phi) + 1 / math.cos(phi))
    return (1 - mercator / math.pi) / 2 * (2 ** ZOOM)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def tile_x_to_lon(tx):
    """Inverse of lon_to_tile_x: fractional tile-x back to degrees longitude."""
    n_tiles = 2 ** ZOOM
    return tx / n_tiles * 360 - 180
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def tile_y_to_lat(ty):
    """Inverse of lat_to_tile_y: fractional tile-y back to degrees latitude."""
    merc = math.pi - 2 * math.pi * ty / (2 ** ZOOM)
    return math.degrees(math.atan(math.sinh(merc)))
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def pixel_to_geo(px, py, grid_x, grid_y):
    """Convert a pixel position inside grid cell (grid_x, grid_y) to (lon, lat)."""
    # Each grid cell covers TILES_PER_IMG tiles; the pixel offset contributes
    # a fractional tile index.
    tile_x = grid_x * TILES_PER_IMG + px / TILE_SIZE
    tile_y = grid_y * TILES_PER_IMG + py / TILE_SIZE
    return tile_x_to_lon(tile_x), tile_y_to_lat(tile_y)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ==========================================
# === Tile Downloading ===
# ==========================================
# Shared HTTP session so tile downloads reuse TCP connections.
session = requests.Session()
# A browser-like User-Agent; the tile server may reject default python-requests UAs.
session.headers.update({"User-Agent": "Mozilla/5.0"})
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def download_tile_512(grid_x, grid_y):
    """Download a 2x2 block of satellite tiles stitched into one 512x512 RGB image.

    Tiles that fail to download are silently left black (zeros); callers treat
    mostly-dark images as missing data (see detect_buildings).
    """
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    base_tx = grid_x * TILES_PER_IMG
    base_ty = grid_y * TILES_PER_IMG

    for dy in range(TILES_PER_IMG):
        for dx in range(TILES_PER_IMG):
            tx, ty = base_tx + dx, base_ty + dy
            # Spread requests over the mt0..mt3 mirror hosts.
            s = (tx + ty) % 4
            url = f"https://mt{s}.google.com/vt/lyrs=s&x={tx}&y={ty}&z={ZOOM}"
            try:
                r = session.get(url, timeout=15)
                tile = np.array(Image.open(BytesIO(r.content)).convert("RGB"))
                # Paste the 256x256 tile into its quadrant of the output image.
                img[dy * TILE_SIZE:(dy + 1) * TILE_SIZE,
                    dx * TILE_SIZE:(dx + 1) * TILE_SIZE] = tile
            except Exception:
                # Best-effort: a failed tile leaves its quadrant black.
                pass
    return img
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ==========================================
# === Polygon -> Tiles ===
# ==========================================
def get_tiles_for_polygon(polygon_coords):
    """
    Convert polygon coordinates to grid tile indices.

    Input: list of [lat, lon] pairs.
    Returns: list of (grid_x, grid_y) tuples and bounds
        as (min_lat, max_lat, min_lon, max_lon).
    """
    lats = [c[0] for c in polygon_coords]
    lons = [c[1] for c in polygon_coords]

    min_lat, max_lat = min(lats), max(lats)
    min_lon, max_lon = min(lons), max(lons)

    # Tile-y grows southward, so max_lat maps to the minimum tile-y.
    min_tx = lon_to_tile_x(min_lon)
    max_tx = lon_to_tile_x(max_lon)
    min_ty = lat_to_tile_y(max_lat)
    max_ty = lat_to_tile_y(min_lat)

    # Collapse raw tile indices into the 2x2 "grid" cells used by download_tile_512.
    min_gx = int(min_tx) // TILES_PER_IMG
    max_gx = int(max_tx) // TILES_PER_IMG
    min_gy = int(min_ty) // TILES_PER_IMG
    max_gy = int(max_ty) // TILES_PER_IMG

    tiles = []
    for gy in range(min_gy, max_gy + 1):
        for gx in range(min_gx, max_gx + 1):
            tiles.append((gx, gy))

    return tiles, (min_lat, max_lat, min_lon, max_lon)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# ==========================================
# === Polygon Regularization ===
# ==========================================
def regularize_polygon(contour, rect):
    """
    Regularize polygon edges by snapping to the building's dominant direction.

    1. Get dominant angle from minAreaRect
    2. Rotate polygon so dominant direction = horizontal
    3. Snap nearly-horizontal edges -> exact horizontal
       Snap nearly-vertical edges -> exact vertical
    4. Rotate back

    Args:
        contour: OpenCV contour array of shape (N, 1, 2).
        rect: cv2.minAreaRect result ((cx, cy), (w, h), angle) for this contour.

    Returns:
        Integer vertex array of shape (N, 2); the original contour is returned
        unchanged when it has fewer than 4 points.
    """
    points = contour.reshape(-1, 2).astype(float)
    n = len(points)
    if n < 4:
        # Too few vertices for right-angle snapping to be meaningful.
        return contour

    angle = rect[2]  # dominant orientation (degrees) from the min-area rectangle
    angle_rad = math.radians(angle)
    cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)

    center = np.mean(points, axis=0)

    # Rotate to align dominant direction with horizontal axis
    rotated = np.zeros_like(points)
    for i, p in enumerate(points):
        dx, dy = p[0] - center[0], p[1] - center[1]
        rotated[i] = [dx * cos_a + dy * sin_a, -dx * sin_a + dy * cos_a]

    # Snap edges within 15 degrees of horizontal/vertical.
    # Snapping mutates `rotated` in place, so each edge sees the already-snapped
    # position of its start vertex — the iteration order matters.
    SNAP_ANGLE = 15
    for i in range(n):
        j = (i + 1) % n
        dx = rotated[j][0] - rotated[i][0]
        dy = rotated[j][1] - rotated[i][1]
        if abs(dx) < 1e-6 and abs(dy) < 1e-6:
            continue  # degenerate edge (duplicate vertex)
        edge_angle = abs(math.degrees(math.atan2(abs(dy), abs(dx))))

        if edge_angle < SNAP_ANGLE:  # Nearly horizontal
            rotated[j][1] = rotated[i][1]
        elif edge_angle > (90 - SNAP_ANGLE):  # Nearly vertical
            rotated[j][0] = rotated[i][0]

    # Rotate back
    result = np.zeros_like(points)
    for i, p in enumerate(rotated):
        rx = p[0] * cos_a - p[1] * sin_a + center[0]
        ry = p[0] * sin_a + p[1] * cos_a + center[1]
        result[i] = [round(rx), round(ry)]

    return result.astype(int)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# ==========================================
# === Mask -> GeoJSON (with regularization) ===
# ==========================================
def mask_to_geo_polygon(mask, grid_x, grid_y, score):
    """Convert a binary mask to a GeoJSON Feature with angle regularization.

    Returns None when the mask has no contour, its largest contour is smaller
    than MIN_BUILDING_AREA, or it simplifies to fewer than 3 vertices.
    """
    contours, _ = cv2.findContours(
        mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return None

    # Keep only the largest connected component of the mask.
    contour = max(contours, key=cv2.contourArea)
    if cv2.contourArea(contour) < MIN_BUILDING_AREA:
        return None

    # Simplify the contour (epsilon proportional to the perimeter)
    epsilon = 0.008 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    if len(approx) < 3:
        return None

    # Regularize angles (snap edges toward 90 degrees)
    rect = cv2.minAreaRect(contour)
    if len(approx) >= 4:
        pixel_points = regularize_polygon(approx, rect)
    else:
        # Triangles have no rectilinear structure to snap.
        pixel_points = approx.reshape(-1, 2)

    # Convert pixel coordinates to geographic coordinates
    geo_coords = []
    for pt in pixel_points:
        px, py = int(pt[0]), int(pt[1])
        lon, lat = pixel_to_geo(px, py, grid_x, grid_y)
        geo_coords.append([lon, lat])
    geo_coords.append(geo_coords[0])  # Close polygon (GeoJSON rings must close)

    return {
        "type": "Feature",
        "properties": {"confidence": round(float(score), 3)},
        "geometry": {"type": "Polygon", "coordinates": [geo_coords]},
    }
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def polygon_area(coords):
    """Absolute area of a 2-D polygon via the Shoelace formula.

    Degenerate inputs (fewer than 3 vertices) yield 0.
    """
    if len(coords) < 3:
        return 0
    total = 0
    # Pair each vertex with its successor, wrapping the last back to the first.
    for (x1, y1), (x2, y2) in zip(coords, coords[1:] + coords[:1]):
        total += x1 * y2 - x2 * y1
    return abs(total) / 2
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def bboxes_overlap(coords1, coords2):
    """Return True when the axis-aligned bounding boxes of two polygons intersect."""
    min_x1, max_x1 = min(c[0] for c in coords1), max(c[0] for c in coords1)
    min_y1, max_y1 = min(c[1] for c in coords1), max(c[1] for c in coords1)
    min_x2, max_x2 = min(c[0] for c in coords2), max(c[0] for c in coords2)
    min_y2, max_y2 = min(c[1] for c in coords2), max(c[1] for c in coords2)

    # Boxes are disjoint when one lies entirely left/right or above/below the other.
    disjoint = (max_x1 < min_x2 or max_x2 < min_x1 or
                max_y1 < min_y2 or max_y2 < min_y1)
    return not disjoint
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def deduplicate_buildings(features, distance_threshold=0.0003):
    """
    Remove duplicate buildings from overlapping tiles.

    Of any pair whose centroids lie within ``distance_threshold`` degrees
    (~30 m at the equator), whose areas are within 2x of each other, and whose
    bounding boxes overlap, only the higher-confidence feature is kept.

    Args:
        features: list of GeoJSON Feature dicts (Polygon geometry,
            "confidence" property).
        distance_threshold: max centroid distance in degrees for two features
            to be considered duplicates.

    Returns:
        Filtered feature list, ordered by descending confidence.
    """
    if not features:
        return features

    # Pre-compute centroids and areas once per feature.
    centroids = []
    areas = []
    for f in features:
        coords = f["geometry"]["coordinates"][0]
        cx = np.mean([c[0] for c in coords])
        cy = np.mean([c[1] for c in coords])
        centroids.append((cx, cy))
        areas.append(polygon_area(coords))

    # Process in descending confidence so duplicates lose to stronger detections.
    indices = sorted(
        range(len(features)),
        key=lambda i: features[i]["properties"]["confidence"],
        reverse=True,
    )

    keep = []
    kept = set()     # same contents as `keep`; O(1) membership.
                     # (Previously `set(keep)` was rebuilt on EVERY inner-loop
                     # iteration — an O(k) construction inside an O(n^2) loop.)
    removed = set()

    for i in indices:
        if i in removed:
            continue
        keep.append(i)
        kept.add(i)
        cx1, cy1 = centroids[i]
        area1 = areas[i]
        coords1 = features[i]["geometry"]["coordinates"][0]

        for j in indices:
            if j in removed or j == i or j in kept:
                continue
            cx2, cy2 = centroids[j]
            area2 = areas[j]

            # Quick centroid distance check
            dist = math.sqrt((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2)
            if dist > distance_threshold:
                continue

            # Area similarity check: sizes differing by more than 2x are
            # probably distinct buildings, not duplicates.
            if area1 > 0 and area2 > 0:
                ratio = max(area1, area2) / min(area1, area2)
                if ratio > 2.0:
                    continue

            # Bounding box overlap check
            coords2 = features[j]["geometry"]["coordinates"][0]
            if bboxes_overlap(coords1, coords2):
                removed.add(j)

    return [features[i] for i in keep]
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
# ==========================================
# === Point-in-Polygon Test ===
# ==========================================
def point_in_polygon(px, py, polygon):
    """
    Ray-casting test: True when point (px, py) lies inside `polygon`.

    polygon: list of [x, y] vertex pairs (open ring; closure is implicit).
    """
    inside = False
    count = len(polygon)
    prev = count - 1
    for cur in range(count):
        x1, y1 = polygon[cur]
        x2, y2 = polygon[prev]
        # Does a horizontal ray from the point cross edge (prev -> cur)?
        crosses = (y1 > py) != (y2 > py)
        if crosses and px < (x2 - x1) * (py - y1) / (y2 - y1) + x1:
            inside = not inside
        prev = cur
    return inside
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
# ==========================================
# === Main Processing Function ===
# ==========================================
def detect_buildings(coordinates, threshold=0.5, use_v51=False):
    """
    Process a polygon area and detect buildings.

    Args:
        coordinates: list of [lng, lat] pairs (GeoJSON format)
        threshold: detection confidence threshold
        use_v51: when True, refine masks with the V5.1 pipeline
            (post_processor.run_v51_pipeline) instead of using raw V5 masks

    Returns:
        dict with GeoJSON FeatureCollection + stats, or a dict with an
        "error" key for user-correctable problems (too few points,
        area too large).
    """
    # Convert from GeoJSON [lng, lat] to [lat, lng]; malformed points are skipped.
    coords = []
    for point in coordinates:
        if isinstance(point, list) and len(point) == 2:
            coords.append([point[1], point[0]])

    if len(coords) < 3:
        return {"error": "Need at least 3 points to form a polygon"}

    # Build user polygon in [lng, lat] format for clipping
    user_polygon = [[c[1], c[0]] for c in coords]  # [lng, lat]

    predictor = get_predictor()

    # Get tiles covering the polygon's bounding box
    tiles, bounds = get_tiles_for_polygon(coords)
    n_tiles = len(tiles)

    if n_tiles > MAX_TILES:
        return {
            "error": f"Area too large! {n_tiles} tiles needed, max is {MAX_TILES}. Draw a smaller polygon.",
            "tiles_needed": n_tiles,
            "max_tiles": MAX_TILES,
        }

    # Process tiles
    all_features = []
    start_time = time.time()

    for idx, (gx, gy) in enumerate(tiles):
        img = download_tile_512(gx, gy)

        # Skip dark/empty tiles (failed downloads come back black)
        if np.mean(img) < 10:
            continue

        # Detectron2's DefaultPredictor expects BGR input
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        outputs = predictor(img_bgr)
        instances = outputs["instances"].to("cpu")

        if len(instances) == 0:
            continue

        raw_masks = instances.pred_masks.numpy()
        raw_scores = instances.scores.numpy()

        # -- V5.1 Pipeline (optional) --------------------------------------
        if use_v51:
            # Pre-filter by confidence first (faster)
            conf_masks = [m for m, s in zip(raw_masks, raw_scores) if float(s) >= threshold]
            conf_scores = [float(s) for s in raw_scores if float(s) >= threshold]

            if conf_masks:
                print(f"  [V5.1] Tile {idx+1}/{len(tiles)}: {len(conf_masks)} masks → pipeline...")
                v51_results = run_v51_pipeline(
                    image_rgb=img,
                    v5_masks=conf_masks,
                    v5_scores=conf_scores,
                    use_sam=True,
                    use_siglip=True,
                )
                for res in v51_results:
                    feature = mask_to_geo_polygon(res["mask"], gx, gy, res["score"])
                    if feature:
                        feature["properties"]["area_m2"] = res["area_m2"]
                        all_features.append(feature)

        # -- V5 Original Pipeline ------------------------------------------
        else:
            for mask, score in zip(raw_masks, raw_scores):
                if float(score) < threshold:
                    continue
                feature = mask_to_geo_polygon(mask, gx, gy, score)
                if feature:
                    all_features.append(feature)

    # Clip to user polygon — only keep buildings whose centroid is inside
    clipped_features = []
    for f in all_features:
        poly_coords = f["geometry"]["coordinates"][0]
        cx = np.mean([c[0] for c in poly_coords])  # lng
        cy = np.mean([c[1] for c in poly_coords])  # lat
        if point_in_polygon(cx, cy, user_polygon):
            clipped_features.append(f)

    all_features = clipped_features

    # Deduplicate detections repeated across overlapping tile boundaries
    before_dedup = len(all_features)
    all_features = deduplicate_buildings(all_features)
    after_dedup = len(all_features)
    elapsed = time.time() - start_time

    # Build response
    geojson = {
        "type": "FeatureCollection",
        "features": all_features,
    }

    stats = {
        "buildings_detected": after_dedup,
        "duplicates_removed": before_dedup - after_dedup,
        "tiles_processed": n_tiles,
        "processing_time_seconds": round(elapsed, 1),
        "threshold": threshold,
        "bounds": {
            "min_lat": bounds[0],
            "max_lat": bounds[1],
            "min_lon": bounds[2],
            "max_lon": bounds[3],
        },
    }

    return {"geojson": geojson, "stats": stats}
|
model_manager.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Model Manager โ Easy version swapping for Building Detection models.
|
| 3 |
+
|
| 4 |
+
To swap models:
|
| 5 |
+
1. Upload new model to HF repo (e.g., v6/model_final.pth)
|
| 6 |
+
2. Set MODEL_VERSION env var to "v6"
|
| 7 |
+
3. Restart the Space
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import torch
|
| 12 |
+
from detectron2.config import get_cfg
|
| 13 |
+
from detectron2 import model_zoo
|
| 14 |
+
from detectron2.engine import DefaultPredictor
|
| 15 |
+
from huggingface_hub import hf_hub_download
|
| 16 |
+
|
| 17 |
+
# ==========================================
# === Configuration ===
# ==========================================
# All settings are overridable via environment variables (see module docstring).
MODEL_REPO = os.environ.get("MODEL_REPO", "yusef75/building-detection-models")
MODEL_VERSION = os.environ.get("MODEL_VERSION", "v5")
MODEL_FILENAME = os.environ.get("MODEL_FILENAME", "model_final.pth")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SCORE_THRESHOLD = float(os.environ.get("SCORE_THRESHOLD", "0.3"))

# Global predictor (populated lazily by load_model / get_predictor)
_predictor = None
_model_info = {}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_model():
    """Load model from Hugging Face Hub. Called once at startup.

    Downloads the weights for MODEL_VERSION from MODEL_REPO, builds a
    Detectron2 DefaultPredictor configured for single-class (building)
    instance segmentation on 512x512 inputs, and stores it in the module
    globals ``_predictor`` / ``_model_info``.

    Returns:
        The constructed DefaultPredictor.
    """
    global _predictor, _model_info

    print(f"🔄 Loading model: {MODEL_REPO} / {MODEL_VERSION} / {MODEL_FILENAME}")
    print(f"🖥️ Device: {DEVICE}")

    # Download model from HF Hub (cached across restarts within the same container)
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=f"{MODEL_VERSION}/{MODEL_FILENAME}",
        cache_dir="/tmp/models",
    )
    print(f"✅ Model downloaded to: {model_path}")

    # Configure Detectron2: base Mask R-CNN R50-FPN config, custom weights.
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single "building" class
    cfg.MODEL.WEIGHTS = model_path
    cfg.MODEL.DEVICE = DEVICE
    # Inputs are always 512x512 stitched tiles (see inference.IMG_SIZE).
    cfg.INPUT.MIN_SIZE_TEST = 512
    cfg.INPUT.MAX_SIZE_TEST = 512

    # === Detection quality settings ===
    # Low base threshold — actual filtering happens in inference.py
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1

    # NMS: Aggressively remove overlapping detections (lower = stricter)
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.3

    # Max detections per image (fewer = less overlap)
    cfg.TEST.DETECTIONS_PER_IMAGE = 200

    _predictor = DefaultPredictor(cfg)
    _model_info = {
        "version": MODEL_VERSION,
        "repo": MODEL_REPO,
        "device": DEVICE,
        "threshold": SCORE_THRESHOLD,
    }
    print(f"🎉 Model {MODEL_VERSION} loaded on {DEVICE}!")
    return _predictor
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def get_predictor():
    """Return the cached predictor, initialising it on first use."""
    global _predictor
    if _predictor is not None:
        return _predictor
    # First call: populate the module-level singleton.
    load_model()
    return _predictor
def get_model_info():
    """Get info about the currently loaded model.

    Returns:
        dict with keys "version", "repo", "device" and "threshold" once
        load_model() has run; an empty dict before that. The caller gets
        the live module-level dict, not a copy.
    """
    return _model_info
def set_threshold(threshold: float):
    """Update the detection threshold dynamically.

    No-op when the model has not been loaded yet.
    """
    global _predictor
    if _predictor is not None:
        # NOTE(review): DefaultPredictor builds its model at construction
        # time; mutating cfg afterwards may not propagate to the already
        # built ROI heads — confirm this actually changes live behaviour.
        _predictor.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
post_processor.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
V5.1 Post-Processing Pipeline โ "The Hybrid Eye"
|
| 3 |
+
=================================================
|
| 4 |
+
ูุดุชุบู ุจุนุฏ V5 ู
ุจุงุดุฑุฉ ุจุฏูู ุฃู ุชุฏุฑูุจ ุฌุฏูุฏ.
|
| 5 |
+
|
| 6 |
+
Pipeline:
|
| 7 |
+
1. V5 (Hunter) โ masks ุฃูููุฉ
|
| 8 |
+
2. MobileSAM โ ููุทุน ุงููุชู ุงูู
ุชูุงุตูุฉ ูู sub-masks
|
| 9 |
+
3. SigLIP โ Zero-Shot: building vs non-building
|
| 10 |
+
4. Geometric Rules โ area + shape filter + area_m2
|
| 11 |
+
|
| 12 |
+
ุงูุชุซุจูุช:
|
| 13 |
+
pip install git+https://github.com/ChaoningZhang/MobileSAM.git
|
| 14 |
+
pip install transformers torch
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import math
|
| 18 |
+
import numpy as np
|
| 19 |
+
import cv2
|
| 20 |
+
import torch
|
| 21 |
+
from PIL import Image
|
| 22 |
+
|
| 23 |
+
# ============================================================
|
| 24 |
+
# === ุชุญู
ูู ุงูู
ูุฏููุงุช (ู
ุฑุฉ ูุงุญุฏุฉ) ===
|
| 25 |
+
# ============================================================
|
| 26 |
+
|
| 27 |
+
_mobile_sam = None
|
| 28 |
+
_sam_predictor = None
|
| 29 |
+
_siglip_model = None
|
| 30 |
+
_siglip_processor = None
|
| 31 |
+
|
| 32 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_mobile_sam():
    """Load MobileSAM (only ~5.78 MB — very lightweight).

    Returns a cached SamPredictor on subsequent calls, or None when the
    mobile_sam package (or the checkpoint download) is unavailable.
    """
    global _mobile_sam, _sam_predictor
    if _sam_predictor is not None:
        return _sam_predictor

    try:
        from mobile_sam import sam_model_registry, SamPredictor
        from huggingface_hub import hf_hub_download

        print("📥 تحميل MobileSAM...")
        ckpt = hf_hub_download(
            repo_id="dhkim2810/MobileSAM",
            filename="mobile_sam.pt",
        )
        _mobile_sam = sam_model_registry["vit_t"](checkpoint=ckpt)
        _mobile_sam.to(DEVICE).eval()
        _sam_predictor = SamPredictor(_mobile_sam)
        print("✅ MobileSAM جاهز!")
        return _sam_predictor

    except Exception as e:
        # Degrade gracefully: callers treat None as "skip SAM splitting".
        print(f"⚠️ MobileSAM مش متاح: {e}")
        return None
def load_siglip():
    """Load SigLIP for zero-shot material classification.

    Returns (model, processor), cached after the first successful call,
    or (None, None) when transformers / the weights are unavailable.
    """
    global _siglip_model, _siglip_processor
    if _siglip_model is not None:
        return _siglip_model, _siglip_processor

    try:
        from transformers import SiglipProcessor, SiglipModel

        print("📥 تحميل SigLIP...")
        model_id = "google/siglip-base-patch16-224"
        _siglip_processor = SiglipProcessor.from_pretrained(model_id)
        _siglip_model = SiglipModel.from_pretrained(
            model_id,
            torch_dtype=torch.float32,  # CPU → always float32
        ).to(DEVICE).eval()
        print("✅ SigLIP جاهز!")
        return _siglip_model, _siglip_processor

    except Exception as e:
        # Degrade gracefully: callers treat (None, None) as "skip SigLIP".
        print(f"⚠️ SigLIP مش متاح: {e}")
        return None, None
+
# ============================================================
# === STEP 1: MobileSAM — Surgical Cutting ===
# ============================================================

def split_mask_with_sam(image_rgb: np.ndarray, mask: np.ndarray, predictor) -> list:
    """Take one mask (possibly covering e.g. 4 merged buildings) and split it
    into sub-masks.

    Args:
        image_rgb: full image as (H, W, 3) array
        mask: binary mask (H, W) produced by V5
        predictor: SamPredictor instance (or None to skip splitting)

    Returns:
        list of binary masks — ideally one per individual building
    """
    if predictor is None:
        return [mask]  # fallback: return the original mask unchanged

    # A small mask is most likely a single building — no splitting needed.
    area = mask.sum()
    if area < 2000:  # ~45x45 pixels → probably one building
        return [mask]

    try:
        # Embed the image for SAM.
        # NOTE(review): set_image() recomputes the image embedding on every
        # call; when splitting many masks of the same image this repeats
        # work — consider hoisting to the caller.
        predictor.set_image(image_rgb)

        # Use the mask's bounding box as the prompt.
        ys, xs = np.where(mask)
        x1, x2 = xs.min(), xs.max()
        y1, y2 = ys.min(), ys.max()
        box = np.array([x1, y1, x2, y2])

        # Ask SAM for multiple segmentation proposals.
        masks_out, scores, _ = predictor.predict(
            box=box,
            multimask_output=True,  # <-- request several hypotheses
        )

        # Keep only sub-masks that actually overlap the original mask.
        # NOTE(review): multimask_output returns alternative hypotheses of
        # the same region, so survivors may be near-duplicates rather than
        # disjoint pieces — confirm downstream handles the overlap.
        valid_masks = []
        for sub_mask in masks_out:
            # The sub-mask must intersect the original V5 mask.
            overlap = (sub_mask & mask.astype(bool)).sum()
            if overlap > 200:  # at least 200 shared pixels
                valid_masks.append(sub_mask.astype(np.uint8))

        return valid_masks if valid_masks else [mask]

    except Exception as e:
        # Any SAM failure falls back to the unsplit mask.
        print(f"⚠️ SAM splitter error: {e}")
        return [mask]
# ============================================================
# === STEP 2: SigLIP — Zero-Shot Material Check ===
# ============================================================

# Comparison prompts — zero-shot, no training required.
BUILDING_TEXTS = [
    "a satellite view of a building rooftop",
    "concrete roof of a building seen from above",
    "residential building viewed from satellite",
    "rooftop of a house or apartment building",
]

NON_BUILDING_TEXTS = [
    "farmland and agricultural fields from above",
    "green vegetation and trees from satellite",
    "water surface river or lake from above",
    "empty desert or bare soil from satellite",
    "road or highway seen from above",
    "swimming pool seen from satellite",
]

# Building prompts come first so probs[:NUM_BUILDING] selects exactly them.
ALL_TEXTS = BUILDING_TEXTS + NON_BUILDING_TEXTS
NUM_BUILDING = len(BUILDING_TEXTS)
@torch.no_grad()
def is_building_siglip(
    image_rgb: np.ndarray,
    mask: np.ndarray,
    model,
    processor,
    threshold: float = 0.4,
) -> bool:
    """Zero-shot check with SigLIP that a mask really covers a building.

    Crops the mask's (5 px padded) bounding box, scores it against the
    BUILDING_TEXTS / NON_BUILDING_TEXTS prompts, and accepts the mask when
    the summed probability of the building prompts exceeds ``threshold``.

    Args:
        image_rgb: full image as (H, W, 3) array
        mask: binary mask (H, W)
        model: SigLIP model (None → accept everything)
        processor: matching SigLIP processor
        threshold: minimum summed building-prompt probability

    Returns:
        True to keep the mask, False to discard it.
    """
    if model is None:
        return True  # fallback: accept everything when SigLIP is unavailable

    try:
        # Crop the bounding box (padded by 5 px) from the image.
        ys, xs = np.where(mask)
        if len(ys) == 0:
            return False
        x1, x2 = max(0, xs.min() - 5), min(image_rgb.shape[1], xs.max() + 5)
        y1, y2 = max(0, ys.min() - 5), min(image_rgb.shape[0], ys.max() + 5)
        crop = image_rgb[y1:y2, x1:x2]

        if crop.size == 0:
            return False

        pil_crop = Image.fromarray(crop)

        # Prepare inputs (padding="max_length" is required by SigLIP).
        inputs = processor(
            text=ALL_TEXTS,
            images=[pil_crop],
            return_tensors="pt",
            padding="max_length",
        )
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        # BUGFIX: pixel_values used to be unconditionally cast to half on
        # CUDA, but load_siglip() always loads the model in float32, so the
        # forward pass raised a dtype mismatch that the broad `except`
        # swallowed — silently disabling this filter on GPU. Match the
        # model's actual parameter dtype instead.
        inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)

        # Similarity scores: softmax over all candidate prompts.
        outputs = model(**inputs)
        logits = outputs.logits_per_image[0]  # (num_texts,)
        probs = torch.softmax(logits, dim=0).cpu().float().numpy()

        # Total probability mass assigned to the building prompts.
        building_score = probs[:NUM_BUILDING].sum()

        return building_score > threshold

    except Exception as e:
        print(f"⚠️ SigLIP check error: {e}")
        return True  # fallback: accept on error
# ============================================================
|
| 223 |
+
# === STEP 3: Geometric Rules ===
|
| 224 |
+
# ============================================================
|
| 225 |
+
|
| 226 |
+
def _mask_area_m2(mask, pixel_size_m=0.597):
|
| 227 |
+
"""
|
| 228 |
+
ุชุญููู ุนุฏุฏ pixels ูู ู
ุชุฑ ู
ุฑุจุน.
|
| 229 |
+
pixel_size_m = ุญุฌู
ุงูุจููุณู ุนูุฏ Zoom 18 (~0.6 ู
ุชุฑ)
|
| 230 |
+
"""
|
| 231 |
+
return mask.sum() * (pixel_size_m ** 2)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def _aspect_ratio(mask):
|
| 235 |
+
"""ูุณุจุฉ ุงูุทูู ููุนุฑุถ โ ูู > 10 ูุงูุดูู ุบุฑูุจ ุฌุฏุงู."""
|
| 236 |
+
ys, xs = np.where(mask)
|
| 237 |
+
if len(ys) == 0:
|
| 238 |
+
return 1.0
|
| 239 |
+
h = ys.max() - ys.min() + 1
|
| 240 |
+
w = xs.max() - xs.min() + 1
|
| 241 |
+
return max(h, w) / max(min(h, w), 1)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def apply_geometric_rules(masks: list, min_area_m2=20.0, max_area_m2=15000.0, max_aspect=10.0):
|
| 245 |
+
"""
|
| 246 |
+
ูููุชุฑ ุงูู masks ุจููุงุนุฏ ููุฏุณูุฉ:
|
| 247 |
+
|
| 248 |
+
- ู
ุณุงุญุฉ < 20 ู
ยฒ โ ุงุญุฐู (noise)
|
| 249 |
+
- ู
ุณุงุญุฉ > 15,000 ู
ยฒ โ ุญุฐูุฑ (probably wrong)
|
| 250 |
+
- aspect ratio > 10 โ ุงุญุฐู (ุดูู ุบุฑูุจ ู
ุด ู
ุจูู)
|
| 251 |
+
|
| 252 |
+
Returns: list of (mask, area_m2) tuples
|
| 253 |
+
"""
|
| 254 |
+
result = []
|
| 255 |
+
for mask in masks:
|
| 256 |
+
area = _mask_area_m2(mask)
|
| 257 |
+
if area < min_area_m2:
|
| 258 |
+
continue
|
| 259 |
+
if _aspect_ratio(mask) > max_aspect:
|
| 260 |
+
continue
|
| 261 |
+
result.append((mask, round(area, 1)))
|
| 262 |
+
return result
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
# ============================================================
# === MAIN: run_v51_pipeline ===
# ============================================================

def run_v51_pipeline(
    image_rgb: np.ndarray,
    v5_masks: list,
    v5_scores: list,
    use_sam: bool = True,
    use_siglip: bool = True,
    siglip_threshold: float = 0.4,
) -> list:
    """Full V5.1 post-processing pipeline.

    Args:
        image_rgb: image as numpy array (H, W, 3)
        v5_masks: list of binary masks from V5
        v5_scores: list of confidence scores from V5 (parallel to v5_masks)
        use_sam: enable MobileSAM splitting
        use_siglip: enable the SigLIP material check
        siglip_threshold: building-probability cutoff passed to SigLIP

    Returns:
        list of dicts: [{"mask": np.array, "score": float, "area_m2": float}]
    """
    # Load the auxiliary models lazily (no-ops when disabled).
    sam_predictor = load_mobile_sam() if use_sam else None
    siglip_model, siglip_proc = load_siglip() if use_siglip else (None, None)

    all_masks = []
    all_scores = []

    # ── STEP 1: MobileSAM Splitting ─────────────────────────
    # Each sub-mask inherits the score of the V5 mask it came from.
    for mask, score in zip(v5_masks, v5_scores):
        sub_masks = split_mask_with_sam(image_rgb, mask, sam_predictor)
        all_masks.extend(sub_masks)
        all_scores.extend([score] * len(sub_masks))

    print(f" SAM: {len(v5_masks)} → {len(all_masks)} masks")

    # ── STEP 2: SigLIP Material Check ───────────────────────
    if use_siglip and siglip_model is not None:
        filtered_masks = []
        filtered_scores = []
        removed = 0
        for mask, score in zip(all_masks, all_scores):
            if is_building_siglip(image_rgb, mask, siglip_model, siglip_proc, siglip_threshold):
                filtered_masks.append(mask)
                filtered_scores.append(score)
            else:
                removed += 1
        print(f" SigLIP: حذف {removed} غير مباني")
        all_masks, all_scores = filtered_masks, filtered_scores

    # ── STEP 3: Geometric Rules ──────────────────────────────
    # BUGFIX: the geometric filter used to run on the mask list alone and
    # the surviving masks were re-paired with scores by index (with a bogus
    # 0.5 fallback), which misaligned mask/score as soon as any mask was
    # dropped. Filter each (mask, score) pair together so scores stay
    # attached to their masks. The per-mask rules are independent, so the
    # surviving mask set is identical to before.
    results = []
    for mask, score in zip(all_masks, all_scores):
        kept = apply_geometric_rules([mask])
        if kept:
            kept_mask, area_m2 = kept[0]
            results.append({
                "mask": kept_mask,
                "score": score,
                "area_m2": area_m2,
            })
    print(f" Geometric: {len(all_masks)} → {len(results)} masks")

    return results