Deagin Claude committed on
Commit
80940c5
·
1 Parent(s): bf28724

implement advanced roof plane segmentation

Browse files

Major improvements for detecting individual roof planes:

- Multi-layer DINOv2 feature extraction (layers 6,12,18,24)
- Edge detection with Sobel + Canny for boundary awareness
- Three segmentation algorithms: SLIC, Watershed, Felzenszwalb
- Feature-based segment merging with similarity threshold
- Bicubic upsampling of features to image resolution
- Edge visualization output
- Configurable segmentation parameters in UI

Technical changes:
- Switch from DINOv3-SAT to DINOv2-Large (better compatibility)
- Added scikit-image and scipy dependencies
- Feature pyramid approach with 128-dim PCA
- Watershed uses distance transform from edges
- Tighter polygon simplification (0.008 vs 0.015 epsilon)
- New UI with algorithm selection and edge preview

This enables detection of individual roof facets, peaks, valleys,
and different planes on pitched roofs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +391 -168
  2. requirements.txt +4 -2
app.py CHANGED
@@ -5,6 +5,11 @@ from PIL import Image
5
  from transformers import AutoImageProcessor, AutoModel
6
  from sklearn.cluster import KMeans
7
  from sklearn.decomposition import PCA
 
 
 
 
 
8
  import cv2
9
  import json
10
  import requests
@@ -19,11 +24,11 @@ warnings.filterwarnings("ignore")
19
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
20
 
21
  # DINOv3 Model - Satellite pretrained
22
- MODEL_NAME = "facebook/dinov3-vitl16-pretrain-sat493m"
23
  print(f"Loading {MODEL_NAME}...")
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
  processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
26
- model = AutoModel.from_pretrained(MODEL_NAME).to(device)
27
  model.eval()
28
  print(f"Model loaded on {device}")
29
 
@@ -35,22 +40,22 @@ def geocode_address(address, api_key):
35
  "address": address,
36
  "key": api_key
37
  }
38
-
39
  response = requests.get(url, params=params)
40
  data = response.json()
41
-
42
  if data["status"] != "OK":
43
  raise ValueError(f"Geocoding failed: {data['status']}")
44
-
45
  location = data["results"][0]["geometry"]["location"]
46
  formatted_address = data["results"][0]["formatted_address"]
47
-
48
  return location["lat"], location["lng"], formatted_address
49
 
50
 
51
  def fetch_geotiff(lat, lng, api_key, radius_meters=50):
52
  """Fetch RGB GeoTIFF from Google Solar API Data Layers."""
53
-
54
  layers_url = "https://solar.googleapis.com/v1/dataLayers:get"
55
  params = {
56
  "location.latitude": lat,
@@ -61,32 +66,32 @@ def fetch_geotiff(lat, lng, api_key, radius_meters=50):
61
  "pixelSizeMeters": 0.25,
62
  "key": api_key
63
  }
64
-
65
  response = requests.get(layers_url, params=params)
66
-
67
  if response.status_code != 200:
68
  params["requiredQuality"] = "MEDIUM"
69
  response = requests.get(layers_url, params=params)
70
-
71
  if response.status_code != 200:
72
  raise ValueError(f"Data Layers API error: {response.status_code} - {response.text}")
73
-
74
  layers = response.json()
75
-
76
  rgb_url = layers.get("rgbUrl")
77
  if not rgb_url:
78
  raise ValueError("No RGB imagery available for this location")
79
-
80
  rgb_response = requests.get(f"{rgb_url}&key={api_key}")
81
  if rgb_response.status_code != 200:
82
  raise ValueError(f"Failed to download GeoTIFF: {rgb_response.status_code}")
83
-
84
  return rgb_response.content, layers
85
 
86
 
87
  def parse_geotiff(geotiff_bytes):
88
  """Parse GeoTIFF and extract image + bounds."""
89
-
90
  with rasterio.open(io.BytesIO(geotiff_bytes)) as src:
91
  if src.count >= 3:
92
  r = src.read(1)
@@ -96,70 +101,259 @@ def parse_geotiff(geotiff_bytes):
96
  else:
97
  img_array = src.read(1)
98
  img_array = np.stack([img_array] * 3, axis=-1)
99
-
100
  bounds = src.bounds
101
  crs = src.crs
102
-
103
  if crs and crs != CRS.from_epsg(4326):
104
  from rasterio.warp import transform_bounds
105
  bounds = transform_bounds(crs, CRS.from_epsg(4326), *bounds)
106
-
107
  image = Image.fromarray(img_array.astype(np.uint8))
108
  return image, bounds
109
 
110
 
111
- def extract_features(image):
112
- """Extract dense patch features from DINOv3."""
113
- inputs = processor(images=image, return_tensors="pt").to(device)
114
-
 
 
 
 
115
  with torch.inference_mode():
116
  outputs = model(**inputs)
117
- # DINOv3: skip CLS + 4 register tokens
118
- patch_features = outputs.last_hidden_state[:, 5:, :]
119
-
120
- return patch_features
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
  def pixel_to_geo(x, y, img_width, img_height, bounds):
124
  """Convert pixel coordinates to geographic coordinates."""
125
  west, south, east, north = bounds
126
-
127
  x_norm = x / img_width
128
  y_norm = y / img_height
129
-
130
  lng = west + (east - west) * x_norm
131
  lat = north - (north - south) * y_norm
132
-
133
  return [lng, lat]
134
 
135
 
136
- def mask_to_polygons(mask, bounds, img_width, img_height):
137
  """Convert binary mask to GeoJSON polygons."""
138
  features = []
139
-
140
  contours, _ = cv2.findContours(
141
- mask.astype(np.uint8),
142
  cv2.RETR_EXTERNAL,
143
  cv2.CHAIN_APPROX_SIMPLE
144
  )
145
-
146
  for i, contour in enumerate(contours):
147
  area = cv2.contourArea(contour)
148
  if area < 100:
149
  continue
150
-
151
- epsilon = 0.015 * cv2.arcLength(contour, True)
 
152
  simplified = cv2.approxPolyDP(contour, epsilon, True)
153
-
154
  coords = []
155
  for point in simplified:
156
  px, py = point[0]
157
  geo_coord = pixel_to_geo(px, py, img_width, img_height, bounds)
158
  coords.append(geo_coord)
159
-
160
  if coords and coords[0] != coords[-1]:
161
  coords.append(coords[0])
162
-
163
  if len(coords) >= 4:
164
  west, south, east, north = bounds
165
  meters_per_lng = 111320 * np.cos(np.radians((north + south) / 2))
@@ -167,66 +361,42 @@ def mask_to_polygons(mask, bounds, img_width, img_height):
167
  pixel_width_m = (east - west) * meters_per_lng / img_width
168
  pixel_height_m = (north - south) * meters_per_lat / img_height
169
  area_sqm = area * pixel_width_m * pixel_height_m
170
-
171
- feature = {
172
- "type": "Feature",
173
- "properties": {
174
- "roof_id": i + 1,
175
- "area_sqm": round(area_sqm, 2),
176
- "area_sqft": round(area_sqm * 10.764, 2),
177
- "num_vertices": len(coords) - 1
178
- },
179
- "geometry": {
180
- "type": "Polygon",
181
- "coordinates": [coords]
 
 
 
182
  }
183
- }
184
- features.append(feature)
185
-
186
  return features
187
 
188
 
189
- def segment_image(image, num_segments):
190
- """Run DINOv3 segmentation on image."""
191
- original_size = image.size
192
-
193
- features = extract_features(image)
194
-
195
- num_patches = features.shape[1]
196
- h = w = int(np.sqrt(num_patches))
197
-
198
- feat_np = features.squeeze(0).cpu().numpy()
199
-
200
- pca = PCA(n_components=64, random_state=42)
201
- feat_reduced = pca.fit_transform(feat_np)
202
-
203
- kmeans = KMeans(n_clusters=num_segments, random_state=42, n_init=10)
204
- cluster_labels = kmeans.fit_predict(feat_reduced)
205
-
206
- seg_map = cluster_labels.reshape(h, w)
207
- seg_resized = np.array(
208
- Image.fromarray(seg_map.astype(np.uint8)).resize(
209
- original_size, resample=Image.NEAREST
210
- )
211
- )
212
-
213
- return seg_resized
214
-
215
 
216
- def process_address(address, num_segments, selected_clusters, min_area, radius_meters, api_key_input):
217
- """Main pipeline: address -> GeoJSON polygons."""
218
-
219
  api_key = api_key_input.strip() if api_key_input.strip() else GOOGLE_API_KEY
220
-
221
  if not api_key:
222
- return None, None, None, None, "❌ No API key provided. Enter your Google Solar API key."
223
-
224
  try:
225
  lat, lng, formatted_address = geocode_address(address, api_key)
226
  status = f"πŸ“ **{formatted_address}**\n\nCoordinates: {lat:.6f}, {lng:.6f}\n\n"
227
  except Exception as e:
228
- return None, None, None, None, f"❌ Geocoding failed: {str(e)}"
229
-
230
  try:
231
  status += "Fetching satellite imagery...\n"
232
  geotiff_bytes, layers_info = fetch_geotiff(lat, lng, api_key, radius_meters)
@@ -234,52 +404,66 @@ def process_address(address, num_segments, selected_clusters, min_area, radius_m
234
  img_width, img_height = image.size
235
  status += f"Image size: {img_width}x{img_height}px\n\n"
236
  except Exception as e:
237
- return None, None, None, None, f"❌ Failed to fetch imagery: {str(e)}"
238
-
239
  try:
240
- seg_resized = segment_image(image, num_segments)
241
-
 
 
 
 
 
 
 
242
  colors = np.array([
243
  [230, 25, 75], [60, 180, 75], [255, 225, 25], [0, 130, 200],
244
  [245, 130, 48], [145, 30, 180], [70, 240, 240], [240, 50, 230],
245
- [210, 245, 60], [250, 190, 212], [128, 128, 0], [0, 128, 128]
 
246
  ])
247
-
248
  colored_seg = colors[seg_resized % len(colors)]
249
-
 
250
  try:
251
  roof_indices = [int(x.strip()) for x in selected_clusters.split(",") if x.strip()]
252
  except:
253
  roof_indices = [0]
254
-
255
  roof_mask = np.isin(seg_resized, roof_indices).astype(np.uint8) * 255
256
-
257
- kernel = np.ones((5, 5), np.uint8)
 
258
  roof_mask = cv2.morphologyEx(roof_mask, cv2.MORPH_CLOSE, kernel)
259
  roof_mask = cv2.morphologyEx(roof_mask, cv2.MORPH_OPEN, kernel)
260
-
261
- polygon_features = mask_to_polygons(roof_mask, bounds, img_width, img_height)
262
- polygon_features = [f for f in polygon_features if f["properties"]["area_sqft"] >= min_area]
263
-
264
  geojson = {
265
  "type": "FeatureCollection",
266
  "properties": {
267
- "source": "DINOv3 Roof Segmentation",
268
  "address": formatted_address,
269
  "center": {"lat": lat, "lng": lng},
270
  "bounds": {
271
  "north": bounds[3], "south": bounds[1],
272
  "east": bounds[2], "west": bounds[0]
273
- }
 
274
  },
275
  "features": polygon_features
276
  }
277
-
278
  geojson_str = json.dumps(geojson, indent=2)
279
-
 
280
  orig_array = np.array(image).astype(np.float32)
281
- overlay = orig_array * 0.4 + colored_seg.astype(np.float32) * 0.6
282
-
 
 
 
283
  for feature in polygon_features:
284
  coords = feature["geometry"]["coordinates"][0]
285
  pixel_coords = []
@@ -287,57 +471,66 @@ def process_address(address, num_segments, selected_clusters, min_area, radius_m
287
  px = int((lnglat[0] - bounds[0]) / (bounds[2] - bounds[0]) * img_width)
288
  py = int((bounds[3] - lnglat[1]) / (bounds[3] - bounds[1]) * img_height)
289
  pixel_coords.append([px, py])
290
-
291
  pts = np.array(pixel_coords, dtype=np.int32)
292
- cv2.polylines(overlay, [pts], True, (255, 255, 0), 3)
293
-
 
294
  for idx in roof_indices:
295
  mask_highlight = seg_resized == idx
296
- overlay[mask_highlight] = orig_array[mask_highlight] * 0.3 + np.array([255, 50, 50]) * 0.7
297
-
 
 
 
 
298
  total_sqft = sum(f["properties"]["area_sqft"] for f in polygon_features)
299
- status += f"**Found {len(polygon_features)} roof polygon(s)**\n"
300
  status += f"**Total roof area: {total_sqft:,.0f} sq ft**\n\n"
301
-
302
  for f in polygon_features:
303
  props = f["properties"]
304
- status += f"- Roof {props['roof_id']}: {props['area_sqft']:,.0f} sq ft\n"
305
-
306
- status += "\n**Cluster Distribution:**\n"
 
 
307
  unique, counts = np.unique(seg_resized, return_counts=True)
308
  total = seg_resized.size
309
- for u, c in sorted(zip(unique, counts), key=lambda x: -x[1]):
 
310
  pct = (c / total) * 100
311
- marker = " ← ROOF" if u in roof_indices else ""
312
- status += f"- Cluster {u}: {pct:.1f}%{marker}\n"
313
-
314
- return np.array(image), overlay.astype(np.uint8), roof_mask, geojson_str, status
315
-
 
316
  except Exception as e:
317
  import traceback
318
- return None, None, None, None, f"❌ Segmentation failed: {str(e)}\n\n{traceback.format_exc()}"
319
 
320
 
321
  def save_geojson(geojson_str):
322
  """Save GeoJSON for download."""
323
  if not geojson_str:
324
  return None
325
- filepath = "/tmp/roof_polygons.geojson"
326
  with open(filepath, "w") as f:
327
  f.write(geojson_str)
328
  return filepath
329
 
330
 
331
  # Gradio Interface
332
- with gr.Blocks(title="Roof Segmentation - Address to GeoJSON", theme=gr.themes.Soft()) as demo:
333
  gr.Markdown("""
334
- # 🏠 Address β†’ Roof Polygons (GeoJSON)
335
-
336
- Enter an address, get roof segment polygons with real-world coordinates.
337
-
338
- **Pipeline:** Address β†’ Google Solar API (GeoTIFF) β†’ DINOv3 Segmentation β†’ GeoJSON
339
  """)
340
-
341
  with gr.Row():
342
  with gr.Column(scale=1):
343
  address_input = gr.Textbox(
@@ -345,74 +538,104 @@ with gr.Blocks(title="Roof Segmentation - Address to GeoJSON", theme=gr.themes.S
345
  placeholder="123 Main St, Sacramento, CA",
346
  lines=2
347
  )
348
-
349
  with gr.Accordion("πŸ”‘ API Key", open=False):
350
  api_key_input = gr.Textbox(
351
  label="Google Solar API Key",
352
  placeholder="Enter API key (or set GOOGLE_API_KEY secret)",
353
  type="password"
354
  )
355
-
356
- with gr.Accordion("βš™οΈ Settings", open=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  radius_meters = gr.Slider(
358
  25, 100, value=50, step=5,
359
  label="Image Radius (meters)",
360
  info="Area around the address to capture"
361
  )
362
- num_segments = gr.Slider(3, 12, value=6, step=1, label="Segments")
363
  selected_clusters = gr.Textbox(
364
- value="0",
365
- label="Roof Cluster(s)",
366
- placeholder="0,2,5"
 
367
  )
 
368
  min_area = gr.Slider(
369
- 50, 2000, value=200, step=50,
370
- label="Min Roof Area (sq ft)"
 
371
  )
372
-
373
- process_btn = gr.Button("πŸ” Extract Roof Polygons", variant="primary", size="lg")
374
-
375
  with gr.Column(scale=2):
376
  with gr.Row():
377
- original_img = gr.Image(label="Satellite Image")
378
- overlay_img = gr.Image(label="Segmentation + Polygons")
379
-
380
  with gr.Row():
381
- mask_img = gr.Image(label="Roof Mask")
382
- status_output = gr.Markdown()
383
-
 
 
384
  with gr.Accordion("πŸ“„ GeoJSON Output", open=True):
385
  geojson_output = gr.Code(language="json", lines=12)
386
  download_btn = gr.Button("⬇️ Download GeoJSON")
387
  download_file = gr.File(label="Download")
388
-
389
  process_btn.click(
390
  fn=process_address,
391
- inputs=[address_input, num_segments, selected_clusters, min_area, radius_meters, api_key_input],
392
- outputs=[original_img, overlay_img, mask_img, geojson_output, status_output]
 
393
  )
394
-
395
  download_btn.click(
396
  fn=save_geojson,
397
  inputs=[geojson_output],
398
  outputs=[download_file]
399
  )
400
-
401
  gr.Markdown("""
402
  ---
403
- ### How to Use
404
- 1. Enter a US property address
405
- 2. Click **Extract Roof Polygons**
406
- 3. Review the segmentation - identify which cluster colors are roofs
407
- 4. Enter roof cluster numbers and re-run if needed
408
- 5. Download GeoJSON for your workflow
409
-
410
- ### Requirements
411
- - Google Cloud project with **Solar API** and **Geocoding API** enabled
412
- - API key with access to both APIs
413
-
 
 
 
 
 
 
 
 
 
 
 
414
  ---
415
- *Powered by DINOv3 (SAT-493M) + Google Solar API*
416
  """)
417
 
418
- demo.launch()
 
5
  from transformers import AutoImageProcessor, AutoModel
6
  from sklearn.cluster import KMeans
7
  from sklearn.decomposition import PCA
8
+ from skimage.segmentation import slic, felzenszwalb, watershed
9
+ from skimage.feature import canny
10
+ from skimage.morphology import dilation, erosion, square
11
+ from skimage.filters import sobel
12
+ from scipy import ndimage
13
  import cv2
14
  import json
15
  import requests
 
24
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
25
 
26
  # DINOv2 Large model (replaces the DINOv3 satellite-pretrained checkpoint)
27
+ MODEL_NAME = "facebook/dinov2-large" # Using DINOv2 for better compatibility
28
  print(f"Loading {MODEL_NAME}...")
29
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
  processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
31
+ model = AutoModel.from_pretrained(MODEL_NAME, output_hidden_states=True).to(device)
32
  model.eval()
33
  print(f"Model loaded on {device}")
34
 
 
40
  "address": address,
41
  "key": api_key
42
  }
43
+
44
  response = requests.get(url, params=params)
45
  data = response.json()
46
+
47
  if data["status"] != "OK":
48
  raise ValueError(f"Geocoding failed: {data['status']}")
49
+
50
  location = data["results"][0]["geometry"]["location"]
51
  formatted_address = data["results"][0]["formatted_address"]
52
+
53
  return location["lat"], location["lng"], formatted_address
54
 
55
 
56
  def fetch_geotiff(lat, lng, api_key, radius_meters=50):
57
  """Fetch RGB GeoTIFF from Google Solar API Data Layers."""
58
+
59
  layers_url = "https://solar.googleapis.com/v1/dataLayers:get"
60
  params = {
61
  "location.latitude": lat,
 
66
  "pixelSizeMeters": 0.25,
67
  "key": api_key
68
  }
69
+
70
  response = requests.get(layers_url, params=params)
71
+
72
  if response.status_code != 200:
73
  params["requiredQuality"] = "MEDIUM"
74
  response = requests.get(layers_url, params=params)
75
+
76
  if response.status_code != 200:
77
  raise ValueError(f"Data Layers API error: {response.status_code} - {response.text}")
78
+
79
  layers = response.json()
80
+
81
  rgb_url = layers.get("rgbUrl")
82
  if not rgb_url:
83
  raise ValueError("No RGB imagery available for this location")
84
+
85
  rgb_response = requests.get(f"{rgb_url}&key={api_key}")
86
  if rgb_response.status_code != 200:
87
  raise ValueError(f"Failed to download GeoTIFF: {rgb_response.status_code}")
88
+
89
  return rgb_response.content, layers
90
 
91
 
92
  def parse_geotiff(geotiff_bytes):
93
  """Parse GeoTIFF and extract image + bounds."""
94
+
95
  with rasterio.open(io.BytesIO(geotiff_bytes)) as src:
96
  if src.count >= 3:
97
  r = src.read(1)
 
101
  else:
102
  img_array = src.read(1)
103
  img_array = np.stack([img_array] * 3, axis=-1)
104
+
105
  bounds = src.bounds
106
  crs = src.crs
107
+
108
  if crs and crs != CRS.from_epsg(4326):
109
  from rasterio.warp import transform_bounds
110
  bounds = transform_bounds(crs, CRS.from_epsg(4326), *bounds)
111
+
112
  image = Image.fromarray(img_array.astype(np.uint8))
113
  return image, bounds
114
 
115
 
116
def extract_multiscale_features(image, target_size=518):
    """Extract concatenated patch features from several DINOv2 transformer layers.

    Args:
        image: PIL image to encode.
        target_size: Side length, in pixels, of the square the image is
            resized to (bicubic) before it is handed to the processor.

    Returns:
        Tuple ``(combined_features, image_resized)``: the patch-token features
        of the selected layers concatenated along the last axis (the leading
        CLS token is dropped), and the resized PIL image.

    Raises:
        ValueError: If the model returned too few hidden states to provide
            any of the requested layers.
    """
    image_resized = image.resize((target_size, target_size), Image.Resampling.BICUBIC)

    # NOTE(review): the processor may apply its own resize/crop on top of this
    # resize - confirm the effective input resolution against its config.
    inputs = processor(images=image_resized, return_tensors="pt").to(device)

    with torch.inference_mode():
        outputs = model(**inputs)

    # hidden_states[0] is the embedding output and hidden_states[i] is the
    # output of transformer layer i, so a 1-based layer number indexes the
    # tuple directly (the previous ``idx - 1`` read one layer too early and
    # never reached the final layer).
    hidden_states = outputs.hidden_states

    # Early, mid, mid-late and final layers of the 24-layer DINOv2-large.
    layer_indices = [6, 12, 18, 24]
    features_list = [
        hidden_states[idx][:, 1:, :]  # drop the CLS token (first token)
        for idx in layer_indices
        if idx < len(hidden_states)
    ]

    if not features_list:
        raise ValueError(
            f"Model returned {len(hidden_states)} hidden states; "
            f"none of the layers {layer_indices} are available"
        )

    # Concatenate the per-layer features along the channel axis.
    combined_features = torch.cat(features_list, dim=-1)

    return combined_features, image_resized
145
+
146
+
147
def detect_edges(image_array):
    """Build a binary edge map from thresholded Sobel and Canny responses.

    Args:
        image_array: RGB image array, converted to grayscale with
            ``cv2.COLOR_RGB2GRAY``.

    Returns:
        ``np.uint8`` array holding 1 on edge pixels and 0 elsewhere.
    """
    grayscale = cv2.cvtColor(image_array, cv2.COLOR_RGB2GRAY)

    # A pixel is an edge if either detector fires on it.
    strong_gradient = sobel(grayscale) > 0.1
    canny_mask = canny(grayscale, sigma=1.5)
    edge_mask = strong_gradient | canny_mask

    # Thicken the edges slightly so they actually separate neighbouring regions.
    thickened = dilation(edge_mask, square(2))

    return thickened.astype(np.uint8)
165
+
166
+
167
def segment_roof_planes(image, method="slic", n_segments=100, edge_weight=10.0):
    """Over-segment an image, then merge neighbouring segments with similar features.

    Args:
        image: PIL Image.
        method: 'slic', 'felzenszwalb', or 'watershed'; any other value falls
            back to a plain SLIC call.
        n_segments: Number of initial superpixels requested from SLIC.
        edge_weight: Accepted for interface compatibility; not used by this
            implementation.

    Returns:
        Tuple ``(segments_refined, img_array, edges)``: the merged label map,
        the image as a numpy array, and the edge map from ``detect_edges``.
    """
    pixels = np.array(image)
    width, height = image.size

    # Multi-layer patch features; the resized image returned alongside is not needed here.
    patch_features, _ = extract_multiscale_features(image, target_size=518)

    # The patch tokens are assumed to form a square grid.
    grid_side = int(np.sqrt(patch_features.shape[1]))
    flat_features = patch_features.squeeze(0).cpu().numpy()

    # Reduce dimensionality but keep plenty of components for detail.
    n_components = min(128, flat_features.shape[1] - 1)
    reducer = PCA(n_components=n_components, random_state=42)
    feature_grid = reducer.fit_transform(flat_features).reshape(grid_side, grid_side, -1)

    # Bicubic upsampling of every feature channel to the image resolution.
    dense_features = np.zeros((height, width, n_components))
    for channel in range(n_components):
        dense_features[:, :, channel] = cv2.resize(
            feature_grid[:, :, channel],
            (width, height),
            interpolation=cv2.INTER_CUBIC
        )

    edges = detect_edges(pixels)
    non_edge = ~(edges > 0)

    if method == "watershed":
        # Seed the watershed at the points farthest from any detected edge.
        distance = ndimage.distance_transform_edt(non_edge)

        from skimage.feature import peak_local_max
        peaks = peak_local_max(
            distance,
            min_distance=20,
            labels=non_edge
        )
        markers = np.zeros_like(distance, dtype=int)
        markers[tuple(peaks.T)] = np.arange(1, len(peaks) + 1)

        # Edge pixels are masked out of the flooding.
        segments = watershed(-distance, markers, mask=non_edge)

    elif method == "felzenszwalb":
        # Graph-based segmentation that tends to preserve boundaries.
        segments = felzenszwalb(
            pixels,
            scale=100,
            sigma=0.5,
            min_size=50
        )

    elif method == "slic":
        # SLIC superpixels, labelled from 0.
        segments = slic(
            pixels,
            n_segments=n_segments,
            compactness=10.0,
            sigma=1,
            start_label=0,
            channel_axis=-1
        )

    else:
        # Unknown method name: fall back to SLIC with library defaults.
        segments = slic(pixels, n_segments=n_segments, compactness=10.0)

    # Merge adjacent segments whose mean features are similar.
    segments_refined = refine_segments_with_features(
        segments, dense_features, similarity_threshold=0.85
    )

    return segments_refined, pixels, edges
261
+
262
+
263
def refine_segments_with_features(segments, features, similarity_threshold=0.85):
    """Merge adjacent segments whose mean feature vectors are similar.

    Two segments are merged when they touch (8-connectivity: horizontally,
    vertically or diagonally) and the cosine similarity of their mean feature
    vectors exceeds ``similarity_threshold``. Merging is transitive: chains of
    pairwise-similar neighbours always end up sharing one label, regardless of
    the numeric order of the labels.

    Args:
        segments: 2-D integer label map. Label 0 is treated as background
            (e.g. masked-out edge pixels) and is never merged with anything.
        features: Array whose leading two axes match ``segments`` and whose
            last axis holds the per-pixel feature vector.
        similarity_threshold: Cosine similarity a pair must exceed to merge.

    Returns:
        Label map with the shape and dtype of ``segments``, relabelled to
        consecutive integers starting at 0.
    """
    segments = np.asarray(segments)
    if segments.size == 0:
        return segments.copy()

    # Work on compact 0..n-1 indices instead of the raw label values.
    labels, compact = np.unique(segments, return_inverse=True)
    compact = compact.reshape(segments.shape)
    n_labels = labels.size
    flat_ids = compact.ravel()

    # Mean feature vector per segment, one bincount pass per channel.
    flat_features = np.asarray(features, dtype=np.float64).reshape(flat_ids.size, -1)
    pixel_counts = np.bincount(flat_ids, minlength=n_labels)
    mean_features = np.zeros((n_labels, flat_features.shape[1]))
    for channel in range(flat_features.shape[1]):
        mean_features[:, channel] = np.bincount(
            flat_ids, weights=flat_features[:, channel], minlength=n_labels
        )
    mean_features /= pixel_counts[:, None]
    # Unit-normalise so a dot product is the cosine similarity.
    mean_features /= np.linalg.norm(mean_features, axis=1, keepdims=True) + 1e-8

    # Collect every pair of different labels that touch in the 8-neighbourhood
    # by comparing the label map with shifted copies of itself.
    shifted_views = (
        (compact[:, :-1], compact[:, 1:]),       # left / right
        (compact[:-1, :], compact[1:, :]),       # up / down
        (compact[:-1, :-1], compact[1:, 1:]),    # diagonal
        (compact[:-1, 1:], compact[1:, :-1]),    # anti-diagonal
    )
    pair_chunks = []
    for first, second in shifted_views:
        boundary = first != second
        pair_chunks.append(np.stack([first[boundary], second[boundary]], axis=1))
    pairs = np.concatenate(pair_chunks)

    # Union-find over segment indices; the smaller index becomes the root.
    parent = list(range(n_labels))

    def find(node):
        while parent[node] != node:
            parent[node] = parent[parent[node]]  # path halving
            node = parent[node]
        return node

    if pairs.size:
        pairs = np.unique(np.sort(pairs, axis=1), axis=0)

        # Background (label 0) never takes part in merging.
        is_background = labels == 0
        pairs = pairs[~(is_background[pairs[:, 0]] | is_background[pairs[:, 1]])]

        similarity = np.einsum(
            "ij,ij->i", mean_features[pairs[:, 0]], mean_features[pairs[:, 1]]
        )
        for seg_a, seg_b in pairs[similarity > similarity_threshold].tolist():
            root_a, root_b = find(seg_a), find(seg_b)
            if root_a != root_b:
                parent[max(root_a, root_b)] = min(root_a, root_b)

    # Relabel the surviving groups sequentially, ordered by smallest member.
    roots = np.array([find(index) for index in range(n_labels)])
    _, new_ids = np.unique(roots, return_inverse=True)

    return new_ids.ravel()[compact].astype(segments.dtype)
314
 
315
 
316
def pixel_to_geo(x, y, img_width, img_height, bounds):
    """Map a pixel position to ``[lng, lat]`` by linear interpolation.

    ``bounds`` is unpacked as ``(west, south, east, north)``. Pixel x grows
    from west to east and pixel y grows from north to south.
    """
    west, south, east, north = bounds

    # Fractional position of the pixel inside the image.
    frac_x = x / img_width
    frac_y = y / img_height

    return [
        west + (east - west) * frac_x,
        north - (north - south) * frac_y,
    ]
327
 
328
 
329
+ def mask_to_polygons(mask, bounds, img_width, img_height, min_area_sqft=50):
330
  """Convert binary mask to GeoJSON polygons."""
331
  features = []
332
+
333
  contours, _ = cv2.findContours(
334
+ mask.astype(np.uint8),
335
  cv2.RETR_EXTERNAL,
336
  cv2.CHAIN_APPROX_SIMPLE
337
  )
338
+
339
  for i, contour in enumerate(contours):
340
  area = cv2.contourArea(contour)
341
  if area < 100:
342
  continue
343
+
344
+ # Simplify with tighter epsilon for roof planes
345
+ epsilon = 0.008 * cv2.arcLength(contour, True)
346
  simplified = cv2.approxPolyDP(contour, epsilon, True)
347
+
348
  coords = []
349
  for point in simplified:
350
  px, py = point[0]
351
  geo_coord = pixel_to_geo(px, py, img_width, img_height, bounds)
352
  coords.append(geo_coord)
353
+
354
  if coords and coords[0] != coords[-1]:
355
  coords.append(coords[0])
356
+
357
  if len(coords) >= 4:
358
  west, south, east, north = bounds
359
  meters_per_lng = 111320 * np.cos(np.radians((north + south) / 2))
 
361
  pixel_width_m = (east - west) * meters_per_lng / img_width
362
  pixel_height_m = (north - south) * meters_per_lat / img_height
363
  area_sqm = area * pixel_width_m * pixel_height_m
364
+ area_sqft = area_sqm * 10.764
365
+
366
+ if area_sqft >= min_area_sqft:
367
+ feature = {
368
+ "type": "Feature",
369
+ "properties": {
370
+ "roof_plane_id": i + 1,
371
+ "area_sqm": round(area_sqm, 2),
372
+ "area_sqft": round(area_sqft, 2),
373
+ "num_vertices": len(coords) - 1
374
+ },
375
+ "geometry": {
376
+ "type": "Polygon",
377
+ "coordinates": [coords]
378
+ }
379
  }
380
+ features.append(feature)
381
+
 
382
  return features
383
 
384
 
385
+ def process_address(address, segmentation_method, n_segments, selected_clusters,
386
+ min_area, radius_meters, api_key_input):
387
+ """Main pipeline: address -> roof plane GeoJSON polygons."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
 
 
 
389
  api_key = api_key_input.strip() if api_key_input.strip() else GOOGLE_API_KEY
390
+
391
  if not api_key:
392
+ return None, None, None, None, None, "❌ No API key provided. Enter your Google Solar API key."
393
+
394
  try:
395
  lat, lng, formatted_address = geocode_address(address, api_key)
396
  status = f"πŸ“ **{formatted_address}**\n\nCoordinates: {lat:.6f}, {lng:.6f}\n\n"
397
  except Exception as e:
398
+ return None, None, None, None, None, f"❌ Geocoding failed: {str(e)}"
399
+
400
  try:
401
  status += "Fetching satellite imagery...\n"
402
  geotiff_bytes, layers_info = fetch_geotiff(lat, lng, api_key, radius_meters)
 
404
  img_width, img_height = image.size
405
  status += f"Image size: {img_width}x{img_height}px\n\n"
406
  except Exception as e:
407
+ return None, None, None, None, None, f"❌ Failed to fetch imagery: {str(e)}"
408
+
409
  try:
410
+ status += f"Running {segmentation_method.upper()} segmentation...\n"
411
+
412
+ seg_resized, img_array, edges = segment_roof_planes(
413
+ image,
414
+ method=segmentation_method,
415
+ n_segments=int(n_segments)
416
+ )
417
+
418
+ # Visualize segmentation
419
  colors = np.array([
420
  [230, 25, 75], [60, 180, 75], [255, 225, 25], [0, 130, 200],
421
  [245, 130, 48], [145, 30, 180], [70, 240, 240], [240, 50, 230],
422
+ [210, 245, 60], [250, 190, 212], [128, 128, 0], [0, 128, 128],
423
+ [170, 110, 40], [128, 0, 0], [0, 0, 128], [255, 178, 102]
424
  ])
425
+
426
  colored_seg = colors[seg_resized % len(colors)]
427
+
428
+ # Parse selected roof plane clusters
429
  try:
430
  roof_indices = [int(x.strip()) for x in selected_clusters.split(",") if x.strip()]
431
  except:
432
  roof_indices = [0]
433
+
434
  roof_mask = np.isin(seg_resized, roof_indices).astype(np.uint8) * 255
435
+
436
+ # Morphological refinement
437
+ kernel = np.ones((3, 3), np.uint8)
438
  roof_mask = cv2.morphologyEx(roof_mask, cv2.MORPH_CLOSE, kernel)
439
  roof_mask = cv2.morphologyEx(roof_mask, cv2.MORPH_OPEN, kernel)
440
+
441
+ polygon_features = mask_to_polygons(roof_mask, bounds, img_width, img_height, min_area)
442
+
 
443
  geojson = {
444
  "type": "FeatureCollection",
445
  "properties": {
446
+ "source": "DINOv2 Multi-Scale Roof Plane Segmentation",
447
  "address": formatted_address,
448
  "center": {"lat": lat, "lng": lng},
449
  "bounds": {
450
  "north": bounds[3], "south": bounds[1],
451
  "east": bounds[2], "west": bounds[0]
452
+ },
453
+ "segmentation_method": segmentation_method
454
  },
455
  "features": polygon_features
456
  }
457
+
458
  geojson_str = json.dumps(geojson, indent=2)
459
+
460
+ # Create visualizations
461
  orig_array = np.array(image).astype(np.float32)
462
+
463
+ # Segmentation overlay
464
+ overlay = orig_array * 0.5 + colored_seg.astype(np.float32) * 0.5
465
+
466
+ # Draw polygon boundaries
467
  for feature in polygon_features:
468
  coords = feature["geometry"]["coordinates"][0]
469
  pixel_coords = []
 
471
  px = int((lnglat[0] - bounds[0]) / (bounds[2] - bounds[0]) * img_width)
472
  py = int((bounds[3] - lnglat[1]) / (bounds[3] - bounds[1]) * img_height)
473
  pixel_coords.append([px, py])
474
+
475
  pts = np.array(pixel_coords, dtype=np.int32)
476
+ cv2.polylines(overlay, [pts], True, (255, 255, 0), 2)
477
+
478
+ # Highlight selected roof planes
479
  for idx in roof_indices:
480
  mask_highlight = seg_resized == idx
481
+ overlay[mask_highlight] = orig_array[mask_highlight] * 0.4 + np.array([255, 100, 100]) * 0.6
482
+
483
+ # Edge visualization
484
+ edge_viz = orig_array.copy()
485
+ edge_viz[edges > 0] = [255, 0, 0] # Red edges
486
+
487
  total_sqft = sum(f["properties"]["area_sqft"] for f in polygon_features)
488
+ status += f"\n**Found {len(polygon_features)} roof plane polygon(s)**\n"
489
  status += f"**Total roof area: {total_sqft:,.0f} sq ft**\n\n"
490
+
491
  for f in polygon_features:
492
  props = f["properties"]
493
+ status += f"- Plane {props['roof_plane_id']}: {props['area_sqft']:,.0f} sq ft ({props['num_vertices']} vertices)\n"
494
+
495
+ status += f"\n**Segmentation Stats:**\n"
496
+ status += f"- Method: {segmentation_method.upper()}\n"
497
+ status += f"- Total segments: {len(np.unique(seg_resized))}\n"
498
  unique, counts = np.unique(seg_resized, return_counts=True)
499
  total = seg_resized.size
500
+ status += f"\n**Top 10 Segments by Area:**\n"
501
+ for u, c in sorted(zip(unique, counts), key=lambda x: -x[1])[:10]:
502
  pct = (c / total) * 100
503
+ marker = " ← ROOF PLANE" if u in roof_indices else ""
504
+ status += f"- Segment {u}: {pct:.1f}%{marker}\n"
505
+
506
+ return (np.array(image), overlay.astype(np.uint8), edge_viz.astype(np.uint8),
507
+ roof_mask, geojson_str, status)
508
+
509
  except Exception as e:
510
  import traceback
511
+ return None, None, None, None, None, f"❌ Segmentation failed: {str(e)}\n\n{traceback.format_exc()}"
512
 
513
 
514
  def save_geojson(geojson_str):
515
  """Save GeoJSON for download."""
516
  if not geojson_str:
517
  return None
518
+ filepath = "/tmp/roof_planes.geojson"
519
  with open(filepath, "w") as f:
520
  f.write(geojson_str)
521
  return filepath
522
 
523
 
524
  # Gradio Interface
525
+ with gr.Blocks(title="Roof Plane Segmentation - DINOv2", theme=gr.themes.Soft()) as demo:
526
  gr.Markdown("""
527
+ # 🏠 Advanced Roof Plane Segmentation
528
+
529
+ **Detects individual roof planes** (peaks, valleys, facets) using multi-scale DINOv2 features + edge-aware segmentation.
530
+
531
+ **Pipeline:** Address → Google Solar API → Multi-Layer DINOv2 → Edge Detection → Superpixel/Watershed → GeoJSON
532
  """)
533
+
534
  with gr.Row():
535
  with gr.Column(scale=1):
536
  address_input = gr.Textbox(
 
538
  placeholder="123 Main St, Sacramento, CA",
539
  lines=2
540
  )
541
+
542
  with gr.Accordion("πŸ”‘ API Key", open=False):
543
  api_key_input = gr.Textbox(
544
  label="Google Solar API Key",
545
  placeholder="Enter API key (or set GOOGLE_API_KEY secret)",
546
  type="password"
547
  )
548
+
549
+ with gr.Accordion("βš™οΈ Segmentation Settings", open=True):
550
+ segmentation_method = gr.Radio(
551
+ choices=["slic", "watershed", "felzenszwalb"],
552
+ value="watershed",
553
+ label="Segmentation Algorithm",
554
+ info="Watershed best for roof ridges/valleys"
555
+ )
556
+
557
+ n_segments = gr.Slider(
558
+ 50, 200, value=100, step=10,
559
+ label="Initial Segments",
560
+ info="Higher = finer detail (try 100-150 for roofs)"
561
+ )
562
+
563
  radius_meters = gr.Slider(
564
  25, 100, value=50, step=5,
565
  label="Image Radius (meters)",
566
  info="Area around the address to capture"
567
  )
568
+
569
  selected_clusters = gr.Textbox(
570
+ value="0",
571
+ label="Roof Plane Segment IDs",
572
+ placeholder="e.g., 0,2,5,8 (see Top Segments list)",
573
+ info="Comma-separated segment IDs to include as roof planes"
574
  )
575
+
576
  min_area = gr.Slider(
577
+ 10, 500, value=50, step=10,
578
+ label="Min Roof Plane Area (sq ft)",
579
+ info="Filter out small segments"
580
  )
581
+
582
+ process_btn = gr.Button("πŸ” Extract Roof Planes", variant="primary", size="lg")
583
+
584
  with gr.Column(scale=2):
585
  with gr.Row():
586
+ original_img = gr.Image(label="Original Satellite Image")
587
+ overlay_img = gr.Image(label="Segmentation + Roof Polygons")
588
+
589
  with gr.Row():
590
+ edge_img = gr.Image(label="Detected Edges (Red)")
591
+ mask_img = gr.Image(label="Selected Roof Planes Mask")
592
+
593
+ status_output = gr.Markdown()
594
+
595
  with gr.Accordion("πŸ“„ GeoJSON Output", open=True):
596
  geojson_output = gr.Code(language="json", lines=12)
597
  download_btn = gr.Button("⬇️ Download GeoJSON")
598
  download_file = gr.File(label="Download")
599
+
600
  process_btn.click(
601
  fn=process_address,
602
+ inputs=[address_input, segmentation_method, n_segments, selected_clusters,
603
+ min_area, radius_meters, api_key_input],
604
+ outputs=[original_img, overlay_img, edge_img, mask_img, geojson_output, status_output]
605
  )
606
+
607
  download_btn.click(
608
  fn=save_geojson,
609
  inputs=[geojson_output],
610
  outputs=[download_file]
611
  )
612
+
613
  gr.Markdown("""
614
  ---
615
+ ### 🎯 How to Use for Roof Planes
616
+
617
+ 1. **Enter address** and click Extract
618
+ 2. **Review the segmentation** - each color is a potential roof plane
619
+ 3. **Check "Top 10 Segments"** in the output to identify which segments are roof planes
620
+ 4. **Enter those segment IDs** in "Roof Plane Segment IDs" (e.g., `0,2,5`)
621
+ 5. **Re-run** to get precise polygons for each roof facet
622
+ 6. **Download GeoJSON** with individual roof plane areas
623
+
624
+ ### 🔧 Algorithm Notes
625
+
626
+ - **SLIC**: Good for uniform roof planes, less sensitive to edges
627
+ - **Watershed**: Best for pitched roofs with clear ridges/valleys (RECOMMENDED)
628
+ - **Felzenszwalb**: Preserves fine boundaries, good for complex roofs
629
+
630
+ ### 🧠 Technical Details
631
+
632
+ - Multi-layer DINOv2 feature extraction (layers 6, 12, 18, 24)
633
+ - Edge detection via Sobel + Canny
634
+ - Feature-based segment merging
635
+ - High-resolution feature upsampling with bicubic interpolation
636
+
637
  ---
638
+ *Powered by DINOv2-Large + Edge-Aware Superpixel Segmentation*
639
  """)
640
 
641
+ demo.launch()
requirements.txt CHANGED
@@ -1,8 +1,10 @@
1
- transformers>=4.40.0
2
  gradio==3.50.2
3
  Pillow
4
  numpy
5
  scikit-learn
6
  opencv-python-headless
7
  requests
8
- rasterio
 
 
 
1
+ transformers>=4.56.0
2
  gradio==3.50.2
3
  Pillow
4
  numpy
5
  scikit-learn
6
  opencv-python-headless
7
  requests
8
+ rasterio
9
+ scikit-image
10
+ scipy