Deagin committed on
Commit
66bcd8c
·
1 Parent(s): ef1b4e9

DINOv3 satellite roof segmentation app

Browse files
Files changed (3) hide show
  1. README.md +11 -7
  2. app.py +188 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,13 +1,17 @@
1
  ---
2
- title: RoofSegmentation2
3
- emoji: 💻
4
- colorFrom: gray
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: segmentation using dinov3
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: Roof Segmentation DINOv3
3
+ emoji: 🛰️
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: other
11
+ models:
12
+ - facebook/dinov3-vitl16-pretrain-sat493m
13
  ---
14
 
15
+ # Roof Segmentation with DINOv3 Satellite
16
+
17
+ Segment roofs from satellite imagery using Meta's DINOv3 model pretrained on 493M satellite images.
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
import torch.nn.functional as F
import numpy as np
from PIL import Image
from transformers import AutoImageProcessor, AutoModel
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings("ignore")

# Model selection - ViT-L for satellite imagery.
# DINOv3 ViT-L/16 pretrained on the SAT-493M satellite corpus.
MODEL_NAME = "facebook/dinov3-vitl16-pretrain-sat493m"

# Module-level side effects: downloads the checkpoint on first run and
# pins the model to GPU when available, CPU otherwise.
print(f"Loading {MODEL_NAME}...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(device)
# Inference only — disable dropout/batch-norm training behavior.
model.eval()
print(f"Model loaded on {device}")
21
def extract_features(image):
    """Extract dense per-patch features from the DINOv3 backbone.

    Args:
        image: A PIL image (or anything the HF image processor accepts).

    Returns:
        torch.Tensor of shape [1, num_patches, hidden_dim] holding only the
        spatial patch tokens — the CLS and register tokens are stripped.
    """
    inputs = processor(images=image, return_tensors="pt").to(device)

    with torch.inference_mode():
        outputs = model(**inputs)
        # DINOv3 prepends 1 CLS token plus register tokens (4 for ViT-L).
        # Read the register count from the config when it is exposed instead
        # of hard-coding 5, so other DINOv3 variants keep working.
        num_registers = getattr(model.config, "num_register_tokens", 4)
        num_prefix = 1 + num_registers
        patch_features = outputs.last_hidden_state[:, num_prefix:, :]

    return patch_features
32
+
33
def segment_roof(image, num_segments=5, selected_clusters="0"):
    """Segment roofs using DINOv3 satellite features + K-means.

    Args:
        image: Input satellite image (PIL.Image or numpy array).
        num_segments: Number of K-means clusters (Gradio sliders may pass
            this as a float even with step=1).
        selected_clusters: Comma-separated cluster indices to highlight as roof.

    Returns:
        Tuple of (overlay uint8 RGB array, binary roof mask uint8 array,
        markdown stats string), or (None, None, message) on bad input.
    """
    if image is None:
        return None, None, "Please upload an image"

    # KMeans requires an integer cluster count; coerce slider floats.
    num_segments = int(num_segments)

    # Convert to PIL if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image).convert("RGB")

    original_size = image.size  # (W, H)

    # Extract DINOv3 features
    features = extract_features(image)

    # Calculate spatial dimensions.
    # DINOv3 uses patch_size=16 on a square processor-resized input, so the
    # token count should form a square grid; fail gracefully if it doesn't
    # instead of crashing on reshape.
    num_patches = features.shape[1]
    h = w = int(np.sqrt(num_patches))
    if h * w != num_patches:
        return None, None, (
            f"Unexpected token count {num_patches}: cannot form a square "
            "patch grid. Check the image processor's resize settings."
        )

    # Reshape for clustering
    feat_np = features.squeeze(0).cpu().numpy()  # [num_patches, hidden_dim]

    # PCA for dimensionality reduction (helps clustering). Clamp the
    # component count so small patch grids don't crash PCA.
    from sklearn.decomposition import PCA
    n_components = min(64, feat_np.shape[0], feat_np.shape[1])
    pca = PCA(n_components=n_components, random_state=42)
    feat_reduced = pca.fit_transform(feat_np)

    # K-means clustering
    kmeans = KMeans(n_clusters=num_segments, random_state=42, n_init=10)
    cluster_labels = kmeans.fit_predict(feat_reduced)

    # Reshape to spatial grid
    seg_map = cluster_labels.reshape(h, w)

    # Upscale to original image size; NEAREST keeps labels discrete.
    seg_resized = np.array(
        Image.fromarray(seg_map.astype(np.uint8)).resize(
            original_size, resample=Image.NEAREST
        )
    )

    # Color palette for visualization (wraps via modulo for >10 clusters).
    colors = np.array([
        [230, 25, 75],    # Red
        [60, 180, 75],    # Green
        [255, 225, 25],   # Yellow
        [0, 130, 200],    # Blue
        [245, 130, 48],   # Orange
        [145, 30, 180],   # Purple
        [70, 240, 240],   # Cyan
        [240, 50, 230],   # Magenta
        [210, 245, 60],   # Lime
        [250, 190, 212],  # Pink
    ])

    # Create colored segmentation
    colored_seg = colors[seg_resized % len(colors)]

    # Parse selected clusters for roof mask. Catch only the errors malformed
    # user text can raise (bare except would also swallow real bugs).
    try:
        roof_indices = [int(x.strip()) for x in selected_clusters.split(",") if x.strip()]
    except (ValueError, AttributeError):
        roof_indices = [0]

    # Create binary roof mask
    roof_mask = np.isin(seg_resized, roof_indices).astype(np.uint8) * 255

    # Create overlay visualization: 40% original + 60% cluster colors.
    orig_array = np.array(image).astype(np.float32)
    overlay = orig_array * 0.4 + colored_seg.astype(np.float32) * 0.6

    # Highlight selected roof clusters in strong red.
    for idx in roof_indices:
        mask = seg_resized == idx
        overlay[mask] = orig_array[mask] * 0.3 + np.array([255, 0, 0]) * 0.7

    # Calculate cluster statistics, largest area first.
    unique, counts = np.unique(seg_resized, return_counts=True)
    total_pixels = seg_resized.size
    stats = "**Cluster Statistics:**\n"
    for u, c in sorted(zip(unique, counts), key=lambda x: -x[1]):
        pct = (c / total_pixels) * 100
        marker = " ← ROOF" if u in roof_indices else ""
        stats += f"- Cluster {u}: {pct:.1f}%{marker}\n"

    return overlay.astype(np.uint8), roof_mask, stats
126
+
127
# Gradio Interface
# Flat UI script: left column holds the input image and settings, right
# column shows the overlay + binary mask, with cluster stats underneath.
with gr.Blocks(title="Roof Segmentation - DINOv3 Satellite", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🛰️ Roof Segmentation with DINOv3 (Satellite)

    Using Meta's **DINOv3 ViT-L** pretrained on **493M satellite images** at 0.6m resolution.

    Upload a satellite/aerial image to detect and segment roof areas.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # type="pil" so segment_roof receives a PIL.Image directly.
            input_image = gr.Image(type="pil", label="📸 Upload Satellite Image")

            with gr.Accordion("⚙️ Segmentation Settings", open=True):
                num_segments = gr.Slider(
                    minimum=3, maximum=12, value=5, step=1,
                    label="Number of Segments",
                    info="More segments = finer detail"
                )
                # Free-text cluster ids; segment_roof parses the comma list.
                selected_clusters = gr.Textbox(
                    value="0",
                    label="Roof Cluster(s)",
                    info="Enter cluster numbers separated by commas (e.g., '0,2')",
                    placeholder="0"
                )

            segment_btn = gr.Button("🔍 Segment Roofs", variant="primary", size="lg")

        with gr.Column(scale=2):
            with gr.Row():
                output_overlay = gr.Image(label="Segmentation Overlay")
                output_mask = gr.Image(label="Roof Mask (Binary)")

            cluster_stats = gr.Markdown(label="Cluster Info")

    # Wire the button: the three inputs map positionally onto
    # segment_roof(image, num_segments, selected_clusters) and the three
    # outputs onto its (overlay, mask, stats) return tuple.
    segment_btn.click(
        fn=segment_roof,
        inputs=[input_image, num_segments, selected_clusters],
        outputs=[output_overlay, output_mask, cluster_stats]
    )

    gr.Markdown("""
    ---
    ### How to Use
    1. Upload a satellite or aerial image of buildings
    2. Click **Segment Roofs** to analyze
    3. Look at the colored overlay - each color is a different segment
    4. Find which cluster number(s) correspond to roofs (shown in stats)
    5. Enter those numbers in **Roof Cluster(s)** and re-run
    6. Download the binary mask for your workflow

    ### Tips
    - **Roofs** often cluster together due to similar materials/colors
    - Try **5-7 segments** for typical suburban imagery
    - Multiple buildings? Select multiple clusters: `0,3,5`

    ---
    *Powered by [DINOv3](https://github.com/facebookresearch/dinov3) pretrained on SAT-493M*
    """)

# Start the web server (blocking call).
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ transformers>=4.40.0
3
+ gradio>=4.0.0
4
+ Pillow
5
+ numpy
6
+ scikit-learn