muk42 committed on
Commit
16a059c
·
1 Parent(s): 2276a97
annotation_tab/annotation_setup.py CHANGED
@@ -4,6 +4,9 @@ from .annotation_logic import (
4
  get_current_image_path, get_annotation_for_image, get_current_annotations_path,refresh_image_list
5
  )
6
 
 
 
 
7
  def get_annotation_widgets():
8
  message = gr.Markdown("", visible=False)
9
  image_path_display = gr.Markdown(value=get_current_image_path() or "No image loaded", elem_id="image_path")
 
4
  get_current_image_path, get_annotation_for_image, get_current_annotations_path,refresh_image_list
5
  )
6
 
7
+
8
+
9
+
10
  def get_annotation_widgets():
11
  message = gr.Markdown("", visible=False)
12
  image_path_display = gr.Markdown(value=get_current_image_path() or "No image loaded", elem_id="image_path")
app.py CHANGED
@@ -1,11 +1,11 @@
1
  # [DEBUG]
2
- #from osgeo import gdal
3
-
4
 
5
  import gradio as gr
6
  import logging
7
- from inference_tab import get_inference_widgets, run_inference
8
  from annotation_tab import get_annotation_widgets
 
9
 
10
  # setup logging
11
  logging.basicConfig(level=logging.DEBUG)
@@ -13,15 +13,17 @@ logging.basicConfig(level=logging.DEBUG)
13
 
14
  with gr.Blocks() as demo:
15
  with gr.Tab("Inference"):
16
- get_inference_widgets(run_inference)
17
  with gr.Tab("Annotation"):
18
  get_annotation_widgets()
 
 
19
 
20
  # [DEBUG]
21
- #demo.launch(inbrowser=True)
22
 
23
  # [PROD]
24
- demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
25
 
26
 
27
 
 
1
  # [DEBUG]
2
+ from osgeo import gdal
 
3
 
4
  import gradio as gr
5
  import logging
6
+ from inference_tab import get_inference_widgets, run_inference,georefImg
7
  from annotation_tab import get_annotation_widgets
8
+ from map_tab import get_map_widgets
9
 
10
  # setup logging
11
  logging.basicConfig(level=logging.DEBUG)
 
13
 
14
  with gr.Blocks() as demo:
15
  with gr.Tab("Inference"):
16
+ get_inference_widgets(run_inference,georefImg)
17
  with gr.Tab("Annotation"):
18
  get_annotation_widgets()
19
+ with gr.Tab("Map"):
20
+ get_map_widgets()
21
 
22
  # [DEBUG]
23
+ demo.launch(inbrowser=True)
24
 
25
  # [PROD]
26
+ #demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
27
 
28
 
29
 
inference_tab/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
  from .inference_setup import get_inference_widgets
2
- from .inference_logic import run_inference
3
 
4
- __all__ = ["get_inference_widgets", "run_inference"]
 
 
1
  from .inference_setup import get_inference_widgets
2
+ from .inference_logic import run_inference,georefImg
3
 
4
+
5
+ __all__ = ["get_inference_widgets", "run_inference","georefImg"]
inference_tab/helpers.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ultralytics import SAM
2
+ import cv2
3
+ from shapely.geometry import shape
4
+ from rapidfuzz import process, fuzz
5
+ from huggingface_hub import hf_hub_download
6
+ from config import OUTPUT_DIR
7
+ from pathlib import Path
8
+ from PIL import Image
9
+ import spaces
10
+ import numpy as np
11
+ import os
12
+ import json
13
+ from PIL import Image
14
+
15
+
16
def box_inside_global(box, global_box):
    """Tell whether `box` lies entirely within `global_box`.

    Both arguments are unpacked as ``(x1, y1, x2, y2)``. Edges that
    coincide with the global box still count as inside.
    """
    left, top, right, bottom = box
    g_left, g_top, g_right, g_bottom = global_box

    starts_inside = left >= g_left and top >= g_top
    ends_inside = right <= g_right and bottom <= g_bottom
    return starts_inside and ends_inside
20
+
21
def nms_iou(box1, box2):
    """Intersection-over-Union of two boxes indexed as ``[x1, y1, x2, y2]``.

    Returns 0 when the union area is not positive.
    """
    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection = overlap_w * overlap_h

    area_a = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_b = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area_a + area_b - intersection

    if union > 0:
        return intersection / union
    return 0
33
+
34
def non_max_suppression(boxes, scores, iou_threshold=0.5):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are visited from highest to lowest score; a box is kept when its
    IoU with every previously kept box is below `iou_threshold`.

    Args:
        boxes: Sequence of ``[x1, y1, x2, y2]`` boxes (list or array).
        scores: One score per box, in the same order as `boxes`.
        iou_threshold: Boxes whose IoU with a kept box is >= this value
            are suppressed.

    Returns:
        list[int]: Indices into `boxes` of the kept boxes, ordered by
        descending score. Plain Python ints, so the result can be used
        directly as list indices or serialised to JSON.
    """
    boxes = np.asarray(boxes, dtype=float).reshape(-1, 4)
    order = np.argsort(scores)[::-1]
    # Raw (unclamped) areas, computed once instead of once per pair.
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    keep = []
    while order.size > 0:
        current = order[0]
        keep.append(int(current))
        rest = order[1:]
        if rest.size == 0:
            break

        # IoU of the current box against all remaining candidates at once.
        inter_w = np.clip(
            np.minimum(boxes[current, 2], boxes[rest, 2])
            - np.maximum(boxes[current, 0], boxes[rest, 0]),
            0,
            None,
        )
        inter_h = np.clip(
            np.minimum(boxes[current, 3], boxes[rest, 3])
            - np.maximum(boxes[current, 1], boxes[rest, 1]),
            0,
            None,
        )
        inter = inter_w * inter_h
        union = areas[current] + areas[rest] - inter

        # A non-positive union counts as IoU 0, and is never divided by.
        iou = np.zeros_like(inter)
        np.divide(inter, union, out=iou, where=union > 0)

        order = rest[iou < iou_threshold]

    return keep
45
+
46
+
47
+
48
def tile_image_with_overlap(image_path, tile_size=1024, overlap=256):
    """Cut an image into overlapping square tiles.

    Tiles are laid out on a grid with stride ``tile_size - overlap``. A tile
    that would run past the right/bottom border is shifted back so it ends
    on the border; positions that collapse onto an already-emitted origin
    are skipped. If the image is smaller than `tile_size` along an axis,
    the tile is simply as large as the image along that axis.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.
        tile_size: Edge length of each tile in pixels.
        overlap: Number of pixels shared by neighbouring tiles; must be
            smaller than `tile_size`.

    Returns:
        tuple: ``(tile_list, image_shape)`` where `tile_list` holds
        ``(tile_array, (x_start, y_start))`` pairs and `image_shape` is the
        shape of the full image. Tiles are slices of the array returned by
        ``cv2.imread`` (OpenCV channel order, i.e. BGR; callers convert).

    Raises:
        ValueError: If `overlap` is not smaller than `tile_size`.
        FileNotFoundError: If the image cannot be read.
    """
    step = tile_size - overlap
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than tile_size ({tile_size})"
        )

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width, _ = image.shape

    tile_list = []
    seen = set()  # origins already emitted, to avoid duplicate tiles

    for y in range(0, height, step):
        # Shift the last row back inside the image, never above row 0.
        y_start = max(0, min(y, height - tile_size))
        for x in range(0, width, step):
            x_start = max(0, min(x, width - tile_size))

            coords = (x_start, y_start)
            if coords in seen:
                continue
            seen.add(coords)

            tile = image[y_start:y_start + tile_size, x_start:x_start + tile_size, :]
            tile_list.append((tile, coords))

    return tile_list, image.shape
79
+
80
+
81
+
82
def compute_iou(box1, box2):
    """Return the Intersection-over-Union of two ``[x1, y1, x2, y2]`` boxes.

    The result is 0 whenever the union area is not positive.
    """
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
    overlap = max(0, right - left) * max(0, bottom - top)

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = first_area + second_area - overlap

    if not union > 0:
        return 0
    return overlap / union
95
+
96
+
97
def merge_boxes(boxes, iou_threshold=0.8):
    """Single-pass merge of boxes that overlap a seed box.

    Each not-yet-used box becomes a seed; every later unused box whose IoU
    with the seed exceeds `iou_threshold` joins the seed's group. A group
    is replaced by the bounding box that encloses all of its members.

    Returns:
        list: One ``[x1, y1, x2, y2]`` box per group, in seed order.
    """
    merged = []
    consumed = set()
    total = len(boxes)

    for seed_idx, seed in enumerate(boxes):
        if seed_idx in consumed:
            continue
        consumed.add(seed_idx)
        cluster = [seed]

        for other_idx in range(seed_idx + 1, total):
            if other_idx in consumed:
                continue
            candidate = boxes[other_idx]
            # Only the seed is compared, not boxes that already joined.
            if compute_iou(seed, candidate) > iou_threshold:
                cluster.append(candidate)
                consumed.add(other_idx)

        # Enclosing box of the whole cluster.
        merged.append([
            min(member[0] for member in cluster),
            min(member[1] for member in cluster),
            max(member[2] for member in cluster),
            max(member[3] for member in cluster),
        ])

    return merged
122
+
123
+
124
def box_area(box):
    """Area of an ``[x1, y1, x2, y2]`` box; negative extents count as 0."""
    width = max(0, box[2] - box[0])
    height = max(0, box[3] - box[1])
    return width * height
126
+
127
def is_contained(box1, box2, containment_threshold=0.9):
    """Tell whether the smaller of two boxes lies mostly inside the other.

    The test is symmetric: the intersection area is divided by the area of
    whichever box is smaller, and the ratio is compared against
    `containment_threshold`. A zero-area box is never considered contained.
    """
    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    overlap = overlap_w * overlap_h

    # Clamped areas, same formula as box_area().
    first_area = max(0, box1[2] - box1[0]) * max(0, box1[3] - box1[1])
    second_area = max(0, box2[2] - box2[0]) * max(0, box2[3] - box2[1])

    reference = min(first_area, second_area)
    if reference == 0:
        return False
    return (overlap / reference) >= containment_threshold
143
+
144
def merge_boxes_iterative(boxes, iou_threshold=0.25, containment_threshold=0.75):
    """Repeatedly merge overlapping or nested boxes until nothing changes.

    One pass groups each unused seed box with every later unused box that
    either has IoU above `iou_threshold` with the seed or passes
    `is_contained` at `containment_threshold`; each group collapses to its
    enclosing box. Passes repeat as long as a pass reduces the box count.

    Returns:
        list: The merged ``[x1, y1, x2, y2]`` boxes.
    """
    current = boxes.copy()

    while True:
        merged = []
        consumed = set()
        count = len(current)

        for seed_idx, seed in enumerate(current):
            if seed_idx in consumed:
                continue
            consumed.add(seed_idx)
            cluster = [seed]

            for other_idx in range(seed_idx + 1, count):
                if other_idx in consumed:
                    continue
                candidate = current[other_idx]
                overlaps = compute_iou(seed, candidate) > iou_threshold
                if overlaps or is_contained(seed, candidate, containment_threshold):
                    cluster.append(candidate)
                    consumed.add(other_idx)

            # Enclosing box of the whole cluster.
            merged.append([
                min(member[0] for member in cluster),
                min(member[1] for member in cluster),
                max(member[2] for member in cluster),
                max(member[3] for member in cluster),
            ])

        shrank = len(merged) < len(current)
        current = merged
        if not shrank:
            # Fixed point reached: the last pass merged nothing.
            return current
179
+
180
+
181
def get_corner_points(box):
    """List the four corners of an ``(x1, y1, x2, y2)`` box.

    Order: top-left, top-right, bottom-left, bottom-right, each as ``[x, y]``.
    """
    left, top, right, bottom = box
    return [[x, y] for y in (top, bottom) for x in (left, right)]
189
+
190
+
191
def sample_negative_points_outside_boxes(mask, num_points):
    """Randomly pick pixel positions where `mask` is falsy.

    Uses rejection sampling with at most ``num_points * 20`` draws, so
    fewer than `num_points` points (possibly none) may be returned when
    free pixels are rare. The same pixel can be drawn more than once.

    Args:
        mask: 2-D array indexed as ``mask[y, x]``; truthy cells are excluded.
        num_points: Number of points requested.

    Returns:
        np.ndarray: Integer array of shape ``(k, 2)`` holding ``[x, y]``
        pairs with ``k <= num_points``. The shape is ``(0, 2)`` when no
        point could be sampled, so column indexing is always valid.
    """
    height, width = mask.shape[0], mask.shape[1]
    if num_points <= 0 or height == 0 or width == 0:
        # Nothing to draw; also avoids randint(0, 0), which raises.
        return np.empty((0, 2), dtype=int)

    points = []
    max_tries = num_points * 20  # fail-safe against a (nearly) full mask
    for _ in range(max_tries):
        if len(points) >= num_points:
            break
        x = np.random.randint(0, width)
        y = np.random.randint(0, height)
        if not mask[y, x]:
            points.append([x, y])

    # reshape keeps the (k, 2) layout even when no point was accepted.
    return np.array(points, dtype=int).reshape(-1, 2)
202
+
203
def get_inset_corner_points(box, margin=5):
    """Corners of `box` moved inward by `margin` pixels.

    Each shifted coordinate is capped at the opposite edge of the box, so
    the shift never pushes a point outside the original box.

    Returns:
        list: ``[x, y]`` pairs in the order top-left, top-right,
        bottom-left, bottom-right.
    """
    left, top, right, bottom = box

    inner_left = min(left + margin, right)
    inner_top = min(top + margin, bottom)
    inner_right = max(right - margin, left)
    inner_bottom = max(bottom - margin, top)

    return [
        [x, y]
        for y in (inner_top, inner_bottom)
        for x in (inner_left, inner_right)
    ]
218
+
219
+
220
def processYOLOBoxes(iou):
    """Load the saved detections and thin them with non-max suppression.

    Reads ``boxes.json`` from ``OUTPUT_DIR``; every entry supplies a
    ``"bbox"`` and a ``"score"``.

    Args:
        iou: IoU threshold handed to `non_max_suppression`.

    Returns:
        list: The ``"bbox"`` value of every surviving detection, in the
        order returned by `non_max_suppression`.
    """
    boxes_file = os.path.join(OUTPUT_DIR,"boxes.json")
    with open(boxes_file, "r") as handle:
        detections = json.load(handle)

    all_boxes = np.array([entry["bbox"] for entry in detections])
    all_scores = np.array([entry["score"] for entry in detections])
    kept = non_max_suppression(all_boxes, all_scores, iou)

    # Filtering against a global bounding box is still to be done (TBD).
    return [detections[i]["bbox"] for i in kept]  # each: [x1, y1, x2, y2]
237
+
238
def prepare_tiles(image_path, boxes_full, tile_size=1024, overlap=50, iou=0.5, c_th=0.75, edge_margin=10):
    """Write image tiles to disk together with per-tile prompt metadata.

    Every tile is saved as ``tmp/tiles/tile_<idx>.png``. For each tile, the
    boxes from `boxes_full` that overlap it are merged, translated to
    tile-local coordinates, clipped to the tile and dropped when they come
    within `edge_margin` pixels of a tile border. Each surviving box gets
    one positive point (its centre, label 1) and four negative points (its
    corners inset by 2 px, label 0). The metadata list is written to
    ``tmp/tiles_meta.json``; tiles without usable boxes get empty lists.

    Args:
        image_path: Image to tile.
        boxes_full: ``[x1, y1, x2, y2]`` boxes in full-image coordinates.
        tile_size: Tile edge length passed to `tile_image_with_overlap`.
        overlap: Tile overlap passed to `tile_image_with_overlap`.
        iou: IoU threshold for `merge_boxes_iterative`.
        c_th: Containment threshold for `merge_boxes_iterative`.
        edge_margin: Minimum distance a local box must keep from tile edges.

    Returns:
        tuple: ``(H, W)`` of the full image.
    """
    tiles, (H, W, _) = tile_image_with_overlap(image_path, tile_size, overlap)
    os.makedirs("tmp/tiles", exist_ok=True)
    meta = []

    for idx, (tile_bgr, (x_offset, y_offset)) in enumerate(tiles):
        # Tiles come from cv2.imread, so convert before saving with PIL.
        tile_rgb = cv2.cvtColor(tile_bgr, cv2.COLOR_BGR2RGB)
        Image.fromarray(tile_rgb).save(f"tmp/tiles/tile_{idx}.png")

        tile_h, tile_w, _ = tile_rgb.shape
        tile_right = x_offset + tile_w
        tile_bottom = y_offset + tile_h

        # Start from an "empty" record; it is filled in only if boxes survive.
        entry = {
            "idx": idx,
            "x_off": x_offset,
            "y_off": y_offset,
            "local_boxes": [],
            "point_coords": [],
            "point_labels": []
        }
        meta.append(entry)

        # Boxes that overlap this tile at all.
        candidates = [
            [x1, y1, x2, y2]
            for x1, y1, x2, y2 in boxes_full
            if (x2 > x_offset) and (x1 < tile_right) and (y2 > y_offset) and (y1 < tile_bottom)
        ]
        if not candidates:
            continue

        merged = merge_boxes_iterative(candidates, iou_threshold=iou, containment_threshold=c_th)

        # Translate to tile-local coordinates, clip, and drop boxes near edges.
        kept = []
        for x1, y1, x2, y2 in merged:
            lx1 = max(0, x1 - x_offset)
            ly1 = max(0, y1 - y_offset)
            lx2 = min(tile_w, x2 - x_offset)
            ly2 = min(tile_h, y2 - y_offset)
            clear_of_edges = (
                lx1 > edge_margin
                and ly1 > edge_margin
                and (tile_w - lx2) > edge_margin
                and (tile_h - ly2) > edge_margin
            )
            if clear_of_edges:
                kept.append([lx1, ly1, lx2, ly2])
        if not kept:
            continue

        # Point prompts: box centre is positive, inset corners are negative.
        coords_per_box = []
        labels_per_box = []
        for bx1, by1, bx2, by2 in kept:
            centre = ((bx1 + bx2) / 2, (by1 + by2) / 2)
            negatives = get_inset_corner_points([bx1, by1, bx2, by2], margin=2)
            if not isinstance(negatives, list):
                negatives = negatives.tolist()
            coords_per_box.append([centre] + negatives)
            labels_per_box.append([1] + [0] * len(negatives))

        entry["local_boxes"] = kept
        entry["point_coords"] = coords_per_box
        entry["point_labels"] = labels_per_box

    # Save metadata
    os.makedirs("tmp", exist_ok=True)
    with open("tmp/tiles_meta.json", "w") as meta_file:
        json.dump(meta, meta_file)

    return H, W
330
+
331
+
332
+
333
+
334
def merge_tile_masks(H, W):
    """
    Merge predicted tile masks into a full-size instance image.

    Reads ``tmp/tiles_meta.json`` and, for every tile that has a
    ``tmp/masks/tile_<idx>.npy`` file, paints each mask into the canvas at
    the tile offset with a fresh instance id. Pixels already claimed by an
    earlier instance are left untouched. The result is saved as
    ``mask.tif`` in ``OUTPUT_DIR``.

    Args:
        H (int): full image height
        W (int): full image width

    Returns:
        full_mask (np.ndarray): merged ``uint16`` mask array of shape (H, W)
    """
    full_mask = np.zeros((H, W), dtype=np.uint16)
    # NOTE(review): the canvas is uint16, so ids above 65535 cannot be
    # stored in it; confirm the expected number of masks stays below that.
    instance_id = 1

    # Load tile metadata
    with open("tmp/tiles_meta.json", "r") as f:
        tiles_meta = json.load(f)

    for tile in tiles_meta:
        tile_idx = tile["idx"]
        x_off = tile["x_off"]
        y_off = tile["y_off"]

        mask_path = Path(f"tmp/masks/tile_{tile_idx}.npy")
        if not mask_path.exists():
            continue

        # Load tile masks (expected shape = (N, h, w))
        tile_masks = np.load(mask_path)

        if tile_masks.ndim == 2:  # single mask saved as (h, w)
            tile_masks = tile_masks[None, :, :]  # make it (1, h, w)

        for mask in tile_masks:
            mask = mask.astype(bool)

            # Pad mask up to 1024x1024. Pad widths are clamped at 0 so a
            # mask larger than 1024 along one axis is not given a negative
            # width, which np.pad rejects.
            pad_h = max(0, 1024 - mask.shape[0])
            pad_w = max(0, 1024 - mask.shape[1])
            if pad_h or pad_w:
                mask = np.pad(mask, ((0, pad_h), (0, pad_w)), mode='constant', constant_values=0)

            h_end = y_off + mask.shape[0]
            w_end = x_off + mask.shape[1]
            region = full_mask[y_off:h_end, x_off:w_end]

            # Slicing clips `region` at the canvas border; crop the mask to
            # the same shape so the boolean index below always matches.
            mask = mask[:region.shape[0], :region.shape[1]]

            # `region` is a view, so this writes straight into full_mask.
            region[mask & (region == 0)] = instance_id
            instance_id += 1

    # Save as TIFF
    final_mask = Image.fromarray(full_mask)
    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
    final_mask.save(MASK_PATH)

    return full_mask
388
+
389
+
390
+
391
+
392
def chunkify(lst, n):
    """Generate consecutive slices of `lst`, each holding at most `n` items."""
    yield from (lst[start:start + n] for start in range(0, len(lst), n))
396
+
397
+
398
+
399
+
400
+
401
+
402
def img_shape(image_path):
    """Return the array shape of the image at `image_path`.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.

    Returns:
        tuple: The ``shape`` of the loaded image array.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure; fail with a clear message
        # instead of an AttributeError on None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return img.shape
405
+
406
+
407
+
408
def best_street_match(point, query_name, edges_gdf, max_distance=100):
    """Fuzzy-match `query_name` against the names of edges near `point`.

    Edges are candidates when they intersect ``point.buffer(max_distance)``.
    Their ``'name'`` values are scored against `query_name` with
    ``fuzz.ratio`` and the best one is returned.

    Returns:
        ``(None, 0)`` when no edge is nearby; otherwise the result of
        ``process.extractOne``, which is expected to be
        ``(name, score, index)``.

    NOTE(review): the two return paths differ in length (2 vs. 3 items);
    confirm that callers handle both shapes.
    """
    search_area = point.buffer(max_distance)
    nearby_edges = edges_gdf[edges_gdf.intersects(search_area)]

    if not nearby_edges.empty:
        names = nearby_edges['name'].tolist()
        return process.extractOne(query_name, names, scorer=fuzz.ratio)

    return None, 0
inference_tab/inference_logic.py CHANGED
@@ -20,6 +20,9 @@ from huggingface_hub import hf_hub_download
20
  from config import OUTPUT_DIR
21
  from pathlib import Path
22
  from PIL import Image
 
 
 
23
 
24
  # Global cache
25
  _trocr_processor = None
@@ -28,17 +31,29 @@ _trocr_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
 
29
 
30
 
31
- def load_trocr_model():
32
- """Load TrOCR into GPU if not cached."""
33
- global _trocr_processor, _trocr_model
34
- if _trocr_model is None:
35
- _trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
36
- _trocr_model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
37
- _trocr_model.to(_trocr_device).eval()
38
- return _trocr_processor, _trocr_model
39
 
40
- def run_inference(image_path, gcp_path, city_name, score_th):
 
 
 
 
 
 
 
 
 
 
 
41
  log = ""
 
 
 
 
 
 
 
 
 
42
  # ==== TEXT DETECTION ====
43
  for msg in getBBoxes(image_path):
44
  log += msg + "\n"
@@ -60,10 +75,13 @@ def run_inference(image_path, gcp_path, city_name, score_th):
60
  yield log, None
61
 
62
  # === ADD GEO DATA ===
63
- MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
64
- for msg in georefImg(MASK_PATH, gcp_path):
65
  log += msg + "\n"
66
  yield log, None
 
 
 
67
  for msg in extractCentroids(image_path):
68
  log += msg + "\n"
69
  yield log, None
@@ -82,6 +100,14 @@ def run_inference(image_path, gcp_path, city_name, score_th):
82
 
83
 
84
 
 
 
 
 
 
 
 
 
85
 
86
  @spaces.GPU
87
  def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25):
@@ -156,321 +182,6 @@ def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25)
156
  yield f"Inference complete."
157
 
158
 
159
- def box_inside_global(box, global_box):
160
- x1, y1, x2, y2 = box
161
- gx1, gy1, gx2, gy2 = global_box
162
- return (x1 >= gx1 and y1 >= gy1 and x2 <= gx2 and y2 <= gy2)
163
-
164
- def nms_iou(box1, box2):
165
- x1 = max(box1[0], box2[0])
166
- y1 = max(box1[1], box2[1])
167
- x2 = min(box1[2], box2[2])
168
- y2 = min(box1[3], box2[3])
169
-
170
- inter_area = max(0, x2 - x1) * max(0, y2 - y1)
171
- box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
172
- box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
173
- union_area = box1_area + box2_area - inter_area
174
-
175
- return inter_area / union_area if union_area > 0 else 0
176
-
177
- def non_max_suppression(boxes, scores, iou_threshold=0.5):
178
- idxs = np.argsort(scores)[::-1]
179
- keep = []
180
-
181
- while len(idxs) > 0:
182
- current = idxs[0]
183
- keep.append(current)
184
- idxs = idxs[1:]
185
- idxs = np.array([i for i in idxs if nms_iou(boxes[current], boxes[i]) < iou_threshold])
186
-
187
- return keep
188
-
189
-
190
-
191
- def tile_image_with_overlap(image_path, tile_size=1024, overlap=256):
192
- """Tile image into overlapping RGB tiles."""
193
- image = cv2.imread(image_path)
194
- height, width, _ = image.shape
195
-
196
- step = tile_size - overlap
197
- tile_list = []
198
- seen = set() # to avoid duplicates
199
-
200
- for y in range(0, height, step):
201
- if y + tile_size > height:
202
- y = height - tile_size
203
- for x in range(0, width, step):
204
- if x + tile_size > width:
205
- x = width - tile_size
206
-
207
- # clamp to valid region
208
- x_start = max(0, x)
209
- y_start = max(0, y)
210
- x_end = x_start + tile_size
211
- y_end = y_start + tile_size
212
-
213
- coords = (x_start, y_start)
214
- if coords in seen: # skip duplicates
215
- continue
216
- seen.add(coords)
217
-
218
- tile = image[y_start:y_end, x_start:x_end, :]
219
- tile_list.append((tile, coords))
220
-
221
- return tile_list, image.shape
222
-
223
-
224
-
225
- def compute_iou(box1, box2):
226
- """Compute Intersection over Union for two boxes."""
227
- x1 = max(box1[0], box2[0])
228
- y1 = max(box1[1], box2[1])
229
- x2 = min(box1[2], box2[2])
230
- y2 = min(box1[3], box2[3])
231
-
232
- inter_area = max(0, x2 - x1) * max(0, y2 - y1)
233
- area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
234
- area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
235
- union_area = area1 + area2 - inter_area
236
-
237
- return inter_area / union_area if union_area > 0 else 0
238
-
239
-
240
- def merge_boxes(boxes, iou_threshold=0.8):
241
- """Merge overlapping boxes based on IoU."""
242
- merged = []
243
- used = [False] * len(boxes)
244
-
245
- for i, box in enumerate(boxes):
246
- if used[i]:
247
- continue
248
- group = [box]
249
- used[i] = True
250
- for j in range(i + 1, len(boxes)):
251
- if used[j]:
252
- continue
253
- if compute_iou(box, boxes[j]) > iou_threshold:
254
- group.append(boxes[j])
255
- used[j] = True
256
-
257
- # Merge group into one bounding box
258
- x1 = min(b[0] for b in group)
259
- y1 = min(b[1] for b in group)
260
- x2 = max(b[2] for b in group)
261
- y2 = max(b[3] for b in group)
262
- merged.append([x1, y1, x2, y2])
263
-
264
- return merged
265
-
266
-
267
- def box_area(box):
268
- return max(0, box[2] - box[0]) * max(0, box[3] - box[1])
269
-
270
- def is_contained(box1, box2, containment_threshold=0.9):
271
- # Check if box1 is mostly inside box2
272
- x1 = max(box1[0], box2[0])
273
- y1 = max(box1[1], box2[1])
274
- x2 = min(box1[2], box2[2])
275
- y2 = min(box1[3], box2[3])
276
-
277
- inter_area = max(0, x2 - x1) * max(0, y2 - y1)
278
- area1 = box_area(box1)
279
- area2 = box_area(box2)
280
-
281
- # If intersection covers most of smaller box area, consider contained
282
- smaller_area = min(area1, area2)
283
- if smaller_area == 0:
284
- return False
285
- return (inter_area / smaller_area) >= containment_threshold
286
-
287
- def merge_boxes_iterative(boxes, iou_threshold=0.25, containment_threshold=0.75):
288
- boxes = boxes.copy()
289
- changed = True
290
-
291
- while changed:
292
- changed = False
293
- merged = []
294
- used = [False] * len(boxes)
295
-
296
- for i, box in enumerate(boxes):
297
- if used[i]:
298
- continue
299
- group = [box]
300
- used[i] = True
301
- for j in range(i + 1, len(boxes)):
302
- if used[j]:
303
- continue
304
- iou = compute_iou(box, boxes[j])
305
- contained = is_contained(box, boxes[j], containment_threshold)
306
- if iou > iou_threshold or contained:
307
- group.append(boxes[j])
308
- used[j] = True
309
-
310
- # Merge group into one bounding box
311
- x1 = min(b[0] for b in group)
312
- y1 = min(b[1] for b in group)
313
- x2 = max(b[2] for b in group)
314
- y2 = max(b[3] for b in group)
315
- merged.append([x1, y1, x2, y2])
316
-
317
- if len(merged) < len(boxes):
318
- changed = True
319
- boxes = merged
320
-
321
- return boxes
322
-
323
-
324
- def get_corner_points(box):
325
- x1, y1, x2, y2 = box
326
- return [
327
- [x1, y1], # top-left
328
- [x2, y1], # top-right
329
- [x1, y2], # bottom-left
330
- [x2, y2], # bottom-right
331
- ]
332
-
333
-
334
- def sample_negative_points_outside_boxes(mask, num_points):
335
- points = []
336
- tries = 0
337
- max_tries = num_points * 20 # fail-safe to avoid infinite loops
338
- while len(points) < num_points and tries < max_tries:
339
- x = np.random.randint(0, mask.shape[1])
340
- y = np.random.randint(0, mask.shape[0])
341
- if not mask[y, x]:
342
- points.append([x, y])
343
- tries += 1
344
- return np.array(points)
345
-
346
- def get_inset_corner_points(box, margin=5):
347
- x1, y1, x2, y2 = box
348
-
349
- # Ensure box is large enough for the margin
350
- x1i = min(x1 + margin, x2)
351
- y1i = min(y1 + margin, y2)
352
- x2i = max(x2 - margin, x1)
353
- y2i = max(y2 - margin, y1)
354
-
355
- return [
356
- [x1i, y1i], # top-left (inset)
357
- [x2i, y1i], # top-right
358
- [x1i, y2i], # bottom-left
359
- [x2i, y2i], # bottom-right
360
- ]
361
-
362
-
363
- def processYOLOBoxes(iou):
364
- # Load YOLO-predicted boxes
365
- BOXES_PATH = os.path.join(OUTPUT_DIR,"boxes.json")
366
- with open(BOXES_PATH, "r") as f:
367
- box_data = json.load(f)
368
-
369
- # Non-max suppression
370
- boxes = np.array([item["bbox"] for item in box_data])
371
- scores = np.array([item["score"] for item in box_data])
372
- # Run NMS
373
- keep_indices = non_max_suppression(boxes, scores, iou)
374
- # Filter data
375
- box_data = [box_data[i] for i in keep_indices]
376
- # Filter boxes inside global bbox (TBD)
377
- #box_data = [entry for entry in box_data if box_inside_global(entry["bbox"], GLOBAL_BOX)]
378
- boxes_full = [b["bbox"] for b in box_data] # Format: [x1, y1, x2, y2]
379
- return boxes_full
380
-
381
- def prepare_tiles(image_path, boxes_full, tile_size=1024, overlap=50, iou=0.5, c_th=0.75, edge_margin=10):
382
- """
383
- Tiles the image and prepares per-tile metadata including filtered boxes and point prompts.
384
- Returns full image size H, W.
385
- """
386
- tiles, (H, W, _) = tile_image_with_overlap(image_path, tile_size, overlap)
387
- os.makedirs("tmp/tiles", exist_ok=True)
388
- meta = []
389
-
390
- for idx, (tile_array, (x_offset, y_offset)) in enumerate(tiles):
391
- tile_path = f"tmp/tiles/tile_{idx}.png"
392
- tile_array = cv2.cvtColor(tile_array, cv2.COLOR_BGR2RGB)
393
- Image.fromarray(tile_array).save(tile_path)
394
-
395
- tile_h, tile_w, _ = tile_array.shape
396
-
397
- # Select boxes overlapping this tile
398
- candidate_boxes = []
399
- for x1, y1, x2, y2 in boxes_full:
400
- if (x2 > x_offset) and (x1 < x_offset + tile_w) and (y2 > y_offset) and (y1 < y_offset + tile_h):
401
- candidate_boxes.append([x1, y1, x2, y2])
402
-
403
- if not candidate_boxes:
404
- meta.append({
405
- "idx": idx,
406
- "x_off": x_offset,
407
- "y_off": y_offset,
408
- "local_boxes": [],
409
- "point_coords": [],
410
- "point_labels": []
411
- })
412
- continue
413
-
414
- # Merge overlapping boxes
415
- merged_boxes = merge_boxes_iterative(candidate_boxes, iou_threshold=iou, containment_threshold=c_th)
416
-
417
- # Adjust boxes to tile-local coordinates
418
- local_boxes = []
419
- for x1, y1, x2, y2 in merged_boxes:
420
- new_x1 = max(0, x1 - x_offset)
421
- new_y1 = max(0, y1 - y_offset)
422
- new_x2 = min(tile_w, x2 - x_offset)
423
- new_y2 = min(tile_h, y2 - y_offset)
424
- local_boxes.append([new_x1, new_y1, new_x2, new_y2])
425
-
426
- # Filter boxes too close to edges
427
- filtered_local_boxes = []
428
- for box in local_boxes:
429
- x1, y1, x2, y2 = box
430
- if (x1 > edge_margin and y1 > edge_margin and (tile_w - x2) > edge_margin and (tile_h - y2) > edge_margin):
431
- filtered_local_boxes.append(box)
432
-
433
- if not filtered_local_boxes:
434
- meta.append({
435
- "idx": idx,
436
- "x_off": x_offset,
437
- "y_off": y_offset,
438
- "local_boxes": [],
439
- "point_coords": [],
440
- "point_labels": []
441
- })
442
- continue
443
-
444
- # Compute point prompts
445
- centroids = [((bx1 + bx2) / 2, (by1 + by2) / 2) for bx1, by1, bx2, by2 in filtered_local_boxes]
446
- negative_points_per_box = [get_inset_corner_points(box, margin=2) for box in filtered_local_boxes]
447
-
448
- point_coords = []
449
- point_labels = []
450
- for centroid, neg_points in zip(centroids, negative_points_per_box):
451
- if not isinstance(neg_points, list):
452
- neg_points = neg_points.tolist()
453
- all_points = [centroid] + neg_points
454
- all_labels = [1] + [0] * len(neg_points)
455
- point_coords.append(all_points)
456
- point_labels.append(all_labels)
457
-
458
- meta.append({
459
- "idx": idx,
460
- "x_off": x_offset,
461
- "y_off": y_offset,
462
- "local_boxes": filtered_local_boxes,
463
- "point_coords": point_coords,
464
- "point_labels": point_labels
465
- })
466
-
467
- # Save metadata
468
- os.makedirs("tmp", exist_ok=True)
469
- with open("tmp/tiles_meta.json", "w") as f:
470
- json.dump(meta, f)
471
-
472
- return H, W
473
-
474
 
475
  @spaces.GPU(duration=180)
476
  def run_tile_inference():
@@ -504,52 +215,6 @@ def run_tile_inference():
504
  np.save(out_path, masks_stack)
505
 
506
 
507
- def merge_tile_masks(H, W):
508
- """
509
- Merge predicted tile masks into a full-size image.
510
-
511
- Args:
512
- H (int): full image height
513
- W (int): full image width
514
-
515
- Returns:
516
- full_mask (np.ndarray): merged mask array
517
- """
518
- full_mask = np.zeros((H, W), dtype=np.uint16)
519
- instance_id = 1
520
-
521
- # Load tile metadata
522
- with open("tmp/tiles_meta.json", "r") as f:
523
- tiles_meta = json.load(f)
524
-
525
- for tile in tiles_meta:
526
- tile_idx = tile["idx"]
527
- x_off = tile["x_off"]
528
- y_off = tile["y_off"]
529
-
530
- mask_path = f"tmp/masks/tile_{tile_idx}.npy"
531
- if not Path(mask_path).exists():
532
- continue
533
-
534
- # Load tile masks (expected shape = (N, h, w))
535
- tile_masks = np.load(mask_path)
536
-
537
- if tile_masks.ndim == 2: # single mask saved as (h, w)
538
- tile_masks = tile_masks[None, :, :] # make it (1, h, w)
539
-
540
- for mask in tile_masks:
541
- mask = mask.astype(bool)
542
- h_end = y_off + mask.shape[0]
543
- w_end = x_off + mask.shape[1]
544
- region = full_mask[y_off:h_end, x_off:w_end]
545
- region[mask & (region == 0)] = instance_id
546
- instance_id += 1
547
-
548
- # Save as TIFF
549
- final_mask = Image.fromarray(full_mask)
550
- MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
551
- final_mask.save(MASK_PATH)
552
-
553
  def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
554
  """
555
  iou for combining bounding boxes
@@ -575,7 +240,7 @@ def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
575
 
576
 
577
 
578
- def extractSegments(image_path, min_size=500, margin=10):
579
 
580
  image = cv2.imread(image_path)
581
  MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
@@ -589,7 +254,7 @@ def extractSegments(image_path, min_size=500, margin=10):
589
 
590
  yield f"Found {len(blob_ids)} blobs"
591
 
592
- for blob_id in blob_ids[:1000]:
593
  yield f"Processing blob {blob_id}..."
594
  # Create a binary mask for the current blob
595
  blob_mask = (mask == blob_id).astype(np.uint8)
@@ -614,18 +279,19 @@ def extractSegments(image_path, min_size=500, margin=10):
614
  cropped_mask = blob_mask[y_min:y_max, x_min:x_max]
615
 
616
  # Apply mask to original image
617
- if image.ndim == 3:
618
- masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
619
- else:
620
- masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
621
-
 
622
  # Save the masked image
623
  BLOB_PATH=os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png")
624
- cv2.imwrite(BLOB_PATH, masked_image)
625
 
626
  yield f"Done."
627
 
628
- @spaces.GPU(duration=180)
629
  def blobsOCR(image_path):
630
  yield "Load OCR model.."
631
  # Load model + processor
@@ -659,13 +325,7 @@ def blobsOCR(image_path):
659
  yield f"{filename} → {generated_text}"
660
 
661
  except Exception as e:
662
- yield f"Error processing {filename}: {e}"
663
-
664
-
665
- def chunkify(lst, n):
666
- """Yield successive n-sized chunks from lst."""
667
- for i in range(0, len(lst), n):
668
- yield lst[i:i + n]
669
 
670
  @spaces.GPU(duration=180)
671
  def blobsOCR_chunk(image_paths):
@@ -689,7 +349,6 @@ def blobsOCR_chunk(image_paths):
689
 
690
  return results
691
 
692
-
693
  def blobsOCR_all():
694
  image_folder = os.path.join(OUTPUT_DIR, "blobs")
695
  all_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".png")]
@@ -705,7 +364,7 @@ def blobsOCR_all():
705
 
706
 
707
  def extractCentroids(image_path):
708
- GEO_PATH=os.path.join(OUTPUT_DIR,"georeferenced.tif")
709
  with rasterio.open(GEO_PATH) as src:
710
  mask = src.read(1)
711
  transform = src.transform
@@ -749,12 +408,55 @@ def extractCentroids(image_path):
749
 
750
 
751
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752
 
753
- def img_shape(image_path):
754
- img = cv2.imread(image_path)
755
- return img.shape
756
 
757
  def georefImg(image_path, gcp_path):
 
758
  yield "Reading GCP CSV..."
759
  df = pd.read_csv(gcp_path)
760
 
@@ -808,7 +510,35 @@ def georefImg(image_path, gcp_path):
808
 
809
  def extractStreetNet(city_name):
810
  yield f"Extract OSM street network for {city_name}"
811
- G = ox.graph_from_place(city_name, network_type='all')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
812
  G_proj = ox.project_graph(G)
813
  edges = ox.graph_to_gdfs(G_proj, nodes=False, edges=True, fill_edge_geometry=True)
814
  edges_3857 = edges.to_crs(epsg=3857)
@@ -823,16 +553,6 @@ def extractStreetNet(city_name):
823
  yield "Done."
824
 
825
 
826
def best_street_match(point, query_name, edges_gdf, max_distance=100):
    """Fuzzy-match ``query_name`` against the names of edges close to ``point``.

    Args:
        point: Geometry whose ``buffer(max_distance)`` defines the search area.
        query_name: Text to compare with the candidate street names.
        edges_gdf: Edge table offering ``intersects`` and a ``'name'`` column.
        max_distance: Buffer radius passed to ``point.buffer``.

    Returns:
        ``(None, 0)`` when no edge intersects the buffer; otherwise the result
        of ``process.extractOne`` scored with ``fuzz.ratio`` (described in the
        original code as ``(name, score, index)``). NOTE(review): the two
        return shapes differ in length — confirm callers handle both.
    """
    search_area = point.buffer(max_distance)
    candidates = edges_gdf[edges_gdf.intersects(search_area)]

    if not candidates.empty:
        street_names = candidates['name'].tolist()
        return process.extractOne(query_name, street_names, scorer=fuzz.ratio)

    return None, 0
836
 
837
  def fuzzyMatch(score_th):
838
  COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
 
20
  from config import OUTPUT_DIR
21
  from pathlib import Path
22
  from PIL import Image
23
+ from .helpers import box_inside_global,nms_iou,non_max_suppression,tile_image_with_overlap,compute_iou,merge_boxes,box_area,is_contained,merge_boxes_iterative,get_corner_points,sample_negative_points_outside_boxes,get_inset_corner_points,processYOLOBoxes,prepare_tiles,merge_tile_masks,chunkify,img_shape,best_street_match
24
+ from pyproj import Transformer
25
+ import shutil
26
 
27
  # Global cache
28
  _trocr_processor = None
 
31
 
32
 
33
 
 
 
 
 
 
 
 
 
34
 
35
+ def run_inference(tile_dict, gcp_path, city_name, score_th):
36
+ IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
37
+ CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
38
+
39
+ if os.path.exists(IMAGE_FOLDER):
40
+ shutil.rmtree(IMAGE_FOLDER)
41
+ os.makedirs(IMAGE_FOLDER, exist_ok=True)
42
+
43
+ if os.path.exists(CSV_FILE):
44
+ os.remove(CSV_FILE)
45
+
46
+
47
  log = ""
48
+ if tile_dict is None:
49
+ yield "No tile selected", None
50
+ return
51
+
52
+ image_path = tile_dict["tile_path"]
53
+ coords = tile_dict["coords"] # (x_start, y_start, x_end, y_end)
54
+
55
+
56
+
57
  # ==== TEXT DETECTION ====
58
  for msg in getBBoxes(image_path):
59
  log += msg + "\n"
 
75
  yield log, None
76
 
77
  # === ADD GEO DATA ===
78
+
79
+ for msg in georefTile(coords,gcp_path):
80
  log += msg + "\n"
81
  yield log, None
82
+ '''for msg in georefImg(MASK_PATH, gcp_path):
83
+ log += msg + "\n"
84
+ yield log, None'''
85
  for msg in extractCentroids(image_path):
86
  log += msg + "\n"
87
  yield log, None
 
100
 
101
 
102
 
103
def load_trocr_model():
    """Return the cached TrOCR ``(processor, model)`` pair, loading it once.

    On the first call the processor and model are fetched with
    ``from_pretrained``, the model is moved to ``_trocr_device`` and switched
    to eval mode, and both are stored in the module-level cache. Later calls
    return the cached objects unchanged.
    """
    global _trocr_processor, _trocr_model

    # Fast path: a model is already cached.
    if _trocr_model is not None:
        return _trocr_processor, _trocr_model

    _trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
    _trocr_model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
    _trocr_model.to(_trocr_device).eval()
    return _trocr_processor, _trocr_model
111
 
112
  @spaces.GPU
113
  def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25):
 
182
  yield f"Inference complete."
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  @spaces.GPU(duration=180)
187
  def run_tile_inference():
 
215
  np.save(out_path, masks_stack)
216
 
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
219
  """
220
  iou for combining bounding boxes
 
240
 
241
 
242
 
243
+ def extractSegments(image_path, min_size=500, margin=100):
244
 
245
  image = cv2.imread(image_path)
246
  MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
 
254
 
255
  yield f"Found {len(blob_ids)} blobs"
256
 
257
+ for blob_id in blob_ids:
258
  yield f"Processing blob {blob_id}..."
259
  # Create a binary mask for the current blob
260
  blob_mask = (mask == blob_id).astype(np.uint8)
 
279
  cropped_mask = blob_mask[y_min:y_max, x_min:x_max]
280
 
281
  # Apply mask to original image
282
+ shaded = cropped_image.copy()
283
+ overlay = cropped_image.copy()
284
+ overlay[cropped_mask == 1] = (0, 0, 255)
285
+ alpha = 0.5
286
+ shaded = cv2.addWeighted(overlay, alpha, shaded, 1 - alpha, 0)
287
+
288
  # Save the masked image
289
  BLOB_PATH=os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png")
290
+ cv2.imwrite(BLOB_PATH, shaded)
291
 
292
  yield f"Done."
293
 
294
+ '''@spaces.GPU(duration=180)
295
  def blobsOCR(image_path):
296
  yield "Load OCR model.."
297
  # Load model + processor
 
325
  yield f"{filename} → {generated_text}"
326
 
327
  except Exception as e:
328
+ yield f"Error processing {filename}: {e}"'''
 
 
 
 
 
 
329
 
330
  @spaces.GPU(duration=180)
331
  def blobsOCR_chunk(image_paths):
 
349
 
350
  return results
351
 
 
352
  def blobsOCR_all():
353
  image_folder = os.path.join(OUTPUT_DIR, "blobs")
354
  all_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".png")]
 
364
 
365
 
366
  def extractCentroids(image_path):
367
+ GEO_PATH=os.path.join(OUTPUT_DIR,"mask_georef.tif")
368
  with rasterio.open(GEO_PATH) as src:
369
  mask = src.read(1)
370
  transform = src.transform
 
408
 
409
 
410
 
411
def georefTile(tile_coords, gcp_path):
    """Georeference the tile mask with GCPs shifted into tile pixel space.

    Reads ``mask.tif`` from ``OUTPUT_DIR``, attaches the ground control points
    from ``gcp_path`` (with the tile origin subtracted from their pixel
    coordinates) into ``mask_tmp.tif``, then warps that file to EPSG:3857 as
    ``mask_georef.tif``.

    Args:
        tile_coords: ``(xmin, ymin, xmax, ymax)`` of the tile; only the origin
            ``(xmin, ymin)`` is used, as the pixel offset of the tile.
        gcp_path: CSV source readable by ``pandas.read_csv`` with the columns
            ``mapX``, ``mapY``, ``sourceX`` and ``sourceY``.

    Yields:
        Progress messages.

    Raises:
        ValueError: If fewer than three control points are available, which
            is too few for the first-order polynomial warp.
        RuntimeError: If GDAL fails to write either output file.
    """
    yield "Georeferencing SAM image.."

    MASK_TILE = os.path.join(OUTPUT_DIR, "mask.tif")
    TMP_TILE = os.path.join(OUTPUT_DIR, "mask_tmp.tif")
    MASK_TILE_GEO = os.path.join(OUTPUT_DIR, "mask_georef.tif")

    df = pd.read_csv(gcp_path)

    # Only the tile origin matters: it moves the GCP pixel coordinates from
    # the full image into the tile's own pixel grid.
    xoff, yoff, _, _ = tile_coords

    shifted_gcps = [
        gdal.GCP(
            float(map_x),
            float(map_y),
            0,
            float(source_x) - xoff,
            # abs() is applied before shifting, so a negative sourceY is
            # treated as a positive row index.
            abs(float(source_y)) - yoff
        )
        for map_x, map_y, source_x, source_y in zip(
            df['mapX'], df['mapY'], df['sourceX'], df['sourceY']
        )
    ]

    if len(shifted_gcps) < 3:
        raise ValueError(
            f"At least 3 ground control points are required, got {len(shifted_gcps)}"
        )

    translated = gdal.Translate(
        TMP_TILE,
        MASK_TILE,
        format="GTiff",
        GCPs=shifted_gcps,
        outputSRS="EPSG:3857"
    )
    # Without GDAL exceptions enabled a failure is reported as None; stop here
    # instead of letting later steps read an outdated output file.
    if translated is None:
        raise RuntimeError(f"GDAL could not attach GCPs to {MASK_TILE}")
    # Drop the dataset handle so the file is flushed before it is warped.
    translated = None

    warped = gdal.Warp(
        MASK_TILE_GEO,
        TMP_TILE,
        dstSRS="EPSG:3857",
        resampleAlg="near",
        polynomialOrder=1,
        creationOptions=["COMPRESS=LZW"]
    )
    if warped is None:
        raise RuntimeError(f"GDAL could not warp {TMP_TILE} to {MASK_TILE_GEO}")
    warped = None

    yield "Done."
455
+
456
 
 
 
 
457
 
458
  def georefImg(image_path, gcp_path):
459
+
460
  yield "Reading GCP CSV..."
461
  df = pd.read_csv(gcp_path)
462
 
 
510
 
511
  def extractStreetNet(city_name):
512
  yield f"Extract OSM street network for {city_name}"
513
+
514
+ MASK_TILE_GEO=os.path.join(OUTPUT_DIR,"mask_georef.tif")
515
+
516
+ ds = gdal.Open(MASK_TILE_GEO)
517
+ gt = ds.GetGeoTransform()
518
+ width = ds.RasterXSize
519
+ height = ds.RasterYSize
520
+
521
+ minx = gt[0]
522
+ maxy = gt[3]
523
+ maxx = gt[0] + width * gt[1] + height * gt[2]
524
+ miny = gt[3] + width * gt[4] + height * gt[5]
525
+
526
+ # Add 100 meters buffer in all directions
527
+ minx -= 100 # west
528
+ maxx += 100 # east
529
+ miny -= 100 # south
530
+ maxy += 100 # north
531
+
532
+ bbox = (maxy, miny, maxx, minx)
533
+
534
+
535
+ transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
536
+ north, south = transformer.transform(bbox[2], bbox[0])[1], transformer.transform(bbox[3], bbox[1])[1]
537
+ east, west = transformer.transform(bbox[2], bbox[0])[0], transformer.transform(bbox[3], bbox[1])[0]
538
+
539
+ bbox = (west, south, east, north)
540
+
541
+ G = ox.graph_from_bbox(bbox,network_type='all')
542
  G_proj = ox.project_graph(G)
543
  edges = ox.graph_to_gdfs(G_proj, nodes=False, edges=True, fill_edge_geometry=True)
544
  edges_3857 = edges.to_crs(epsg=3857)
 
553
  yield "Done."
554
 
555
 
 
 
 
 
 
 
 
 
 
 
556
 
557
  def fuzzyMatch(score_th):
558
  COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
inference_tab/inference_setup.py CHANGED
@@ -1,22 +1,130 @@
1
  import gradio as gr
 
 
 
 
2
 
3
def get_inference_widgets(run_inference):
    """Create the inference widgets and wire the run button.

    Args:
        run_inference: Callback bound to the "Run Inference" button. It
            receives the image file, the GCP file, the city name and the
            score threshold, and feeds the progress textbox and the download
            file component.

    Returns:
        Tuple ``(image_input, gcp_input, city_name, score_th, run_button,
        output, download_file)`` of the created components.
    """
    # Components are created in display order.
    image_input = gr.File(label="Select Image File")
    gcp_input = gr.File(label="Select GCP Points File", file_types=[".points"])
    city_name = gr.Textbox(label="Enter city name")
    score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
    run_button = gr.Button("Run Inference")
    output = gr.Textbox(label="Progress", lines=10, interactive=False)
    download_file = gr.File(label="Download CSV")

    # The callback's two outputs map to the progress box and the CSV download.
    run_button.click(
        run_inference,
        inputs=[image_input, gcp_input, city_name, score_th],
        outputs=[output, download_file]
    )

    return image_input, gcp_input, city_name, score_th, run_button, output, download_file
 
1
  import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ import os
6
 
7
# Edge length, in pixels, of the square grid used to split the map.
TILE_SIZE = 1024
# Directory (relative to the working directory) where tile PNGs are saved.
TILE_FOLDER = "tiles"
# Created at import time so saving tiles later cannot fail on a missing folder.
os.makedirs(TILE_FOLDER, exist_ok=True)
# Module-level cache: "tiles" holds (coords, tile_path) pairs in creation
# order; "selected_tile" holds the last selection (tile_path + coords) or None.
tiles_cache = {"tiles": [], "selected_tile": None}
 
 
 
 
11
 
12
+
13
def make_tiles(image, tile_size=TILE_SIZE):
    """Cut ``image`` into a row-major grid of tiles and draw that grid.

    Args:
        image: Three-dimensional image array (height, width, channels).
        tile_size: Edge length of the grid cells in pixels.

    Returns:
        ``(annotated, tiles)``. ``annotated`` is a copy of ``image`` with each
        cell outlined and numbered. ``tiles`` is a list of
        ``((x0, y0, x1, y1), tile)`` pairs where ``tile`` is a slice of
        ``image``. The box is always ``tile_size`` wide and high, so for cells
        on the right/bottom border it may extend past the image while the
        slice itself is smaller.
    """
    height, width, _ = image.shape
    canvas = image.copy()

    # Row-major order: all columns of the first row, then the next row.
    origins = [
        (left, top)
        for top in range(0, height, tile_size)
        for left in range(0, width, tile_size)
    ]

    tiles = []
    for number, (left, top) in enumerate(origins):
        right, bottom = left + tile_size, top + tile_size
        tiles.append(((left, top, right, bottom), image[top:bottom, left:right]))
        cv2.rectangle(canvas, (left, top), (right, bottom), (255,0,0), 2)
        cv2.putText(canvas, str(number), (left+50, top+50),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,0), 5)

    return canvas, tiles
28
+
29
def create_tiles(image_file):
    """Tile the uploaded map, save the tiles to disk and refresh the cache.

    Args:
        image_file: Value of the file widget: an object exposing the path as
            ``.name``, a plain path string, or ``None`` when nothing has been
            uploaded.

    Returns:
        ``(annotated, run_button_update)``: the map with the tile grid drawn
        on it (``None`` when there is no upload) and an update that disables
        the run button until a tile is selected.
    """
    if image_file is None:
        # Nothing to tile: clear any earlier tiling instead of failing on
        # ``None.name``.
        tiles_cache["tiles"] = []
        tiles_cache["selected_tile"] = None
        return None, gr.update(interactive=False)

    # Accept both upload wrappers (path in ``.name``) and plain path strings.
    image_path = getattr(image_file, "name", image_file)

    # The context manager closes the underlying file once pixels are copied.
    with Image.open(image_path) as source:
        img = np.array(source.convert("RGB"))

    annotated, tiles = make_tiles(img, TILE_SIZE)
    tiles_cache["tiles"] = []

    for idx, (coords, tile) in enumerate(tiles):
        tile_path = os.path.join(TILE_FOLDER, f"tile_{idx}.png")
        Image.fromarray(tile).save(tile_path)
        tiles_cache["tiles"].append((coords, tile_path))  # store path instead of array

    tiles_cache["selected_tile"] = None
    return annotated, gr.update(interactive=False)
43
+
44
def select_tile(evt: gr.SelectData, state):
    """Resolve a click on the annotated map to a tile and remember it.

    Args:
        evt: Selection event; ``evt.index[0]`` and ``evt.index[1]`` are used
            as the x and y position of the click.
        state: Current selection state, passed back unchanged when the click
            does not resolve to a tile.

    Returns:
        ``(tile_image, run_button_update, new_state)``. On success the tile is
        loaded from disk with its number drawn on it, the run button is
        enabled and the state is a dict with ``tile_path`` and ``coords``.
        Otherwise ``(None, <disable run button>, state)``.
    """
    cached = tiles_cache["tiles"]
    if not cached:
        return None, gr.update(interactive=False), state

    # The last tile's right edge is a multiple of TILE_SIZE, so dividing it
    # gives the number of grid columns.
    columns = cached[-1][0][2] // TILE_SIZE
    row = evt.index[1] // TILE_SIZE
    column = evt.index[0] // TILE_SIZE
    tile_id = row * columns + column

    if not (0 <= tile_id < len(cached)):
        return None, gr.update(interactive=False), state

    coords, tile_path = cached[tile_id]

    # Only the path is stored, never the pixel data. The cache and the
    # returned state get separate dict objects.
    tiles_cache["selected_tile"] = {
        "tile_path": tile_path,
        "coords": coords
    }
    updated_state = {
        "tile_path": tile_path,
        "coords": coords
    }

    # The tile is read back purely for display and labelled with its number.
    preview = np.array(Image.open(tile_path))
    cv2.putText(preview, str(tile_id), (100, 100),
                cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,0), 4, cv2.LINE_AA)

    return preview, gr.update(interactive=True), updated_state
74
+
75
+
76
+
77
+
78
+
79
def get_inference_widgets(run_inference,georefImg):
    """Build the two-column inference layout and wire its events.

    Args:
        run_inference: Callback for the "Run Inference" button. It receives
            the selected-tile state, the GCP file, the city name and the score
            threshold; its outputs go to the progress textbox and the download
            file component.
        georefImg: Callback for the "Georeference Full Map" button. It
            receives the uploaded image file and the GCP file; its output goes
            to the progress textbox.

    Returns:
        Tuple ``(image_input, gcp_input, city_name, score_th, run_button,
        output, download_file)`` of the created components.
    """
    with gr.Row():
        # Left column
        with gr.Column(scale=1,min_width=500):
            # Shows the map with the tile grid; clicking it selects a tile.
            annotated_out = gr.Image(
                type="numpy", label="City Map",
                height=500, width=500
            )
            city_name = gr.Textbox(label="Enter city name")
            image_input = gr.File(label="Select Image File")
            gcp_input = gr.File(label="Select GCP Points File", file_types=[".points"])
            create_btn = gr.Button("Create Tiles")
            georef_btn = gr.Button("Georeference Full Map")


        # Right column
        with gr.Column(scale=1):
            selected_tile = gr.Image(
                type="numpy", label="Selected Tile",
                height=500, width=500
            )
            score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
            # Starts disabled; select_tile enables it once a tile is chosen.
            run_button = gr.Button("Run Inference", interactive=False)
            output = gr.Textbox(label="Progress", lines=5, interactive=False)
            download_file = gr.File(label="Download CSV")

    # Despite its name this state carries the dict produced by select_tile
    # (tile path and coords), not just a path.
    selected_tile_path = gr.State()


    # Wire events
    # Tiling redraws the map and updates the run button.
    create_btn.click(
        fn=create_tiles, inputs=image_input,
        outputs=[annotated_out, run_button]
    )
    # A click on the map updates the preview, the run button and the state.
    annotated_out.select(
        fn=select_tile, inputs=[selected_tile_path],
        outputs=[selected_tile, run_button, selected_tile_path]
    )
    run_button.click(
        fn=run_inference,
        inputs=[selected_tile_path, gcp_input, city_name, score_th],
        outputs=[output, download_file]
    )

    # Shares the progress textbox with run_inference.
    georef_btn.click(
        fn=georefImg,
        inputs=[image_input, gcp_input],
        outputs=[output]
    )


    return image_input, gcp_input, city_name, score_th, run_button, output, download_file
map_tab/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .map_setup import get_map_widgets
2
+
3
+ __all__ = ["get_map_widgets"]
map_tab/map_setup.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import folium
3
+
4
def get_map_widgets(location=(48.8566, 2.3522), zoom_start=12):
    """Render a folium map as an HTML widget.

    Args:
        location: ``(latitude, longitude)`` the map is centred on. The default
            is the centre that was previously hard-coded.
        zoom_start: Initial zoom level passed to ``folium.Map``.

    Returns:
        The ``gr.HTML`` component holding the rendered map.
    """
    m = folium.Map(location=list(location), zoom_start=zoom_start)
    # folium's notebook representation is self-contained HTML that can be
    # embedded as-is.
    map_html = m._repr_html_()
    with gr.Column():
        map_widget = gr.HTML(value=map_html, elem_id="map-widget")
    return map_widget
requirements.txt CHANGED
@@ -14,5 +14,5 @@ torch==2.7.1
14
  transformers==4.53.2
15
  ultralytics==8.3.94
16
  huggingface_hub[hf_xet]
17
- gradio>=3.39
18
  GDAL==3.6.2
 
 
14
  transformers==4.53.2
15
  ultralytics==8.3.94
16
  huggingface_hub[hf_xet]
 
17
  GDAL==3.6.2
18
+ folium==0.18.0