Spaces:

muk42
/

histOSM

Running on Zero

App Files Files Community

muk42 committed on Aug 24, 2025

Commit

16a059c

1 Parent(s): 2276a97

new ver

Browse files

Files changed (9) hide show

annotation_tab/annotation_setup.py +3 -0
app.py +8 -6
inference_tab/__init__.py +3 -2
inference_tab/helpers.py +417 -0
inference_tab/inference_logic.py +124 -404
inference_tab/inference_setup.py +120 -12
map_tab/__init__.py +3 -0
map_tab/map_setup.py +8 -0
requirements.txt +1 -1

annotation_tab/annotation_setup.py CHANGED Viewed

@@ -4,6 +4,9 @@ from .annotation_logic import (
     get_current_image_path, get_annotation_for_image, get_current_annotations_path,refresh_image_list
 )
 def get_annotation_widgets():
     message = gr.Markdown("", visible=False)
     image_path_display = gr.Markdown(value=get_current_image_path() or "No image loaded", elem_id="image_path")

     get_current_image_path, get_annotation_for_image, get_current_annotations_path,refresh_image_list
 )
 def get_annotation_widgets():
     message = gr.Markdown("", visible=False)
     image_path_display = gr.Markdown(value=get_current_image_path() or "No image loaded", elem_id="image_path")

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 # [DEBUG]
-#from osgeo import gdal
 import gradio as gr
 import logging
-from inference_tab import get_inference_widgets, run_inference
 from annotation_tab import get_annotation_widgets
 # setup logging
 logging.basicConfig(level=logging.DEBUG)
@@ -13,15 +13,17 @@ logging.basicConfig(level=logging.DEBUG)
 with gr.Blocks() as demo:
     with gr.Tab("Inference"):
-        get_inference_widgets(run_inference)
     with gr.Tab("Annotation"):
         get_annotation_widgets()
 # [DEBUG]
-#demo.launch(inbrowser=True)
 # [PROD]
-demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)

 # [DEBUG]
+from osgeo import gdal
 import gradio as gr
 import logging
+from inference_tab import get_inference_widgets, run_inference,georefImg
 from annotation_tab import get_annotation_widgets
+from map_tab import get_map_widgets
 # setup logging
 logging.basicConfig(level=logging.DEBUG)
 with gr.Blocks() as demo:
     with gr.Tab("Inference"):
+        get_inference_widgets(run_inference,georefImg)
     with gr.Tab("Annotation"):
         get_annotation_widgets()
+    with gr.Tab("Map"):
+        get_map_widgets()
 # [DEBUG]
+demo.launch(inbrowser=True)
 # [PROD]
+#demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)

inference_tab/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from .inference_setup import get_inference_widgets
-from .inference_logic import run_inference
-__all__ = ["get_inference_widgets", "run_inference"]

 from .inference_setup import get_inference_widgets
+from .inference_logic import run_inference,georefImg
+__all__ = ["get_inference_widgets", "run_inference","georefImg"]

inference_tab/helpers.py ADDED Viewed

	@@ -0,0 +1,417 @@

+from ultralytics import SAM
+import cv2
+from shapely.geometry import shape
+from rapidfuzz import process, fuzz
+from huggingface_hub import hf_hub_download
+from config import OUTPUT_DIR
+from pathlib import Path
+from PIL import Image
+import spaces
+import numpy as np
+import os
+import json
+from PIL import Image
+def box_inside_global(box, global_box):
+    """Return True when `box` lies entirely within `global_box`.
+
+    Both arguments are unpacked as (x1, y1, x2, y2); touching edges count
+    as inside.
+    """
+    left, top, right, bottom = box
+    g_left, g_top, g_right, g_bottom = global_box
+    fits_horizontally = left >= g_left and right <= g_right
+    fits_vertically = top >= g_top and bottom <= g_bottom
+    return fits_horizontally and fits_vertically
+def nms_iou(box1, box2):
+    """Intersection-over-Union of two boxes indexed as [x1, y1, x2, y2].
+
+    Returns 0 when the union has no area.
+    """
+    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
+    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
+    intersection = overlap_w * overlap_h
+    area_a = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    area_b = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    union = area_a + area_b - intersection
+    if union > 0:
+        return intersection / union
+    return 0
+def non_max_suppression(boxes, scores, iou_threshold=0.5):
+    """Greedy non-maximum suppression.
+
+    Visits boxes from highest to lowest score; each kept box discards every
+    remaining box whose IoU with it is not below `iou_threshold`.
+
+    Returns:
+        list of indices into `boxes` that were kept, best score first.
+    """
+    remaining = np.argsort(scores)[::-1]
+    selected = []
+    while len(remaining) > 0:
+        best, rest = remaining[0], remaining[1:]
+        selected.append(best)
+        survivors = [
+            idx for idx in rest
+            if nms_iou(boxes[best], boxes[idx]) < iou_threshold
+        ]
+        remaining = np.array(survivors)
+    return selected
+def tile_image_with_overlap(image_path, tile_size=1024, overlap=256):
+    """Cut an image into overlapping square tiles.
+
+    The image is read with ``cv2.imread``, so the tiles keep OpenCV's BGR
+    channel order; converting to RGB is left to the caller.  A tile that
+    would run past the right or bottom border is shifted back so that it ends
+    on the border, and tile origins duplicated by that shift are dropped.
+    When the image is smaller than ``tile_size`` along an axis, the tile is
+    clipped to the image along that axis.
+
+    Args:
+        image_path: path of the image to read.
+        tile_size: side length of a tile in pixels.
+        overlap: number of pixels shared by neighbouring tiles.
+
+    Returns:
+        ``(tile_list, image_shape)`` where ``tile_list`` holds
+        ``(tile_array, (x_start, y_start))`` pairs and ``image_shape`` is the
+        shape of the full image array.
+
+    Raises:
+        ValueError: if ``overlap >= tile_size`` or the image cannot be read.
+    """
+    step = tile_size - overlap
+    if step <= 0:
+        # A non-positive step would either crash range() or silently yield no tiles.
+        raise ValueError(
+            f"overlap ({overlap}) must be smaller than tile_size ({tile_size})"
+        )
+    image = cv2.imread(image_path)
+    if image is None:
+        # cv2.imread signals a missing/unsupported file by returning None.
+        raise ValueError(f"Could not read image: {image_path}")
+    height, width, _ = image.shape
+    tile_list = []
+    seen = set()  # tile origins already emitted
+    for y in range(0, height, step):
+        # Shift the last row back onto the image, never above row 0.
+        y_start = max(0, min(y, height - tile_size))
+        for x in range(0, width, step):
+            x_start = max(0, min(x, width - tile_size))
+            coords = (x_start, y_start)
+            if coords in seen:
+                continue
+            seen.add(coords)
+            tile = image[y_start:y_start + tile_size, x_start:x_start + tile_size, :]
+            tile_list.append((tile, coords))
+    return tile_list, image.shape
+def compute_iou(box1, box2):
+    """Return the IoU of two [x1, y1, x2, y2] boxes (0 if the union is empty)."""
+    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
+    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
+    inter_area = max(0, right - left) * max(0, bottom - top)
+    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    union_area = first_area + second_area - inter_area
+    if not union_area > 0:
+        return 0
+    return inter_area / union_area
+def merge_boxes(boxes, iou_threshold=0.8):
+    """Group boxes by IoU and replace each group with its bounding box.
+
+    A single pass: every not-yet-grouped box becomes a seed and absorbs the
+    later boxes whose IoU with the seed exceeds `iou_threshold`.
+
+    Returns:
+        list of [x1, y1, x2, y2] lists, one per group.
+    """
+    consumed = set()
+    result = []
+    total = len(boxes)
+    for i, seed in enumerate(boxes):
+        if i in consumed:
+            continue
+        consumed.add(i)
+        cluster = [seed]
+        for j in range(i + 1, total):
+            if j not in consumed and compute_iou(seed, boxes[j]) > iou_threshold:
+                cluster.append(boxes[j])
+                consumed.add(j)
+        # Enclosing rectangle of the whole cluster
+        result.append([
+            min(member[0] for member in cluster),
+            min(member[1] for member in cluster),
+            max(member[2] for member in cluster),
+            max(member[3] for member in cluster),
+        ])
+    return result
+def box_area(box):
+    """Area of an [x1, y1, x2, y2] box; inverted extents count as 0."""
+    width = max(0, box[2] - box[0])
+    height = max(0, box[3] - box[1])
+    return width * height
+def is_contained(box1, box2, containment_threshold=0.9):
+    """Tell whether the smaller of two boxes lies mostly inside the other.
+
+    The test is symmetric: the intersection area is divided by the area of
+    whichever box is smaller and compared with `containment_threshold`.
+    Returns False when the smaller box has no area.
+    """
+    def _area(b):
+        return max(0, b[2] - b[0]) * max(0, b[3] - b[1])
+
+    smaller_area = min(_area(box1), _area(box2))
+    if smaller_area == 0:
+        return False
+    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
+    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
+    return (overlap_w * overlap_h) / smaller_area >= containment_threshold
+def merge_boxes_iterative(boxes, iou_threshold=0.25, containment_threshold=0.75):
+    """Repeatedly merge boxes until a pass no longer reduces their number.
+
+    In each pass a not-yet-grouped box absorbs every later box that either
+    has IoU above `iou_threshold` with it or passes the containment test;
+    each group is then replaced by its enclosing [x1, y1, x2, y2] box.
+
+    Returns:
+        the box list of the last pass that shrank the set (a copy of the
+        input when nothing was merged at all).
+    """
+    current = boxes.copy()
+    while True:
+        taken = [False] * len(current)
+        fused = []
+        for i, anchor in enumerate(current):
+            if taken[i]:
+                continue
+            taken[i] = True
+            members = [anchor]
+            for j in range(i + 1, len(current)):
+                if taken[j]:
+                    continue
+                other = current[j]
+                overlaps = compute_iou(anchor, other) > iou_threshold
+                if overlaps or is_contained(anchor, other, containment_threshold):
+                    members.append(other)
+                    taken[j] = True
+            # Enclosing rectangle of the group
+            fused.append([
+                min(m[0] for m in members),
+                min(m[1] for m in members),
+                max(m[2] for m in members),
+                max(m[3] for m in members),
+            ])
+        if len(fused) >= len(current):
+            # Nothing was merged in this pass: the set is stable.
+            return current
+        current = fused
+def get_corner_points(box):
+    """List the four corners of an (x1, y1, x2, y2) box.
+
+    Order: top-left, top-right, bottom-left, bottom-right.
+    """
+    left, top, right, bottom = box
+    return [[x, y] for y in (top, bottom) for x in (left, right)]
+def sample_negative_points_outside_boxes(mask, num_points):
+    """Randomly sample pixel positions where `mask` is falsy.
+
+    Positions are drawn with ``np.random.randint`` over the mask extent and
+    kept only where ``mask[y, x]`` is falsy.  Sampling stops after
+    ``num_points * 20`` draws, so fewer points than requested may come back.
+
+    Args:
+        mask: 2-D array indexed as ``mask[y, x]``.
+        num_points: number of points wanted.
+
+    Returns:
+        integer array of shape ``(k, 2)`` holding ``[x, y]`` pairs, with
+        ``k <= num_points``; ``(0, 2)`` when nothing could be sampled.
+    """
+    points = []
+    tries = 0
+    max_tries = num_points * 20  # fail-safe to avoid infinite loops
+    while len(points) < num_points and tries < max_tries:
+        x = np.random.randint(0, mask.shape[1])
+        y = np.random.randint(0, mask.shape[0])
+        if not mask[y, x]:
+            points.append([x, y])
+        tries += 1
+    # reshape keeps the (k, 2) layout even when no point was found, instead of
+    # the shape-(0,) float array np.array([]) would produce.
+    return np.array(points, dtype=int).reshape(-1, 2)
+def get_inset_corner_points(box, margin=5):
+    """Corners of `box` moved inward by `margin`, clamped to the box itself.
+
+    Each inset coordinate is limited by the opposite edge, so the points
+    never leave the box even when it is narrower than the margin.
+    Order: top-left, top-right, bottom-left, bottom-right.
+    """
+    left, top, right, bottom = box
+    inner_left = min(left + margin, right)
+    inner_top = min(top + margin, bottom)
+    inner_right = max(right - margin, left)
+    inner_bottom = max(bottom - margin, top)
+    return [
+        [x, y]
+        for y in (inner_top, inner_bottom)
+        for x in (inner_left, inner_right)
+    ]
+def processYOLOBoxes(iou):
+    """Load the YOLO detections and de-duplicate them with NMS.
+
+    Reads ``boxes.json`` from OUTPUT_DIR (a list of entries with "bbox" and
+    "score" keys) and runs non-maximum suppression with `iou` as threshold.
+
+    Returns:
+        list of the surviving "bbox" values, in NMS order.
+    """
+    boxes_file = os.path.join(OUTPUT_DIR,"boxes.json")
+    with open(boxes_file, "r") as fh:
+        detections = json.load(fh)
+    bboxes = np.array([det["bbox"] for det in detections])
+    confidences = np.array([det["score"] for det in detections])
+    kept = non_max_suppression(bboxes, confidences, iou)
+    # Filtering against a global bounding box is still to be done (TBD):
+    #box_data = [entry for entry in box_data if box_inside_global(entry["bbox"], GLOBAL_BOX)]
+    return [detections[i]["bbox"] for i in kept]  # Format: [x1, y1, x2, y2]
+def prepare_tiles(image_path, boxes_full, tile_size=1024, overlap=50, iou=0.5, c_th=0.75, edge_margin=10):
+    """
+    Tile the image, save every tile as PNG and write per-tile prompt metadata.
+
+    For each tile the boxes overlapping it are merged, moved into tile-local
+    coordinates (clipped to the tile) and dropped when they come within
+    `edge_margin` pixels of a tile edge.  Each remaining box gets one
+    positive point (its centre, label 1) and four negative points (its
+    corners inset by 2 px, label 0).  Tiles go to ``tmp/tiles/`` and the
+    metadata to ``tmp/tiles_meta.json``.
+    Returns full image size H, W.
+    """
+    tiles, (H, W, _) = tile_image_with_overlap(image_path, tile_size, overlap)
+    os.makedirs("tmp/tiles", exist_ok=True)
+    meta = []
+    for idx, (tile_bgr, (x_offset, y_offset)) in enumerate(tiles):
+        tile_rgb = cv2.cvtColor(tile_bgr, cv2.COLOR_BGR2RGB)
+        Image.fromarray(tile_rgb).save(f"tmp/tiles/tile_{idx}.png")
+        tile_h, tile_w, _ = tile_rgb.shape
+        # Every tile gets an entry; it stays empty unless usable boxes remain.
+        entry = {
+            "idx": idx,
+            "x_off": x_offset,
+            "y_off": y_offset,
+            "local_boxes": [],
+            "point_coords": [],
+            "point_labels": []
+        }
+        meta.append(entry)
+        # Boxes that overlap this tile, still in full-image coordinates
+        candidate_boxes = [
+            [x1, y1, x2, y2]
+            for x1, y1, x2, y2 in boxes_full
+            if (x2 > x_offset) and (x1 < x_offset + tile_w) and (y2 > y_offset) and (y1 < y_offset + tile_h)
+        ]
+        if not candidate_boxes:
+            continue
+        merged_boxes = merge_boxes_iterative(candidate_boxes, iou_threshold=iou, containment_threshold=c_th)
+        # Shift into tile-local coordinates, then discard boxes hugging an edge
+        kept_boxes = []
+        for x1, y1, x2, y2 in merged_boxes:
+            local = [
+                max(0, x1 - x_offset),
+                max(0, y1 - y_offset),
+                min(tile_w, x2 - x_offset),
+                min(tile_h, y2 - y_offset),
+            ]
+            lx1, ly1, lx2, ly2 = local
+            if (lx1 > edge_margin and ly1 > edge_margin and (tile_w - lx2) > edge_margin and (tile_h - ly2) > edge_margin):
+                kept_boxes.append(local)
+        if not kept_boxes:
+            continue
+        # One positive centre point plus inset-corner negatives per box
+        for box in kept_boxes:
+            bx1, by1, bx2, by2 = box
+            centroid = ((bx1 + bx2) / 2, (by1 + by2) / 2)
+            corners = get_inset_corner_points(box, margin=2)
+            if not isinstance(corners, list):
+                corners = corners.tolist()
+            entry["point_coords"].append([centroid] + corners)
+            entry["point_labels"].append([1] + [0] * len(corners))
+        entry["local_boxes"] = kept_boxes
+    # Save metadata
+    os.makedirs("tmp", exist_ok=True)
+    with open("tmp/tiles_meta.json", "w") as f:
+        json.dump(meta, f)
+    return H, W
+def merge_tile_masks(H, W):
+    """
+    Merge predicted tile masks into a full-size instance label image.
+
+    Tile offsets come from ``tmp/tiles_meta.json`` and the masks of each tile
+    from ``tmp/masks/tile_<idx>.npy``; tiles without a mask file are skipped.
+    Every mask receives its own instance id (starting at 1) and only claims
+    pixels that are still 0, so earlier masks win where masks overlap.
+    The merged image is saved as ``mask.tif`` in OUTPUT_DIR.
+
+    Args:
+        H (int): full image height
+        W (int): full image width
+    Returns:
+        full_mask (np.ndarray): merged uint16 mask array of shape (H, W)
+    """
+    # Ids are stored as uint16, so more than 65535 masks cannot be represented.
+    full_mask = np.zeros((H, W), dtype=np.uint16)
+    instance_id = 1
+    # Load tile metadata
+    with open("tmp/tiles_meta.json", "r") as f:
+        tiles_meta = json.load(f)
+    for tile in tiles_meta:
+        tile_idx = tile["idx"]
+        x_off = tile["x_off"]
+        y_off = tile["y_off"]
+        mask_path = f"tmp/masks/tile_{tile_idx}.npy"
+        if not Path(mask_path).exists():
+            continue
+        # Load tile masks (expected shape = (N, h, w))
+        tile_masks = np.load(mask_path)
+        if tile_masks.ndim == 2:  # single mask saved as (h, w)
+            tile_masks = tile_masks[None, :, :]  # make it (1, h, w)
+        for mask in tile_masks:
+            mask = mask.astype(bool)
+            region = full_mask[y_off:y_off + mask.shape[0], x_off:x_off + mask.shape[1]]
+            # The slice is clipped at the image border, so trim the mask to the
+            # same shape.  Zero-padding the mask to a fixed 1024x1024 (as done
+            # before) writes nothing extra and breaks whenever the region is
+            # smaller than 1024, e.g. for images or tiles of another size.
+            mask = mask[:region.shape[0], :region.shape[1]]
+            region[mask & (region == 0)] = instance_id
+            instance_id += 1
+    # Save as TIFF
+    final_mask = Image.fromarray(full_mask)
+    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
+    final_mask.save(MASK_PATH)
+    return full_mask
+def chunkify(lst, n):
+    """Generate consecutive slices of `lst` holding at most `n` items each."""
+    yield from (lst[start:start + n] for start in range(0, len(lst), n))
+def img_shape(image_path):
+    """Return the array shape of the image at `image_path` as read by cv2.
+
+    Raises:
+        ValueError: if the image cannot be read.
+    """
+    img = cv2.imread(image_path)
+    if img is None:
+        # cv2.imread returns None instead of raising for a missing/unreadable file.
+        raise ValueError(f"Could not read image: {image_path}")
+    return img.shape
+def best_street_match(point, query_name, edges_gdf, max_distance=100):
+    """Find the nearby street name that best matches `query_name`.
+
+    Edges intersecting a buffer of `max_distance` around `point` are the
+    candidates; their 'name' values are scored against `query_name` with
+    ``fuzz.ratio``.
+
+    Args:
+        point: geometry exposing ``buffer()``.
+        query_name: text to match.
+        edges_gdf: edge table with a 'name' column, supporting
+            ``intersects()`` and boolean-mask indexing.
+        max_distance: buffer radius, in the units of the geometries
+            (presumably metres - confirm against the CRS used by the caller).
+
+    Returns:
+        ``(name, score, index)`` as produced by ``process.extractOne``, or
+        ``(None, 0, None)`` when no edge is in range or nothing matched, so
+        the result always has the same length.
+    """
+    search_area = point.buffer(max_distance)
+    nearby_edges = edges_gdf[edges_gdf.intersects(search_area)]
+    if nearby_edges.empty:
+        return None, 0, None
+    candidate_names = nearby_edges['name'].tolist()
+    best_match = process.extractOne(query_name, candidate_names, scorer=fuzz.ratio)
+    if best_match is None:
+        # extractOne yields None when no candidate could be scored.
+        return None, 0, None
+    return best_match  # (name, score, index)

inference_tab/inference_logic.py CHANGED Viewed

@@ -20,6 +20,9 @@ from huggingface_hub import hf_hub_download
 from config import OUTPUT_DIR
 from pathlib import Path
 from PIL import Image
 # Global cache
 _trocr_processor = None
@@ -28,17 +31,29 @@ _trocr_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def load_trocr_model():
-    """Load TrOCR into GPU if not cached."""
-    global _trocr_processor, _trocr_model
-    if _trocr_model is None:
-        _trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
-        _trocr_model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
-        _trocr_model.to(_trocr_device).eval()
-    return _trocr_processor, _trocr_model
-def run_inference(image_path, gcp_path, city_name, score_th):
     log = ""
     # ==== TEXT DETECTION ====
     for msg in getBBoxes(image_path):
         log += msg + "\n"
@@ -60,10 +75,13 @@ def run_inference(image_path, gcp_path, city_name, score_th):
         yield log, None
     # === ADD GEO DATA ===
-    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
-    for msg in georefImg(MASK_PATH, gcp_path):
         log += msg + "\n"
         yield log, None
     for msg in extractCentroids(image_path):
         log += msg + "\n"
         yield log, None
@@ -82,6 +100,14 @@ def run_inference(image_path, gcp_path, city_name, score_th):
 @spaces.GPU
 def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25):
@@ -156,321 +182,6 @@ def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25)
     yield f"Inference complete."
-def box_inside_global(box, global_box):
-    x1, y1, x2, y2 = box
-    gx1, gy1, gx2, gy2 = global_box
-    return (x1 >= gx1 and y1 >= gy1 and x2 <= gx2 and y2 <= gy2)
-def nms_iou(box1, box2):
-    x1 = max(box1[0], box2[0])
-    y1 = max(box1[1], box2[1])
-    x2 = min(box1[2], box2[2])
-    y2 = min(box1[3], box2[3])
-    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
-    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
-    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
-    union_area = box1_area + box2_area - inter_area
-    return inter_area / union_area if union_area > 0 else 0
-def non_max_suppression(boxes, scores, iou_threshold=0.5):
-    idxs = np.argsort(scores)[::-1]
-    keep = []
-    while len(idxs) > 0:
-        current = idxs[0]
-        keep.append(current)
-        idxs = idxs[1:]
-        idxs = np.array([i for i in idxs if nms_iou(boxes[current], boxes[i]) < iou_threshold])
-    return keep
-def tile_image_with_overlap(image_path, tile_size=1024, overlap=256):
-    """Tile image into overlapping RGB tiles."""
-    image = cv2.imread(image_path)
-    height, width, _ = image.shape
-    step = tile_size - overlap
-    tile_list = []
-    seen = set()  # to avoid duplicates
-    for y in range(0, height, step):
-        if y + tile_size > height:
-            y = height - tile_size
-        for x in range(0, width, step):
-            if x + tile_size > width:
-                x = width - tile_size
-            # clamp to valid region
-            x_start = max(0, x)
-            y_start = max(0, y)
-            x_end = x_start + tile_size
-            y_end = y_start + tile_size
-            coords = (x_start, y_start)
-            if coords in seen:  # skip duplicates
-                continue
-            seen.add(coords)
-            tile = image[y_start:y_end, x_start:x_end, :]
-            tile_list.append((tile, coords))
-    return tile_list, image.shape
-def compute_iou(box1, box2):
-    """Compute Intersection over Union for two boxes."""
-    x1 = max(box1[0], box2[0])
-    y1 = max(box1[1], box2[1])
-    x2 = min(box1[2], box2[2])
-    y2 = min(box1[3], box2[3])
-    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
-    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
-    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
-    union_area = area1 + area2 - inter_area
-    return inter_area / union_area if union_area > 0 else 0
-def merge_boxes(boxes, iou_threshold=0.8):
-    """Merge overlapping boxes based on IoU."""
-    merged = []
-    used = [False] * len(boxes)
-    for i, box in enumerate(boxes):
-        if used[i]:
-            continue
-        group = [box]
-        used[i] = True
-        for j in range(i + 1, len(boxes)):
-            if used[j]:
-                continue
-            if compute_iou(box, boxes[j]) > iou_threshold:
-                group.append(boxes[j])
-                used[j] = True
-        # Merge group into one bounding box
-        x1 = min(b[0] for b in group)
-        y1 = min(b[1] for b in group)
-        x2 = max(b[2] for b in group)
-        y2 = max(b[3] for b in group)
-        merged.append([x1, y1, x2, y2])
-    return merged
-def box_area(box):
-    return max(0, box[2] - box[0]) * max(0, box[3] - box[1])
-def is_contained(box1, box2, containment_threshold=0.9):
-    # Check if box1 is mostly inside box2
-    x1 = max(box1[0], box2[0])
-    y1 = max(box1[1], box2[1])
-    x2 = min(box1[2], box2[2])
-    y2 = min(box1[3], box2[3])
-    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
-    area1 = box_area(box1)
-    area2 = box_area(box2)
-    # If intersection covers most of smaller box area, consider contained
-    smaller_area = min(area1, area2)
-    if smaller_area == 0:
-        return False
-    return (inter_area / smaller_area) >= containment_threshold
-def merge_boxes_iterative(boxes, iou_threshold=0.25, containment_threshold=0.75):
-    boxes = boxes.copy()
-    changed = True
-    while changed:
-        changed = False
-        merged = []
-        used = [False] * len(boxes)
-        for i, box in enumerate(boxes):
-            if used[i]:
-                continue
-            group = [box]
-            used[i] = True
-            for j in range(i + 1, len(boxes)):
-                if used[j]:
-                    continue
-                iou = compute_iou(box, boxes[j])
-                contained = is_contained(box, boxes[j], containment_threshold)
-                if iou > iou_threshold or contained:
-                    group.append(boxes[j])
-                    used[j] = True
-            # Merge group into one bounding box
-            x1 = min(b[0] for b in group)
-            y1 = min(b[1] for b in group)
-            x2 = max(b[2] for b in group)
-            y2 = max(b[3] for b in group)
-            merged.append([x1, y1, x2, y2])
-        if len(merged) < len(boxes):
-            changed = True
-            boxes = merged
-    return boxes
-def get_corner_points(box):
-    x1, y1, x2, y2 = box
-    return [
-        [x1, y1],  # top-left
-        [x2, y1],  # top-right
-        [x1, y2],  # bottom-left
-        [x2, y2],  # bottom-right
-    ]
-def sample_negative_points_outside_boxes(mask, num_points):
-    points = []
-    tries = 0
-    max_tries = num_points * 20  # fail-safe to avoid infinite loops
-    while len(points) < num_points and tries < max_tries:
-        x = np.random.randint(0, mask.shape[1])
-        y = np.random.randint(0, mask.shape[0])
-        if not mask[y, x]:
-            points.append([x, y])
-        tries += 1
-    return np.array(points)
-def get_inset_corner_points(box, margin=5):
-    x1, y1, x2, y2 = box
-    # Ensure box is large enough for the margin
-    x1i = min(x1 + margin, x2)
-    y1i = min(y1 + margin, y2)
-    x2i = max(x2 - margin, x1)
-    y2i = max(y2 - margin, y1)
-    return [
-        [x1i, y1i],  # top-left (inset)
-        [x2i, y1i],  # top-right
-        [x1i, y2i],  # bottom-left
-        [x2i, y2i],  # bottom-right
-    ]
-def processYOLOBoxes(iou):
-    # Load YOLO-predicted boxes
-    BOXES_PATH = os.path.join(OUTPUT_DIR,"boxes.json")
-    with open(BOXES_PATH, "r") as f:
-        box_data = json.load(f)
-    # Non-max suppression
-    boxes = np.array([item["bbox"] for item in box_data])
-    scores = np.array([item["score"] for item in box_data])
-    # Run NMS
-    keep_indices = non_max_suppression(boxes, scores, iou)
-    # Filter data
-    box_data = [box_data[i] for i in keep_indices]
-    # Filter boxes inside global bbox (TBD)
-    #box_data = [entry for entry in box_data if box_inside_global(entry["bbox"], GLOBAL_BOX)]
-    boxes_full = [b["bbox"] for b in box_data]  # Format: [x1, y1, x2, y2]
-    return boxes_full
-def prepare_tiles(image_path, boxes_full, tile_size=1024, overlap=50, iou=0.5, c_th=0.75, edge_margin=10):
-    """
-    Tiles the image and prepares per-tile metadata including filtered boxes and point prompts.
-    Returns full image size H, W.
-    """
-    tiles, (H, W, _) = tile_image_with_overlap(image_path, tile_size, overlap)
-    os.makedirs("tmp/tiles", exist_ok=True)
-    meta = []
-    for idx, (tile_array, (x_offset, y_offset)) in enumerate(tiles):
-        tile_path = f"tmp/tiles/tile_{idx}.png"
-        tile_array = cv2.cvtColor(tile_array, cv2.COLOR_BGR2RGB)
-        Image.fromarray(tile_array).save(tile_path)
-        tile_h, tile_w, _ = tile_array.shape
-        # Select boxes overlapping this tile
-        candidate_boxes = []
-        for x1, y1, x2, y2 in boxes_full:
-            if (x2 > x_offset) and (x1 < x_offset + tile_w) and (y2 > y_offset) and (y1 < y_offset + tile_h):
-                candidate_boxes.append([x1, y1, x2, y2])
-        if not candidate_boxes:
-            meta.append({
-                "idx": idx,
-                "x_off": x_offset,
-                "y_off": y_offset,
-                "local_boxes": [],
-                "point_coords": [],
-                "point_labels": []
-            })
-            continue
-        # Merge overlapping boxes
-        merged_boxes = merge_boxes_iterative(candidate_boxes, iou_threshold=iou, containment_threshold=c_th)
-        # Adjust boxes to tile-local coordinates
-        local_boxes = []
-        for x1, y1, x2, y2 in merged_boxes:
-            new_x1 = max(0, x1 - x_offset)
-            new_y1 = max(0, y1 - y_offset)
-            new_x2 = min(tile_w, x2 - x_offset)
-            new_y2 = min(tile_h, y2 - y_offset)
-            local_boxes.append([new_x1, new_y1, new_x2, new_y2])
-        # Filter boxes too close to edges
-        filtered_local_boxes = []
-        for box in local_boxes:
-            x1, y1, x2, y2 = box
-            if (x1 > edge_margin and y1 > edge_margin and (tile_w - x2) > edge_margin and (tile_h - y2) > edge_margin):
-                filtered_local_boxes.append(box)
-        if not filtered_local_boxes:
-            meta.append({
-                "idx": idx,
-                "x_off": x_offset,
-                "y_off": y_offset,
-                "local_boxes": [],
-                "point_coords": [],
-                "point_labels": []
-            })
-            continue
-        # Compute point prompts
-        centroids = [((bx1 + bx2) / 2, (by1 + by2) / 2) for bx1, by1, bx2, by2 in filtered_local_boxes]
-        negative_points_per_box = [get_inset_corner_points(box, margin=2) for box in filtered_local_boxes]
-        point_coords = []
-        point_labels = []
-        for centroid, neg_points in zip(centroids, negative_points_per_box):
-            if not isinstance(neg_points, list):
-                neg_points = neg_points.tolist()
-            all_points = [centroid] + neg_points
-            all_labels = [1] + [0] * len(neg_points)
-            point_coords.append(all_points)
-            point_labels.append(all_labels)
-        meta.append({
-            "idx": idx,
-            "x_off": x_offset,
-            "y_off": y_offset,
-            "local_boxes": filtered_local_boxes,
-            "point_coords": point_coords,
-            "point_labels": point_labels
-        })
-    # Save metadata
-    os.makedirs("tmp", exist_ok=True)
-    with open("tmp/tiles_meta.json", "w") as f:
-        json.dump(meta, f)
-    return H, W
 @spaces.GPU(duration=180)
 def run_tile_inference():
@@ -504,52 +215,6 @@ def run_tile_inference():
             np.save(out_path, masks_stack)
-def merge_tile_masks(H, W):
-    """
-    Merge predicted tile masks into a full-size image.
-    Args:
-        H (int): full image height
-        W (int): full image width
-    Returns:
-        full_mask (np.ndarray): merged mask array
-    """
-    full_mask = np.zeros((H, W), dtype=np.uint16)
-    instance_id = 1
-    # Load tile metadata
-    with open("tmp/tiles_meta.json", "r") as f:
-        tiles_meta = json.load(f)
-    for tile in tiles_meta:
-        tile_idx = tile["idx"]
-        x_off = tile["x_off"]
-        y_off = tile["y_off"]
-        mask_path = f"tmp/masks/tile_{tile_idx}.npy"
-        if not Path(mask_path).exists():
-            continue
-        # Load tile masks (expected shape = (N, h, w))
-        tile_masks = np.load(mask_path)
-        if tile_masks.ndim == 2:  # single mask saved as (h, w)
-            tile_masks = tile_masks[None, :, :]  # make it (1, h, w)
-        for mask in tile_masks:
-            mask = mask.astype(bool)
-            h_end = y_off + mask.shape[0]
-            w_end = x_off + mask.shape[1]
-            region = full_mask[y_off:h_end, x_off:w_end]
-            region[mask & (region == 0)] = instance_id
-            instance_id += 1
-    # Save as TIFF
-    final_mask = Image.fromarray(full_mask)
-    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
-    final_mask.save(MASK_PATH)
 def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
     """
             iou for combining bounding boxes
@@ -575,7 +240,7 @@ def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
-def extractSegments(image_path, min_size=500, margin=10):
     image = cv2.imread(image_path)
     MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
@@ -589,7 +254,7 @@ def extractSegments(image_path, min_size=500, margin=10):
     yield f"Found {len(blob_ids)} blobs"
-    for blob_id in blob_ids[:1000]:
         yield f"Processing blob {blob_id}..."
         # Create a binary mask for the current blob
         blob_mask = (mask == blob_id).astype(np.uint8)
@@ -614,18 +279,19 @@ def extractSegments(image_path, min_size=500, margin=10):
         cropped_mask = blob_mask[y_min:y_max, x_min:x_max]
         # Apply mask to original image
-        if image.ndim == 3:
-            masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
-        else:
-            masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
         # Save the masked image
         BLOB_PATH=os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png")
-        cv2.imwrite(BLOB_PATH, masked_image)
     yield f"Done."
-@spaces.GPU(duration=180)
 def blobsOCR(image_path):
     yield "Load OCR model.."
     # Load model + processor
@@ -659,13 +325,7 @@ def blobsOCR(image_path):
                     yield f"{filename} → {generated_text}"
                 except Exception as e:
-                    yield f"Error processing {filename}: {e}"
-def chunkify(lst, n):
-    """Yield successive n-sized chunks from lst."""
-    for i in range(0, len(lst), n):
-        yield lst[i:i + n]
 @spaces.GPU(duration=180)
 def blobsOCR_chunk(image_paths):
@@ -689,7 +349,6 @@ def blobsOCR_chunk(image_paths):
     return results
 def blobsOCR_all():
     image_folder = os.path.join(OUTPUT_DIR, "blobs")
     all_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".png")]
@@ -705,7 +364,7 @@ def blobsOCR_all():
 def extractCentroids(image_path):
-    GEO_PATH=os.path.join(OUTPUT_DIR,"georeferenced.tif")
     with rasterio.open(GEO_PATH) as src:
         mask = src.read(1)
         transform = src.transform
@@ -749,12 +408,55 @@ def extractCentroids(image_path):
-def img_shape(image_path):
-    img = cv2.imread(image_path)
-    return img.shape
 def georefImg(image_path, gcp_path):
     yield "Reading GCP CSV..."
     df = pd.read_csv(gcp_path)
@@ -808,7 +510,35 @@ def georefImg(image_path, gcp_path):
 def extractStreetNet(city_name):
     yield f"Extract OSM street network for {city_name}"
-    G = ox.graph_from_place(city_name, network_type='all')
     G_proj = ox.project_graph(G)
     edges = ox.graph_to_gdfs(G_proj, nodes=False, edges=True, fill_edge_geometry=True)
     edges_3857 = edges.to_crs(epsg=3857)
@@ -823,16 +553,6 @@ def extractStreetNet(city_name):
     yield "Done."
def best_street_match(point, query_name, edges_gdf, max_distance=100):
    """Fuzzy-match ``query_name`` against the names of edges near ``point``.

    Args:
        point: Geometry exposing ``buffer()``; the search centre.
        query_name: Text to compare with the candidate street names.
        edges_gdf: Frame of street edges with a ``name`` column, queried via
            ``intersects()``.
        max_distance: Buffer radius around ``point``, in the units of the
            geometries' coordinate system.

    Returns:
        ``(None, 0)`` when no edge intersects the buffer. Otherwise whatever
        ``process.extractOne`` returns, which the original code annotates as
        ``(name, score, index)``.
        NOTE(review): the two branches return tuples of different length;
        confirm callers handle both.
    """
    search_area = point.buffer(max_distance)
    within_reach = edges_gdf.intersects(search_area)
    candidates = edges_gdf[within_reach]
    if candidates.empty:
        return None, 0
    # Score every nearby street name with the plain ratio scorer.
    return process.extractOne(query_name, candidates['name'].tolist(), scorer=fuzz.ratio)
 def fuzzyMatch(score_th):
     COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")

 from config import OUTPUT_DIR
 from pathlib import Path
 from PIL import Image
+from .helpers import box_inside_global,nms_iou,non_max_suppression,tile_image_with_overlap,compute_iou,merge_boxes,box_area,is_contained,merge_boxes_iterative,get_corner_points,sample_negative_points_outside_boxes,get_inset_corner_points,processYOLOBoxes,prepare_tiles,merge_tile_masks,chunkify,img_shape,best_street_match
+from pyproj import Transformer
+import shutil
 # Global cache
 _trocr_processor = None
+def run_inference(tile_dict, gcp_path, city_name, score_th):
+    IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
+    CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
+    if os.path.exists(IMAGE_FOLDER):
+        shutil.rmtree(IMAGE_FOLDER)
+    os.makedirs(IMAGE_FOLDER, exist_ok=True)
+    if os.path.exists(CSV_FILE):
+        os.remove(CSV_FILE)
     log = ""
+    if tile_dict is None:
+        yield "No tile selected", None
+        return
+    image_path = tile_dict["tile_path"]
+    coords = tile_dict["coords"] # (x_start, y_start, x_end, y_end)
     # ==== TEXT DETECTION ====
     for msg in getBBoxes(image_path):
         log += msg + "\n"
         yield log, None
     # === ADD GEO DATA ===
+    for msg in georefTile(coords,gcp_path):
         log += msg + "\n"
         yield log, None
+    '''for msg in georefImg(MASK_PATH, gcp_path):
+        log += msg + "\n"
+        yield log, None'''
     for msg in extractCentroids(image_path):
         log += msg + "\n"
         yield log, None
def load_trocr_model():
    """Return the TrOCR processor and model, loading them on first use.

    The pair is kept in the module-level ``_trocr_processor`` /
    ``_trocr_model`` globals, so the pretrained weights are fetched only on
    the first call. The model is moved to ``_trocr_device`` and put in eval
    mode when it is loaded.

    Returns:
        Tuple ``(processor, model)``.
    """
    global _trocr_processor, _trocr_model

    # Fast path: a previous call already populated the cache.
    if _trocr_model is not None:
        return _trocr_processor, _trocr_model

    _trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
    _trocr_model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
    _trocr_model.to(_trocr_device).eval()
    return _trocr_processor, _trocr_model
 @spaces.GPU
 def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25):
     yield f"Inference complete."
 @spaces.GPU(duration=180)
 def run_tile_inference():
             np.save(out_path, masks_stack)
 def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
     """
             iou for combining bounding boxes
+def extractSegments(image_path, min_size=500, margin=100):
     image = cv2.imread(image_path)
     MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
     yield f"Found {len(blob_ids)} blobs"
+    for blob_id in blob_ids:
         yield f"Processing blob {blob_id}..."
         # Create a binary mask for the current blob
         blob_mask = (mask == blob_id).astype(np.uint8)
         cropped_mask = blob_mask[y_min:y_max, x_min:x_max]
         # Apply mask to original image
+        shaded = cropped_image.copy()
+        overlay = cropped_image.copy()
+        overlay[cropped_mask == 1] = (0, 0, 255)
+        alpha = 0.5
+        shaded = cv2.addWeighted(overlay, alpha, shaded, 1 - alpha, 0)
         # Save the masked image
         BLOB_PATH=os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png")
+        cv2.imwrite(BLOB_PATH, shaded)
     yield f"Done."
+'''@spaces.GPU(duration=180)
 def blobsOCR(image_path):
     yield "Load OCR model.."
     # Load model + processor
                     yield f"{filename} → {generated_text}"
                 except Exception as e:
+                    yield f"Error processing {filename}: {e}"'''
 @spaces.GPU(duration=180)
 def blobsOCR_chunk(image_paths):
     return results
 def blobsOCR_all():
     image_folder = os.path.join(OUTPUT_DIR, "blobs")
     all_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith(".png")]
 def extractCentroids(image_path):
+    GEO_PATH=os.path.join(OUTPUT_DIR,"mask_georef.tif")
     with rasterio.open(GEO_PATH) as src:
         mask = src.read(1)
         transform = src.transform
def georefTile(tile_coords, gcp_path):
    """Georeference the tile mask to EPSG:3857 with the full-map GCPs.

    The ground control points in ``gcp_path`` are expressed in pixel
    coordinates of the full map. They are shifted by the tile's upper-left
    corner so they apply to ``mask.tif``, attached to a temporary copy of the
    mask, and that copy is warped with a first-order polynomial into
    ``mask_georef.tif`` (all files live in ``OUTPUT_DIR``).

    Args:
        tile_coords: ``(x_start, y_start, x_end, y_end)`` of the tile within
            the full map; only the start corner is used, as the pixel offset.
        gcp_path: CSV readable by ``pd.read_csv`` with the columns ``mapX``,
            ``mapY``, ``sourceX`` and ``sourceY``.

    Yields:
        Progress messages for the UI log.

    Raises:
        ValueError: If fewer than three GCPs are available, which is not
            enough for a first-order polynomial transform.
        RuntimeError: If GDAL fails to produce the temporary or the final
            raster. Without this check a stale ``mask_georef.tif`` from an
            earlier run could be picked up by the following steps.
    """
    yield "Georeferencing SAM image.."
    MASK_TILE = os.path.join(OUTPUT_DIR, "mask.tif")
    TMP_TILE = os.path.join(OUTPUT_DIR, "mask_tmp.tif")
    MASK_TILE_GEO = os.path.join(OUTPUT_DIR, "mask_georef.tif")

    df = pd.read_csv(gcp_path)

    # Only the upper-left corner matters: it is the offset between full-map
    # pixel coordinates and tile pixel coordinates.
    xoff, yoff, _, _ = tile_coords

    # gdal.GCP(map_x, map_y, elevation, pixel, line)
    # NOTE(review): abs() presumably undoes negative row values written by
    # the georeferencing tool that exported the points file - confirm.
    shifted_gcps = [
        gdal.GCP(
            float(r['mapX']),
            float(r['mapY']),
            0,
            float(r['sourceX']) - xoff,
            abs(float(r['sourceY'])) - yoff
        )
        for _, r in df.iterrows()
    ]
    if len(shifted_gcps) < 3:
        raise ValueError(
            f"At least 3 ground control points are required, found {len(shifted_gcps)}"
        )

    translated = gdal.Translate(
        TMP_TILE,
        MASK_TILE,
        format="GTiff",
        GCPs=shifted_gcps,
        outputSRS="EPSG:3857"
    )
    if translated is None:
        raise RuntimeError(f"gdal.Translate failed to write {TMP_TILE}")
    # Release the handle so GDAL flushes the file before Warp opens it by path.
    translated = None

    warped = gdal.Warp(
        MASK_TILE_GEO,
        TMP_TILE,
        dstSRS="EPSG:3857",
        resampleAlg="near",
        polynomialOrder=1,
        creationOptions=["COMPRESS=LZW"]
    )
    if warped is None:
        raise RuntimeError(f"gdal.Warp failed to write {MASK_TILE_GEO}")
    # Same as above: close the dataset so the output is fully written.
    warped = None

    yield "Done."
 def georefImg(image_path, gcp_path):
     yield "Reading GCP CSV..."
     df = pd.read_csv(gcp_path)
 def extractStreetNet(city_name):
     yield f"Extract OSM street network for {city_name}"
+    MASK_TILE_GEO=os.path.join(OUTPUT_DIR,"mask_georef.tif")
+    ds = gdal.Open(MASK_TILE_GEO)
+    gt = ds.GetGeoTransform()
+    width = ds.RasterXSize
+    height = ds.RasterYSize
+    minx = gt[0]
+    maxy = gt[3]
+    maxx = gt[0] + width * gt[1] + height * gt[2]
+    miny = gt[3] + width * gt[4] + height * gt[5]
+    # Add 100 meters buffer in all directions
+    minx -= 100  # west
+    maxx += 100  # east
+    miny -= 100  # south
+    maxy += 100  # north
+    bbox = (maxy, miny, maxx, minx)
+    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
+    north, south = transformer.transform(bbox[2], bbox[0])[1], transformer.transform(bbox[3], bbox[1])[1]
+    east, west   = transformer.transform(bbox[2], bbox[0])[0], transformer.transform(bbox[3], bbox[1])[0]
+    bbox = (west, south, east, north)
+    G = ox.graph_from_bbox(bbox,network_type='all')
     G_proj = ox.project_graph(G)
     edges = ox.graph_to_gdfs(G_proj, nodes=False, edges=True, fill_edge_geometry=True)
     edges_3857 = edges.to_crs(epsg=3857)
     yield "Done."
 def fuzzyMatch(score_th):
     COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")

inference_tab/inference_setup.py CHANGED Viewed

@@ -1,22 +1,130 @@
 import gradio as gr
def get_inference_widgets(run_inference):
    """Create the inference tab widgets and bind the run button.

    Args:
        run_inference: Callback invoked by the "Run Inference" button with
            the image file, the GCP file, the city name and the score
            threshold; its outputs feed the progress box and the CSV
            download.

    Returns:
        Tuple ``(image_input, gcp_input, city_name, score_th, run_button,
        output, download_file)`` of the created components.
    """
    # Inputs
    image_file = gr.File(label="Select Image File")
    gcp_file = gr.File(label="Select GCP Points File", file_types=[".points"])
    city_box = gr.Textbox(label="Enter city name")
    threshold_box = gr.Textbox(label="Enter a score threshold below which to annotate manually")

    # Action and outputs
    run_btn = gr.Button("Run Inference")
    progress_box = gr.Textbox(label="Progress", lines=10, interactive=False)
    csv_download = gr.File(label="Download CSV")

    callback_inputs = [image_file, gcp_file, city_box, threshold_box]
    callback_outputs = [progress_box, csv_download]
    run_btn.click(run_inference, inputs=callback_inputs, outputs=callback_outputs)

    return (
        image_file,
        gcp_file,
        city_box,
        threshold_box,
        run_btn,
        progress_box,
        csv_download,
    )

 import gradio as gr
+import cv2
+import numpy as np
+from PIL import Image
+import os
+TILE_SIZE = 1024
+TILE_FOLDER = "tiles"
+os.makedirs(TILE_FOLDER, exist_ok=True)
+tiles_cache = {"tiles": [], "selected_tile": None}
def make_tiles(image, tile_size=TILE_SIZE):
    """Cut an image into a grid of tiles and draw that grid on a copy.

    Tiles are produced row by row (left to right, then top to bottom) and
    numbered in that order; the number is also drawn near each tile's
    upper-left corner on the annotated copy.

    Args:
        image: Image array whose first two axes are height and width. Both
            2-D (single channel) and 3-D (multi channel) arrays are accepted.
        tile_size: Edge length of a tile in pixels; must be positive.

    Returns:
        Tuple ``(annotated, tiles)`` where ``annotated`` is a copy of
        ``image`` with the grid and tile ids drawn on it, and ``tiles`` is a
        list of ``((x_start, y_start, x_end, y_end), tile_array)``.
        The end coordinates are ``start + tile_size`` and are NOT clamped to
        the image size, so tiles on the right/bottom border can be smaller
        than their coordinates suggest. ``select_tile`` derives the number of
        grid columns from the unclamped ``x_end``, so keep it that way.
    """
    # Take only the first two axes so grayscale (2-D) input works as well.
    h, w = image.shape[:2]
    annotated = image.copy()
    tiles = []
    for y in range(0, h, tile_size):
        for x in range(0, w, tile_size):
            x_end, y_end = x + tile_size, y + tile_size
            tile_id = len(tiles)
            tiles.append(((x, y, x_end, y_end), image[y:y_end, x:x_end]))
            cv2.rectangle(annotated, (x, y), (x_end, y_end), (255, 0, 0), 2)
            cv2.putText(annotated, str(tile_id), (x + 50, y + 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 5)
    return annotated, tiles
def create_tiles(image_file):
    """Tile the uploaded map, store the tiles on disk and reset the selection.

    Every tile is written to ``TILE_FOLDER`` as ``tile_<idx>.png`` and
    ``tiles_cache["tiles"]`` is replaced with ``(coords, tile_path)`` pairs,
    so only paths (not pixel arrays) are kept in memory.

    Args:
        image_file: Value of the file upload component: either a path string
            or an object exposing the path as ``.name``.

    Returns:
        Tuple ``(annotated, run_button_update)``: the map with the tile grid
        drawn on it, and an update that disables the run button until a tile
        is selected.

    Raises:
        gr.Error: If no image file has been uploaded yet.
    """
    if image_file is None:
        # Clicking "Create Tiles" before uploading would otherwise fail with
        # an AttributeError that tells the user nothing.
        raise gr.Error("Please select an image file first.")

    # Accept both a temp-file wrapper (with .name) and a plain path string.
    image_path = getattr(image_file, "name", image_file)

    # Close the source file as soon as the pixels have been copied out.
    with Image.open(image_path) as src:
        img = np.array(src.convert("RGB"))

    annotated, tiles = make_tiles(img, TILE_SIZE)

    tiles_cache["tiles"] = []
    for idx, (coords, tile) in enumerate(tiles):
        tile_path = os.path.join(TILE_FOLDER, f"tile_{idx}.png")
        Image.fromarray(tile).save(tile_path)
        tiles_cache["tiles"].append((coords, tile_path))  # store path instead of array
    tiles_cache["selected_tile"] = None
    return annotated, gr.update(interactive=False)
def select_tile(evt: gr.SelectData, state):
    """Resolve a click on the annotated map to a tile and show that tile.

    Args:
        evt: Selection event; ``evt.index`` holds the clicked ``(x, y)``.
        state: Current value of the selected-tile state, returned unchanged
            when the click cannot be mapped to a tile.

    Returns:
        Tuple ``(tile_image, run_button_update, new_state)``. On success the
        tile image (with its id drawn on it) is returned, the run button is
        enabled and the state becomes ``{"tile_path": ..., "coords": ...}``.
        Otherwise the image is ``None`` and the run button is disabled.
    """
    tiles = tiles_cache["tiles"]
    if not tiles:
        return None, gr.update(interactive=False), state

    # The last tile's (unclamped) x_end is a multiple of TILE_SIZE, which
    # gives the number of grid columns.
    columns = tiles[-1][0][2] // TILE_SIZE
    click_x, click_y = evt.index[0], evt.index[1]
    tile_id = (click_y // TILE_SIZE) * columns + (click_x // TILE_SIZE)

    if not (0 <= tile_id < len(tiles)):
        return None, gr.update(interactive=False), state

    coords, tile_path = tiles[tile_id]

    # Keep the path rather than the pixel data, both in the module cache and
    # in the per-session state (two separate dicts on purpose).
    tiles_cache["selected_tile"] = {"tile_path": tile_path, "coords": coords}
    updated_state = {"tile_path": tile_path, "coords": coords}

    # The pixels are loaded only to display the chosen tile.
    preview = np.array(Image.open(tile_path))
    cv2.putText(preview, str(tile_id), (100, 100),
                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 4, cv2.LINE_AA)
    return preview, gr.update(interactive=True), updated_state
def get_inference_widgets(run_inference, georefImg):
    """Build the two-column inference tab and connect its events.

    Left column: map preview, city name, image and GCP uploads, plus the
    "Create Tiles" and "Georeference Full Map" buttons. Right column:
    selected tile preview, score threshold, run button, progress log and CSV
    download.

    Args:
        run_inference: Callback for the "Run Inference" button. It receives
            the selected-tile state, the GCP file, the city name and the
            score threshold, and feeds the progress box and the download.
        georefImg: Callback for the "Georeference Full Map" button. It
            receives the image and GCP files and feeds the progress box.

    Returns:
        Tuple ``(image_input, gcp_input, city_name, score_th, run_button,
        output, download_file)`` of the created components.
    """
    with gr.Row():
        # Left column: full map and data sources
        with gr.Column(scale=1, min_width=500):
            map_view = gr.Image(type="numpy", label="City Map", height=500, width=500)
            city_box = gr.Textbox(label="Enter city name")
            image_file = gr.File(label="Select Image File")
            gcp_file = gr.File(label="Select GCP Points File", file_types=[".points"])
            tiles_btn = gr.Button("Create Tiles")
            georef_btn = gr.Button("Georeference Full Map")

        # Right column: chosen tile and inference controls
        with gr.Column(scale=1):
            tile_view = gr.Image(type="numpy", label="Selected Tile", height=500, width=500)
            threshold_box = gr.Textbox(label="Enter a score threshold below which to annotate manually")
            run_btn = gr.Button("Run Inference", interactive=False)
            progress_box = gr.Textbox(label="Progress", lines=5, interactive=False)
            csv_download = gr.File(label="Download CSV")

    # Holds {"tile_path", "coords"} of the tile picked on the map.
    tile_state = gr.State()

    # Event wiring
    tiles_btn.click(fn=create_tiles, inputs=image_file, outputs=[map_view, run_btn])
    map_view.select(
        fn=select_tile,
        inputs=[tile_state],
        outputs=[tile_view, run_btn, tile_state],
    )
    run_btn.click(
        fn=run_inference,
        inputs=[tile_state, gcp_file, city_box, threshold_box],
        outputs=[progress_box, csv_download],
    )
    georef_btn.click(fn=georefImg, inputs=[image_file, gcp_file], outputs=[progress_box])

    return (
        image_file,
        gcp_file,
        city_box,
        threshold_box,
        run_btn,
        progress_box,
        csv_download,
    )

map_tab/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .map_setup import get_map_widgets
2	+
3	+ __all__ = ["get_map_widgets"]

map_tab/map_setup.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import gradio as gr
+import folium
def get_map_widgets():
    """Add an HTML widget showing a Folium map to the current layout.

    The map is centred on latitude 48.8566, longitude 2.3522 at zoom level
    12 and is embedded through its HTML representation. Nothing is returned;
    the widget is created inside a new column of the enclosing context.
    """
    base_map = folium.Map(location=[48.8566, 2.3522], zoom_start=12)
    with gr.Column():
        gr.HTML(value=base_map._repr_html_(), elem_id="map-widget")

requirements.txt CHANGED Viewed

@@ -14,5 +14,5 @@ torch==2.7.1
 transformers==4.53.2
 ultralytics==8.3.94
 huggingface_hub[hf_xet]
-gradio>=3.39
 GDAL==3.6.2

 transformers==4.53.2
 ultralytics==8.3.94
 huggingface_hub[hf_xet]
 GDAL==3.6.2
+folium==0.18.0