Spaces:

muk42
/

histOSM

Running on Zero

App Files Files Community

muk42 committed on Aug 26, 2025

Commit

c43f7d1

1 Parent(s): bf77b4b

fix

Browse files

Files changed (2) hide show

inference_tab/inference_logic.py +605 -102
inference_tab/inference_setup.py +47 -58

inference_tab/inference_logic.py CHANGED Viewed

@@ -1,130 +1,633 @@
-import gradio as gr
-import cv2
 import numpy as np
-from PIL import Image
 import os
-TILE_SIZE = 1024
-TILE_FOLDER = "tiles"
-os.makedirs(TILE_FOLDER, exist_ok=True)
-tiles_cache = {"tiles": [], "selected_tile": None}
-def make_tiles(image, tile_size=TILE_SIZE):
-    h, w, _ = image.shape
-    annotated = image.copy()
-    tiles = []
-    tile_id = 0
-    for y in range(0, h, tile_size):
-        for x in range(0, w, tile_size):
             tile = image[y:y+tile_size, x:x+tile_size]
-            tiles.append(((x, y, x+tile_size, y+tile_size), tile))
-            cv2.rectangle(annotated, (x, y), (x+tile_size, y+tile_size), (255,0,0), 2)
-            cv2.putText(annotated, str(tile_id), (x+50, y+50),
-                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,0), 5)
-            tile_id += 1
-    return annotated, tiles
-def create_tiles(image_file):
-    img = Image.open(image_file.name).convert("RGB")
-    img = np.array(img)
-    annotated, tiles = make_tiles(img, TILE_SIZE)
-    tiles_cache["tiles"] = []
-    for idx, (coords, tile) in enumerate(tiles):
-        tile_path = os.path.join(TILE_FOLDER, f"tile_{idx}.png")
-        Image.fromarray(tile).save(tile_path)
-        tiles_cache["tiles"].append((coords, tile_path))  # store path instead of array
-    tiles_cache["selected_tile"] = None
-    return annotated, gr.update(interactive=False)
-def select_tile(evt: gr.SelectData,state):
-    # compute tile index
-    if not tiles_cache["tiles"]:
-        return None, gr.update(interactive=False), state
-    num_tiles_x = (tiles_cache["tiles"][-1][0][2]) // TILE_SIZE
-    tile_id = (evt.index[1] // TILE_SIZE) * num_tiles_x + (evt.index[0] // TILE_SIZE)
-    if 0 <= tile_id < len(tiles_cache["tiles"]):
-        coords, tile_path = tiles_cache["tiles"][tile_id]
-        # store the path, not the array
-        tiles_cache["selected_tile"] = {
-            "tile_path": tile_path,
-            "coords": coords
-        }
-        updated_state = {
-            "tile_path": tile_path,
-            "coords": coords
         }
-        # load tile only for display
         tile_array = np.array(Image.open(tile_path))
-        cv2.putText(tile_array, str(tile_id), (100, 100),
-                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,0), 4, cv2.LINE_AA)
-        return tile_array, gr.update(interactive=True),updated_state
-    return None, gr.update(interactive=False), state
-def get_inference_widgets(run_inference,georefImg):
-    with gr.Row():
-        # Left column
-        with gr.Column(scale=1,min_width=500):
-            annotated_out = gr.Image(
-                type="numpy", label="City Map",
-                height=500, width=500
-            )
-            city_name = gr.Textbox(label="Enter city name")
-            image_input = gr.File(label="Select Image File")
-            gcp_input = gr.File(label="Select GCP Points File", file_types=[".points"])
-            create_btn = gr.Button("Create Tiles")
-            georef_btn = gr.Button("Georeference Full Map")
-        # Right column
-        with gr.Column(scale=1):
-            selected_tile = gr.Image(
-                type="numpy", label="Selected Tile",
-                height=500, width=500
-            )
-            score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
-            run_button = gr.Button("Run Inference", interactive=False)
-            output = gr.Textbox(label="Progress", lines=5, interactive=False)
-            download_file = gr.File(label="Download CSV")
-    selected_tile_path = gr.State()
-    # Wire events
-    create_btn.click(
-        fn=create_tiles, inputs=image_input,
-        outputs=[annotated_out, run_button]
     )
-    annotated_out.select(
-        fn=select_tile, inputs=[selected_tile_path],
-        outputs=[selected_tile, run_button, selected_tile_path]
     )
-    run_button.click(
-        fn=run_inference,
-        inputs=[selected_tile_path, gcp_input, city_name, score_th],
-        outputs=[output, download_file]
     )
-    georef_btn.click(
-        fn=georefImg,
-        inputs=[image_input, gcp_input],
-        outputs=[output]
     )
-    return image_input, gcp_input, city_name, score_th, run_button, output, download_file

+import spaces
 import numpy as np
+from ultralytics import YOLO
 import os
+import json
+from PIL import Image
+from ultralytics import SAM
+import cv2
+import torch
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import rasterio
+import rasterio.features
+from shapely.geometry import shape
+import pandas as pd
+import osmnx as ox
+from osgeo import gdal
+import geopandas as gpd
+from rapidfuzz import process, fuzz
+from huggingface_hub import hf_hub_download
+from config import OUTPUT_DIR
+from pathlib import Path
+from PIL import Image
+from .helpers import box_inside_global,nms_iou,non_max_suppression,tile_image_with_overlap,compute_iou,merge_boxes,box_area,is_contained,merge_boxes_iterative,get_corner_points,sample_negative_points_outside_boxes,get_inset_corner_points,processYOLOBoxes,prepare_tiles,merge_tile_masks,chunkify,img_shape,best_street_match
+from pyproj import Transformer
+import shutil
+# Global cache
+_trocr_processor = None
+_trocr_model = None
+_trocr_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+def run_inference(tile_dict, gcp_path, city_name, score_th):
+    """Run the whole street-label pipeline on one tile and stream progress.
+
+    Generator: every item is a ``(log, file_path)`` pair. ``log`` is the
+    progress text accumulated so far; ``file_path`` is ``None`` except when a
+    stage reports a produced file (the ``.tif`` mask or the final ``.csv``).
+
+    Args:
+        tile_dict: Mapping with ``"tile_path"`` and ``"coords"``
+            (x_start, y_start, x_end, y_end), or ``None`` if nothing is selected.
+        gcp_path: Ground-control-point file forwarded to ``georefTile``.
+        city_name: City name forwarded to ``extractStreetNet``.
+        score_th: Match-score threshold forwarded to ``fuzzyMatch``.
+    """
+    # Validate first: the cleanup below is destructive and must not wipe the
+    # outputs of a previous run when there is nothing to process.
+    if tile_dict is None:
+        yield "No tile selected", None
+        return
+    IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
+    CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
+    MASK_FILE = os.path.join(OUTPUT_DIR, "mask.tif")
+    # Start from a clean slate so artefacts of an earlier tile cannot leak in.
+    for folder in (IMAGE_FOLDER, "tmp"):
+        if os.path.exists(folder):
+            shutil.rmtree(folder)
+        os.makedirs(folder, exist_ok=True)
+    for stale_file in (CSV_FILE, MASK_FILE):
+        if os.path.exists(stale_file):
+            os.remove(stale_file)
+    log = ""
+    image_path = tile_dict["tile_path"]
+    coords = tile_dict["coords"] # (x_start, y_start, x_end, y_end)
+    print(f"Tile path: {image_path}; Tile coords: {coords}")
+    # ==== TEXT DETECTION ====
+    for msg in getBBoxes(image_path):
+        log += msg + "\n"
+        yield log, None
+    for msg in getSegments(image_path):
+        # getSegments reports the finished mask by yielding its path.
+        if msg.endswith(".tif"):
+            log += f"Mask saved at {msg}.\n"
+            yield log, msg
+        else:
+            log += msg + "\n"
+            yield log, None
+    # Remaining stages only emit progress text. Each stage is called right
+    # before it is consumed, so the stages still run strictly one after another:
+    # blob extraction, text recognition, geo data (tile georeferencing,
+    # centroids, OSM street network).
+    for stage, stage_args in (
+        (extractSegments, (image_path,)),
+        (blobsOCR_all, ()),
+        (georefTile, (coords, gcp_path)),
+        (extractCentroids, (image_path,)),
+        (extractStreetNet, (city_name,)),
+    ):
+        for msg in stage(*stage_args):
+            log += msg + "\n"
+            yield log, None
+    # === POST OCR ===
+    for msg in fuzzyMatch(score_th):
+        # fuzzyMatch reports the finished result by yielding the CSV path.
+        if msg.endswith(".csv"):
+            log+= f"Finished! CSV saved at {msg}. Street labels are ready for manual input."
+            yield log, msg
+        else:
+            log += msg + "\n"
+            yield log, None
+def load_trocr_model():
+    """Return the cached ``(processor, model)`` pair, loading it on first use.
+
+    The model is moved to ``_trocr_device`` and switched to eval mode once;
+    later calls reuse the module-level cache.
+    """
+    global _trocr_processor, _trocr_model
+    # Fast path: everything was loaded by an earlier call.
+    if _trocr_model is not None:
+        return _trocr_processor, _trocr_model
+    _trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
+    _trocr_model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
+    _trocr_model.to(_trocr_device).eval()
+    return _trocr_processor, _trocr_model
+@spaces.GPU
+def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25):
+    """Detect text boxes with YOLO on overlapping tiles and save them as JSON.
+
+    Generator of progress strings. The image is cut into ``tile_size`` squares
+    whose origins advance by ``int(tile_size * (1 - overlap))`` pixels; partial
+    tiles at the right/bottom edge are skipped. Detections are shifted into
+    full-image coordinates and written to ``boxes.json`` in ``OUTPUT_DIR``.
+
+    Args:
+        image_path: Path of the image to scan.
+        tile_size: Edge length of the square tiles in pixels.
+        overlap: Fraction of a tile shared with its neighbour; must be below 1.
+        confidence_threshold: Minimum detection confidence passed to YOLO.
+
+    Raises:
+        ValueError: If ``overlap`` leaves no positive step between tiles.
+        FileNotFoundError: If OpenCV cannot read ``image_path``.
+    """
+    yield f"DEBUG: Received image_path: {image_path}"
+    step = int(tile_size * (1 - overlap))
+    if step <= 0:
+        raise ValueError(
+            f"overlap={overlap} leaves no positive step between tiles of size {tile_size}"
+        )
+    image = cv2.imread(image_path)
+    if image is None:
+        # cv2.imread reports failure by returning None instead of raising.
+        raise FileNotFoundError(f"Could not read image: {image_path}")
+    H, W = image.shape[:2]
+    yolo_weights = hf_hub_download(
+        repo_id="muk42/yolov9_streets",
+        filename="yolov9c_finetuned.pt")
+    model = YOLO(yolo_weights)
+    # Only origins that leave room for a full tile; this both skips the partial
+    # edge tiles and gives the total for progress reporting without a dry run.
+    ys = range(0, H - tile_size + 1, step)
+    xs = range(0, W - tile_size + 1, step)
+    total_tiles = len(ys) * len(xs)
+    boxes_to_save = []
+    processed_tiles = 0
+    for y in ys:
+        for x in xs:
+            tile = image[y:y+tile_size, x:x+tile_size]
+            results = model.predict(source=tile, imgsz=tile_size, conf=confidence_threshold, verbose=False, iou=0.5)
+            for result in results:
+                boxes = result.boxes.xyxy.cpu().numpy()
+                scores = result.boxes.conf.cpu().numpy()
+                classes = result.boxes.cls.cpu().numpy()
+                for (x1, y1, x2, y2), score, cls in zip(boxes, scores, classes):
+                    # Shift box coordinates from tile space to full-image space.
+                    boxes_to_save.append({
+                        "bbox": [float(x1 + x), float(y1 + y), float(x2 + x), float(y2 + y)],
+                        "score": float(score),
+                        "class": int(cls),
+                    })
+            processed_tiles += 1
+            yield f"Processed tile {processed_tiles} of {total_tiles}"
+    # After all tiles are processed, save detections to JSON
+    BOXES_PATH = os.path.join(OUTPUT_DIR,"boxes.json")
+    with open(BOXES_PATH, "w") as f:
+        json.dump(boxes_to_save, f, indent=4)
+    yield "Inference complete."
+@spaces.GPU
+def run_tile_inference():
+    """Run SAM on every tile listed in ``tmp/tiles_meta.json``.
+
+    Generator of progress strings. For each tile the predicted masks are
+    stacked along the first axis and saved as ``tmp/masks/tile_<idx>.npy``;
+    tiles whose output file already exists are skipped, and tiles without any
+    mask produce no file.
+    """
+    sam = SAM("mobile_sam.pt") # sam2.1_l.pt
+    Path("tmp/masks").mkdir(parents=True, exist_ok=True)
+    with open("tmp/tiles_meta.json", "r") as meta_file:
+        tiles_meta = json.load(meta_file)
+    for meta in tiles_meta:
+        yield f"Processing {meta['idx']}..."
+        source_png = f"tmp/tiles/tile_{meta['idx']}.png"
+        target_npy = source_png.replace("tiles", "masks").replace(".png", ".npy")
+        # skip if already processed
+        if Path(target_npy).exists():
+            continue
+        # Prompts default to empty lists when the metadata omits them.
+        box_prompts = meta.get('local_boxes', [])
+        point_prompts = meta.get('point_coords', [])
+        point_flags = meta.get('point_labels', [])
+        pixels = np.array(Image.open(source_png))
+        predictions = sam(pixels, bboxes=box_prompts,
+                          points=point_prompts, labels=point_flags)
+        per_result = [p.masks.data.cpu().numpy() for p in predictions if p.masks is not None]
+        if not per_result:
+            continue
+        np.save(target_npy, np.concatenate(per_result, axis=0))  # shape (N, H, W)
+def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
+    """Segment the saved YOLO boxes tile by tile and merge them into one mask.
+
+    Generator of progress strings; the last item is the path of ``mask.tif``
+    inside ``OUTPUT_DIR``.
+
+    Args:
+        image_path: Image the boxes were detected on.
+        iou: IoU used when combining bounding boxes.
+        c_th: Share of the smaller box that must lie inside the larger box
+            for the two to be merged.
+        edge_margin: Pixel margin for tiles.
+    """
+    yield "Load YOLO boxes.."
+    with open(os.path.join(OUTPUT_DIR,"boxes.json"), "r") as boxes_file:
+        detections = json.load(boxes_file)
+    bboxes = [entry["bbox"] for entry in detections]
+    yield "Prepare tiles..."
+    height, width = prepare_tiles(image_path, bboxes, tile_size=1024, overlap=50, iou=iou, c_th=c_th, edge_margin=edge_margin)
+    yield "Run inference on tiles..."
+    # Forward every progress message of the per-tile stage unchanged.
+    yield from run_tile_inference()
+    yield "Marge predicted masks into image..."
+    merge_tile_masks(height, width)
+    yield os.path.join(OUTPUT_DIR,"mask.tif")
+def extractSegments(image_path, min_size=500, margin=100):
+    """Save one highlighted crop per mask blob into the ``blobs`` folder.
+
+    Generator of progress strings. Reads ``mask.tif`` from ``OUTPUT_DIR``,
+    treats every non-zero value as one blob, skips blobs with fewer than
+    ``min_size`` pixels, and writes the blob's bounding box (grown by
+    ``margin`` pixels, clipped to the mask size) with the blob tinted.
+    """
+    source = cv2.imread(image_path)
+    labelled = cv2.imread(os.path.join(OUTPUT_DIR,"mask.tif"), cv2.IMREAD_UNCHANGED)
+    mask_h, mask_w = labelled.shape[:2]
+    # Label 0 is background; every other value identifies one blob.
+    blob_ids = np.unique(labelled)
+    blob_ids = blob_ids[blob_ids != 0]
+    yield f"Found {len(blob_ids)} blobs"
+    for blob_id in blob_ids:
+        yield f"Processing blob {blob_id}..."
+        member = (labelled == blob_id).astype(np.uint8)
+        # Size filter counts pixels belonging to the blob.
+        if np.sum(member) < min_size:
+            continue
+        rows, cols = np.where(member)
+        # Bounding box grown by the margin but kept inside the image.
+        left = max(0, cols.min() - margin)
+        top = max(0, rows.min() - margin)
+        right = min(mask_w, cols.max() + 1 + margin)
+        bottom = min(mask_h, rows.max() + 1 + margin)
+        crop = source[top:bottom, left:right]
+        # Paint the blob pixels, then blend the painted copy over the plain crop.
+        tinted = crop.copy()
+        tinted[member[top:bottom, left:right] == 1] =  (255, 200, 100)
+        opacity = 0.35
+        highlighted = cv2.addWeighted(tinted, opacity, crop.copy(), 1 - opacity, 0)
+        cv2.imwrite(os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png"), highlighted)
+    yield "Done."
+'''@spaces.GPU(duration=180)
+def blobsOCR(image_path):
+    yield "Load OCR model.."
+    # Load model + processor
+    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
+    model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
+    image_extensions = (".png")
+    # Device setup
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.half().to(device) # float16 weights precision
+    yield f"Running on {device}..."
+    # Open output file for writing
+    OCR_PATH = os.path.join(OUTPUT_DIR,"ocr.csv")
+    with open(OCR_PATH, "w", encoding="utf-8") as f_out:
+        # Process each image
+        image_folder = os.path.join(OUTPUT_DIR,"blobs")
+        for filename in os.listdir(image_folder):
+            if filename.lower().endswith(image_extensions):
+                image_path = os.path.join(image_folder, filename)
+                try:
+                    image = Image.open(image_path).convert("RGB")
+                    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
+                    generated_ids = model.generate(pixel_values)
+                    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+                    # Write to file
+                    name = os.path.splitext(os.path.basename(filename))[0]
+                    f_out.write(f'{name},"{generated_text}"\n')
+                    yield f"{filename} → {generated_text}"
+                except Exception as e:
+                    yield f"Error processing {filename}: {e}"'''
+@spaces.GPU
+def blobsOCR_chunk(image_paths):
+    """Recognise the text of one batch of images.
+
+    Returns a list of ``(name, text)`` tuples in the order of ``image_paths``,
+    where ``name`` is the file name without directory and extension.
+    """
+    processor, model = load_trocr_model()
+    # Decode the whole batch in a single generate() call.
+    batch = [Image.open(p).convert("RGB") for p in image_paths]
+    encoded = processor(images=batch, return_tensors="pt", padding=True)
+    pixel_values = encoded.pixel_values.to(_trocr_device)
+    token_ids = model.generate(pixel_values)
+    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
+    return [
+        (os.path.splitext(os.path.basename(p))[0], text)
+        for p, text in zip(image_paths, decoded)
+    ]
+def blobsOCR_all():
+    """OCR every PNG in the ``blobs`` folder and write ``ocr.csv``.
+
+    Generator of progress strings. Each CSV row is ``<name>,"<text>"`` where
+    ``name`` is the image's file name without extension. Images are processed
+    in sorted order, in batches of 16.
+    """
+    image_folder = os.path.join(OUTPUT_DIR, "blobs")
+    # Sorted so the row order does not depend on the file system's listing order.
+    all_files = [os.path.join(image_folder, f) for f in sorted(os.listdir(image_folder)) if f.endswith(".png")]
+    OCR_PATH = os.path.join(OUTPUT_DIR,"ocr.csv")
+    with open(OCR_PATH, "w", encoding="utf-8") as f_out:
+        for chunk in chunkify(all_files, n=16):  # adjust batch size
+            yield f"Processing {len(chunk)} images..."
+            for name, text in blobsOCR_chunk(chunk):
+                # Double any embedded quote (the CSV convention) so recognised
+                # text containing '"' cannot terminate the quoted field early.
+                escaped = text.replace('"', '""')
+                f_out.write(f'{name},"{escaped}"\n')
+                yield f"{name} → {text}"
+def extractCentroids(image_path):
+    """Write the centroid of every labelled blob of the georeferenced mask.
+
+    Generator of progress strings. Reads band 1 of ``mask_georef.tif`` from
+    ``OUTPUT_DIR``, polygonises the non-zero pixels, unions the polygons that
+    share a label and stores ``blob_id, x, y`` rows in ``centroids.csv``.
+    ``image_path`` is accepted but not used.
+    """
+    with rasterio.open(os.path.join(OUTPUT_DIR,"mask_georef.tif")) as src:
+        band = src.read(1)
+        affine = src.transform
+    labels = np.unique(band)
+    labels = labels[labels != 0]
+    total = len(labels)
+    # Group the polygon parts by the raster value they were traced from.
+    parts_by_label = {}
+    for geom, value in rasterio.features.shapes(band, mask=(band != 0), transform=affine):
+        if value != 0:
+            parts_by_label.setdefault(value, []).append(shape(geom))
+    rows = []
+    for position, label in enumerate(labels, start=1):
+        yield f"Processing {position} out of {total}"
+        parts = parts_by_label.get(label)
+        if not parts:
+            continue
+        # A label may consist of several disjoint parts; union them first.
+        merged = parts[0]
+        for part in parts[1:]:
+            merged = merged.union(part)
+        center = merged.centroid
+        rows.append({"blob_id": label, "x": center.x, "y": center.y})
+    pd.DataFrame(rows).to_csv(os.path.join(OUTPUT_DIR,"centroids.csv"), index=False)
+    yield f"Saved centroid coordinates of {total} blobs."
+def georefTile(tile_coords, gcp_path):
+    """Georeference ``mask.tif`` (one tile) with GCPs shifted into tile space.
+
+    Generator of progress strings. The control points are read from
+    ``gcp_path`` with pandas; their pixel coordinates are moved by the tile's
+    top-left corner, attached to a temporary copy of the mask, and the copy is
+    warped to EPSG:3857 as ``mask_georef.tif`` in ``OUTPUT_DIR``.
+
+    Args:
+        tile_coords: ``(xmin, ymin, xmax, ymax)`` of the tile in the full image.
+        gcp_path: CSV-readable file with ``mapX, mapY, sourceX, sourceY`` columns.
+    """
+    yield "Georeferencing SAM image.."
+    mask_src = os.path.join(OUTPUT_DIR,"mask.tif")
+    mask_tmp = os.path.join(OUTPUT_DIR,"mask_tmp.tif")
+    mask_geo = os.path.join(OUTPUT_DIR,"mask_georef.tif")
+    # Remove leftovers of an earlier run before writing new files.
+    for stale in (mask_tmp, mask_geo):
+        if os.path.exists(stale):
+            os.remove(stale)
+    gcp_table = pd.read_csv(gcp_path)
+    # Only the tile origin is needed; the far corner is unpacked but unused.
+    x_origin, y_origin, _x_end, _y_end = tile_coords
+    tile_gcps = [
+        gdal.GCP(
+            float(row['mapX']),
+            float(row['mapY']),
+            0,
+            float(row['sourceX']) - x_origin,
+            # sourceY is taken by magnitude before the tile offset is removed.
+            abs(float(row['sourceY'])) - y_origin,
+        )
+        for _, row in gcp_table.iterrows()
+    ]
+    gdal.Translate(
+        mask_tmp,
+        mask_src,
+        format="GTiff",
+        GCPs=tile_gcps,
+        outputSRS="EPSG:3857",
+    )
+    gdal.Warp(
+        mask_geo,
+        mask_tmp,
+        dstSRS="EPSG:3857",
+        resampleAlg="near",
+        polynomialOrder=1,
+        creationOptions=["COMPRESS=LZW"],
+    )
+    yield "Done."
+def georefImg(image_path, gcp_path):
+    """Georeference a whole image with the given ground control points.
+
+    Generator of progress strings. The control points are attached to a
+    temporary GeoTIFF copy of ``image_path`` and the copy is warped to
+    EPSG:3857 as ``georeferenced.tif`` in ``OUTPUT_DIR``.
+
+    Args:
+        image_path: Image to georeference.
+        gcp_path: CSV-readable file with ``mapX, mapY, sourceX, sourceY`` columns.
+    """
+    yield "Reading GCP CSV..."
+    tmp_tif = os.path.join(OUTPUT_DIR,"tmp.tif")
+    geo_tif = os.path.join(OUTPUT_DIR,"georeferenced.tif")
+    # Remove leftovers of an earlier run before writing new files.
+    for stale in (tmp_tif, geo_tif):
+        if os.path.exists(stale):
+            os.remove(stale)
+    gcp_table = pd.read_csv(gcp_path)
+    # The shape is still queried (and unpacked) although its values are unused.
+    _height, _width, _ = img_shape(image_path)
+    # Build GCPs; sourceY is taken by magnitude.
+    control_points = [
+        gdal.GCP(
+            float(row['mapX']),
+            float(row['mapY']),
+            0,
+            float(row['sourceX']),
+            abs(float(row['sourceY'])),
+        )
+        for _, row in gcp_table.iterrows()
+    ]
+    gdal.Translate(
+        tmp_tif,
+        image_path,
+        format="GTiff",
+        GCPs=control_points,
+        outputSRS="EPSG:3857",
+    )
+    yield "Running gdalwarp..."
+    gdal.Warp(
+        geo_tif,
+        tmp_tif,
+        dstSRS="EPSG:3857",
+        resampleAlg="near",
+        polynomialOrder=1,
+        creationOptions=["COMPRESS=LZW"],
+    )
+    yield "Done."
+def extractStreetNet(city_name):
+    """Download the named OSM streets around the georeferenced mask.
+
+    Generator of progress strings. The extent of ``mask_georef.tif`` is read
+    from its geotransform, padded by 100 units, converted from EPSG:3857 to
+    EPSG:4326 and used to request a street graph from osmnx. Edges that carry
+    a name are written to ``osm_extract.geojson`` in ``OUTPUT_DIR``.
+
+    Args:
+        city_name: Only used in the progress message; the query itself is
+            driven by the mask's bounding box.
+    """
+    yield f"Extract OSM street network for {city_name}"
+    MASK_TILE_GEO=os.path.join(OUTPUT_DIR,"mask_georef.tif")
+    ds = gdal.Open(MASK_TILE_GEO)
+    gt = ds.GetGeoTransform()
+    width = ds.RasterXSize
+    height = ds.RasterYSize
+    # Top-left corner comes straight from the geotransform; the bottom-right
+    # corner is obtained by applying the transform to (width, height).
+    minx = gt[0]
+    maxy = gt[3]
+    maxx = gt[0] + width * gt[1] + height * gt[2]
+    miny = gt[3] + width * gt[4] + height * gt[5]
+    # Add 100 meters buffer in all directions
+    # NOTE(review): "meters" assumes the raster is in EPSG:3857 — confirm.
+    minx -= 100  # west
+    maxx += 100  # east
+    miny -= 100  # south
+    maxy += 100  # north
+    # Temporary ordering (maxy, miny, maxx, minx); indexed below.
+    bbox = (maxy, miny, maxx, minx)
+    # always_xy=True: transform() takes and returns (x/lon, y/lat).
+    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
+    # (maxx, maxy) gives the north-east corner, (minx, miny) the south-west one.
+    north, south = transformer.transform(bbox[2], bbox[0])[1], transformer.transform(bbox[3], bbox[1])[1]
+    east, west   = transformer.transform(bbox[2], bbox[0])[0], transformer.transform(bbox[3], bbox[1])[0]
+    # NOTE(review): a single (west, south, east, north) tuple is presumably the
+    # bbox form expected by the installed osmnx version — confirm.
+    bbox = (west, south, east, north)
+    G = ox.graph_from_bbox(bbox,network_type='all')
+    G_proj = ox.project_graph(G)
+    edges = ox.graph_to_gdfs(G_proj, nodes=False, edges=True, fill_edge_geometry=True)
+    edges_3857 = edges.to_crs(epsg=3857)
+    edges_3857 = edges_3857[['osmid','name', 'geometry']]
+    # Unnamed edges cannot be matched against OCR text, so drop them.
+    edges_3857 = edges_3857[edges_3857['name'].notnull()]
+    # When an edge carries a list of names, keep the first one.
+    edges_3857['name'] = edges_3857['name'].apply(
+                            lambda x: x[0] if isinstance(x, list) and len(x) > 0 else x)
+    OSM_PATH=os.path.join(OUTPUT_DIR,"osm_extract.geojson")
+    edges_3857.to_file(OSM_PATH, driver="GeoJSON")
+    yield "Done."
+def fuzzyMatch(score_th):
+    """Match OCR'd labels to nearby OSM street names and save the result.
+
+    Generator of progress strings; the last item is the path of
+    ``street_matches.csv`` in ``OUTPUT_DIR``. Centroids and OCR text are joined
+    on ``blob_id``, each label is compared with the OSM streets via
+    ``best_street_match`` (``max_distance=100``), and blob images whose match
+    score reaches the threshold are deleted from the ``blobs`` folder so that
+    only the lower-scoring ones remain there.
+
+    Args:
+        score_th: Score threshold; may be a string and may contain decimals.
+
+    Raises:
+        ValueError: If ``score_th`` cannot be parsed as a number.
+    """
+    # Parse up front so a malformed threshold fails before the matching work.
+    # float() accepts everything int() did for text input, plus decimals.
+    threshold = float(score_th)
+    COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
+    OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")
+    coords_df = pd.read_csv(COORD_PATH)
+    names_df = pd.read_csv(OCR_PATH,
+                           names=['blob_id','pred_text'],
+                           dtype={"blob_id": "int64", "pred_text": "string"})
+    merged_df = coords_df.merge(names_df, on="blob_id")
+    gdf = gpd.GeoDataFrame(
+        merged_df,
+        geometry=gpd.points_from_xy(merged_df.x, merged_df.y),
+        crs="EPSG:3857"
+    )
+    OSM_PATH=os.path.join(OUTPUT_DIR,"osm_extract.geojson")
+    osm_gdf = gpd.read_file(OSM_PATH,dtype={"name": "str"})
+    yield "Process OSM candidates..."
+    results = []
+    for _, row in gdf.iterrows():
+        match = best_street_match(row.geometry, row['pred_text'], osm_gdf, max_distance=100)
+        # A falsy match means no candidate street: record it with score 0.
+        results.append({
+            "blob_id": row.blob_id,
+            "x": row.x,
+            "y": row.y,
+            "blob_name": row.pred_text,
+            "best_osm_match": match[0] if match else None,
+            "osm_match_score": match[1] if match else 0,
+        })
+    # Explicit columns keep the frame well-formed even when there are no rows.
+    results_df = pd.DataFrame(
+        results,
+        columns=["blob_id", "x", "y", "blob_name", "best_osm_match", "osm_match_score"],
+    )
+    RES_PATH=os.path.join(OUTPUT_DIR,"street_matches.csv")
+    results_df.to_csv(RES_PATH, index=False)
+    # Remove blob images whose score is at or above the threshold; the ones
+    # left in the folder are those below it.
+    confident_df = results_df[results_df['osm_match_score'] >= threshold]
+    for blob_id in confident_df['blob_id']:
+        file_path = os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png")
+        if os.path.exists(file_path):
+            os.remove(file_path)
+    yield f"{RES_PATH}"

inference_tab/inference_setup.py CHANGED Viewed

@@ -7,7 +7,7 @@ import os
 TILE_SIZE = 1024
 TILE_FOLDER = "tiles"
 os.makedirs(TILE_FOLDER, exist_ok=True)
-tiles_cache = {"tiles": [], "selected_tile": None, "processed_tiles": set()}
 def make_tiles(image, tile_size=TILE_SIZE):
@@ -20,44 +20,29 @@ def make_tiles(image, tile_size=TILE_SIZE):
         for x in range(0, w, tile_size):
             tile = image[y:y+tile_size, x:x+tile_size]
             tiles.append(((x, y, x+tile_size, y+tile_size), tile))
-            # Draw thick rectangle for readability
-            cv2.rectangle(annotated, (x, y), (x+tile_size, y+tile_size), (255, 0, 0), 6)
-            cv2.putText(annotated, str(tile_id), (x+50, y+100),
-                        cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 0, 0), 8)
-            # Shade processed tiles
-            if tile_id in tiles_cache["processed_tiles"]:
-                overlay = annotated[y:y+tile_size, x:x+tile_size].copy()
-                overlay[:] = (0, 255, 0)  # light green
-                alpha = 0.4
-                annotated[y:y+tile_size, x:x+tile_size] = cv2.addWeighted(
-                    overlay, alpha, annotated[y:y+tile_size, x:x+tile_size], 1-alpha, 0
-                )
             tile_id += 1
     return annotated, tiles
 def create_tiles(image_file):
     img = Image.open(image_file.name).convert("RGB")
-    img_np = np.array(img)
-    annotated, tiles = make_tiles(img_np, TILE_SIZE)
     tiles_cache["tiles"] = []
     for idx, (coords, tile) in enumerate(tiles):
         tile_path = os.path.join(TILE_FOLDER, f"tile_{idx}.png")
         Image.fromarray(tile).save(tile_path)
-        tiles_cache["tiles"].append((coords, tile_path))
     tiles_cache["selected_tile"] = None
-    tiles_cache["processed_tiles"] = set()
     return annotated, gr.update(interactive=False)
-def select_tile(image, evt: gr.SelectData, state):
     if not tiles_cache["tiles"]:
         return None, gr.update(interactive=False), state
@@ -66,70 +51,73 @@ def select_tile(image, evt: gr.SelectData, state):
     if 0 <= tile_id < len(tiles_cache["tiles"]):
         coords, tile_path = tiles_cache["tiles"][tile_id]
-        tiles_cache["selected_tile"] = {"tile_path": tile_path, "coords": coords, "tile_id": tile_id}
-        updated_state = {"tile_path": tile_path, "coords": coords, "tile_id": tile_id}
         tile_array = np.array(Image.open(tile_path))
         cv2.putText(tile_array, str(tile_id), (100, 100),
-                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 4, cv2.LINE_AA)
-        return tile_array, gr.update(interactive=True), updated_state
-    return None, gr.update(interactive=False), state
-# Wrapper to shade processed tile after running inference
-def run_inference_with_shading(selected_tile_state, gcp_input, city_name, score_th, annotated_image, run_inference_fn):
-    # Call original inference
-    output, download_file = run_inference_fn(selected_tile_state, gcp_input, city_name, score_th)
-    # Mark tile as processed
-    tile_info = tiles_cache.get("selected_tile")
-    if tile_info:
-        tiles_cache["processed_tiles"].add(tile_info["tile_id"])
-    # Update annotated map with shading
-    annotated, _ = make_tiles(annotated_image, TILE_SIZE)
-    return annotated, output, download_file
-def get_inference_widgets(run_inference, georefImg):
     with gr.Row():
-        with gr.Column(scale=1, min_width=500):
-            annotated_out = gr.Image(type="numpy", label="City Map", height=500, width=500)
             city_name = gr.Textbox(label="Enter city name")
             image_input = gr.File(label="Select Image File")
             gcp_input = gr.File(label="Select GCP Points File", file_types=[".points"])
             create_btn = gr.Button("Create Tiles")
             georef_btn = gr.Button("Georeference Full Map")
         with gr.Column(scale=1):
-            selected_tile = gr.Image(type="numpy", label="Selected Tile", height=500, width=500)
             score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
             run_button = gr.Button("Run Inference", interactive=False)
             output = gr.Textbox(label="Progress", lines=5, interactive=False)
             download_file = gr.File(label="Download CSV")
-    selected_tile_state = gr.State()
-    annotated_image_state = gr.State()
     # Wire events
     create_btn.click(
-        fn=create_tiles,
-        inputs=image_input,
         outputs=[annotated_out, run_button]
     )
     annotated_out.select(
-        fn=select_tile,
-        inputs=[annotated_out, selected_tile_state],
-        outputs=[selected_tile, run_button, selected_tile_state]
     )
     run_button.click(
-        fn=lambda selected_tile_state, gcp_input, city_name, score_th, annotated_image:
-            run_inference_with_shading(selected_tile_state, gcp_input, city_name, score_th, annotated_image, run_inference),
-        inputs=[selected_tile_state, gcp_input, city_name, score_th, annotated_out],
-        outputs=[annotated_out, output, download_file]
     )
     georef_btn.click(
@@ -138,4 +126,5 @@ def get_inference_widgets(run_inference, georefImg):
         outputs=[output]
     )
-    return image_input, gcp_input, city_name, score_th, run_button, output, download_file

 TILE_SIZE = 1024
 TILE_FOLDER = "tiles"
 os.makedirs(TILE_FOLDER, exist_ok=True)
+tiles_cache = {"tiles": [], "selected_tile": None}
 def make_tiles(image, tile_size=TILE_SIZE):
         for x in range(0, w, tile_size):
             tile = image[y:y+tile_size, x:x+tile_size]
             tiles.append(((x, y, x+tile_size, y+tile_size), tile))
+            cv2.rectangle(annotated, (x, y), (x+tile_size, y+tile_size), (255,0,0), 2)
+            cv2.putText(annotated, str(tile_id), (x+50, y+50),
+                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,0), 5)
             tile_id += 1
     return annotated, tiles
 def create_tiles(image_file):
     img = Image.open(image_file.name).convert("RGB")
+    img = np.array(img)
+    annotated, tiles = make_tiles(img, TILE_SIZE)
     tiles_cache["tiles"] = []
     for idx, (coords, tile) in enumerate(tiles):
         tile_path = os.path.join(TILE_FOLDER, f"tile_{idx}.png")
         Image.fromarray(tile).save(tile_path)
+        tiles_cache["tiles"].append((coords, tile_path))  # store path instead of array
     tiles_cache["selected_tile"] = None
     return annotated, gr.update(interactive=False)
+def select_tile(evt: gr.SelectData,state):
+    # compute tile index
     if not tiles_cache["tiles"]:
         return None, gr.update(interactive=False), state
     if 0 <= tile_id < len(tiles_cache["tiles"]):
         coords, tile_path = tiles_cache["tiles"][tile_id]
+        # store the path, not the array
+        tiles_cache["selected_tile"] = {
+            "tile_path": tile_path,
+            "coords": coords
+        }
+        updated_state = {
+            "tile_path": tile_path,
+            "coords": coords
+        }
+        # load tile only for display
         tile_array = np.array(Image.open(tile_path))
         cv2.putText(tile_array, str(tile_id), (100, 100),
+                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,0), 4, cv2.LINE_AA)
+        return tile_array, gr.update(interactive=True),updated_state
+    return None, gr.update(interactive=False), state
+def get_inference_widgets(run_inference,georefImg):
     with gr.Row():
+        # Left column
+        with gr.Column(scale=1,min_width=500):
+            annotated_out = gr.Image(
+                type="numpy", label="City Map",
+                height=500, width=500
+            )
             city_name = gr.Textbox(label="Enter city name")
             image_input = gr.File(label="Select Image File")
             gcp_input = gr.File(label="Select GCP Points File", file_types=[".points"])
             create_btn = gr.Button("Create Tiles")
             georef_btn = gr.Button("Georeference Full Map")
+        # Right column
         with gr.Column(scale=1):
+            selected_tile = gr.Image(
+                type="numpy", label="Selected Tile",
+                height=500, width=500
+            )
             score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
             run_button = gr.Button("Run Inference", interactive=False)
             output = gr.Textbox(label="Progress", lines=5, interactive=False)
             download_file = gr.File(label="Download CSV")
+    selected_tile_path = gr.State()
     # Wire events
     create_btn.click(
+        fn=create_tiles, inputs=image_input,
         outputs=[annotated_out, run_button]
     )
     annotated_out.select(
+        fn=select_tile, inputs=[selected_tile_path],
+        outputs=[selected_tile, run_button, selected_tile_path]
     )
     run_button.click(
+        fn=run_inference,
+        inputs=[selected_tile_path, gcp_input, city_name, score_th],
+        outputs=[output, download_file]
     )
     georef_btn.click(
         outputs=[output]
     )
+    return image_input, gcp_input, city_name, score_th, run_button, output, download_file