Spaces:

muk42
/

histOSM

Running on Zero

App Files Files Community

muk42 committed on Oct 9, 2025

Commit

3d8a768

1 Parent(s): 45be334

historic score matching

Browse files

Files changed (3) hide show

app.py +1 -1
inference_tab/inference_logic.py +76 -2
inference_tab/inference_setup.py +16 -4

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ logging.basicConfig(level=logging.DEBUG)
 with gr.Blocks() as demo:
     with gr.Tab("Inference"):
-        image_input, gcp_input, city_name,user_crs, score_th, run_button, output, download_file = get_inference_widgets(run_inference,georefImg)
     with gr.Tab("Annotation"):
         get_annotation_widgets()
     with gr.Tab("Map"):

 with gr.Blocks() as demo:
     with gr.Tab("Inference"):
+        image_input, gcp_input, city_name,user_crs, score_th, hist_th, hist_dic, run_button, output, download_file = get_inference_widgets(run_inference,georefImg)
     with gr.Tab("Annotation"):
         get_annotation_widgets()
     with gr.Tab("Map"):

inference_tab/inference_logic.py CHANGED Viewed

@@ -34,7 +34,7 @@ _trocr_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def run_inference(tile_dict, gcp_path, city_name, score_th):
     IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
     CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
     MASK_FILE = os.path.join(OUTPUT_DIR, "mask.tif")
@@ -110,6 +110,18 @@ def run_inference(tile_dict, gcp_path, city_name, score_th):
             log += msg + "\n"
             yield log, None
 def load_trocr_model():
@@ -556,7 +568,6 @@ def georefImg(image_path, gcp_path, user_crs):
         format='GTiff'
     )
     yield "The map is georeferenced."
@@ -607,6 +618,69 @@ def extractStreetNet(city_name):
 def fuzzyMatch(score_th,tile_dict):
     COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
     OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")

+def run_inference(tile_dict, gcp_path, city_name, score_th, hist_th, hist_dic):
     IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
     CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
     MASK_FILE = os.path.join(OUTPUT_DIR, "mask.tif")
             log += msg + "\n"
             yield log, None
+    if hist_dic is not None:
+        # Run fuzzy match against historic street names
+        for msg in fuzzyMatchHist(hist_dic, hist_th):
+            if isinstance(msg, list):
+                log += "Historic fuzzy matching finished! CSVs saved at:\n"
+                for f in msg:
+                    log += f"  - {f}\n"
+                yield log, msg
+            else:
+                log += msg + "\n"
+                yield log, None
 def load_trocr_model():
         format='GTiff'
     )
     yield "The map is georeferenced."
+def fuzzyMatchHist(hist_dic, hist_th):
+    # Convert threshold to numeric
+    hist_th = int(hist_th)
+    # === Load Data ===
+    hist_df = pd.read_csv(hist_dic,header=None, names=["hist_name"])
+    OCR_PATH = os.path.join(OUTPUT_DIR, "ocr.csv")
+    names_df = pd.read_csv(
+        OCR_PATH,
+        names=['blob_id', 'pred_text'],
+        dtype={"blob_id": "int64", "pred_text": "string"}
+    )
+    historic_names = hist_df["hist_name"].dropna().astype(str).tolist()
+    # === Fuzzy Match ===
+    results = []
+    for _, row in names_df.iterrows():
+        ocr_name = row["pred_text"]
+        if pd.isna(ocr_name):
+            continue
+        best_match, best_score = process.extractOne(
+            ocr_name,
+            historic_names,
+            scorer=fuzz.token_sort_ratio
+        )
+        results.append({
+            "blob_id": row["blob_id"],
+            "ocr_name": ocr_name,
+            "best_hist_match": best_match,
+            "match_score": best_score
+        })
+    results_df = pd.DataFrame(results)
+    # === Save all results ===
+    all_results_path = os.path.join(OUTPUT_DIR, "historic_matches.csv")
+    results_df.to_csv(all_results_path, index=False)
+    # === Filter for manual annotation ===
+    manual_df = results_df[results_df["match_score"] >= hist_th]
+    for blob_id in manual_df['blob_id']:
+        # original blob
+        orig_path = os.path.join(OUTPUT_DIR, "blobs", f"{blob_id}.png")
+        if os.path.exists(orig_path):
+            os.remove(orig_path)
+        # marginalized blob
+        margin_path = os.path.join(OUTPUT_DIR, "blobs", f"{blob_id}_margin.png")
+        if os.path.exists(margin_path):
+            os.remove(margin_path)
+    yield "Historic fuzzy matching complete."
+    yield [all_results_path]
 def fuzzyMatch(score_th,tile_dict):
     COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
     OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")

inference_tab/inference_setup.py CHANGED Viewed

@@ -73,7 +73,8 @@ def select_tile(evt: gr.SelectData,state):
     return None, gr.update(interactive=False), state
 def get_inference_widgets(run_inference,georefImg):
@@ -101,7 +102,18 @@ def get_inference_widgets(run_inference,georefImg):
                 type="numpy", label="Selected Tile",
                 height=500, width=500
             )
-            score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
             run_button = gr.Button("Run Inference", interactive=False)
             output = gr.Textbox(label="Progress", lines=5, interactive=False)
             download_file = gr.File(label="Download CSV",
@@ -122,7 +134,7 @@ def get_inference_widgets(run_inference,georefImg):
     )
     run_button.click(
         fn=run_inference,
-        inputs=[selected_tile_path, gcp_input, city_name, score_th],
         outputs=[output, download_file]
     )
@@ -133,4 +145,4 @@ def get_inference_widgets(run_inference,georefImg):
     )
-    return image_input, gcp_input, city_name, user_crs, score_th, run_button, output, download_file

     return None, gr.update(interactive=False), state
+def enable_textbox(file):
+    return gr.update(interactive=bool(file))
 def get_inference_widgets(run_inference,georefImg):
                 type="numpy", label="Selected Tile",
                 height=500, width=500
             )
+            score_th = gr.Textbox(label="Score threshold below which to annotate manually (OSM)",
+                                  info="Computes fuzzy match of the detected street names with OSM street names within 100m buffer")
+            # Historic dictionary of street names and matching score threshold
+            hist_dic = gr.File(label="Upload csv with historic street names",file_types=[".csv"])
+            hist_th = gr.Textbox(label="Score threshold below which to annotate manually (Directory)",
+                                  info="Computes fuzzy match of the detected street names with the historic street names",
+                                  interactive=False)
+            hist_dic.change(enable_textbox, inputs=hist_dic, outputs=hist_th)
             run_button = gr.Button("Run Inference", interactive=False)
             output = gr.Textbox(label="Progress", lines=5, interactive=False)
             download_file = gr.File(label="Download CSV",
     )
     run_button.click(
         fn=run_inference,
+        inputs=[selected_tile_path, gcp_input, city_name, score_th, hist_th,hist_dic],
         outputs=[output, download_file]
     )
     )
+    return image_input, gcp_input, city_name, user_crs, score_th, hist_th,hist_dic, run_button, output, download_file