muk42 committed on
Commit
3d8a768
·
1 Parent(s): 45be334

historic score matching

Browse files
app.py CHANGED
@@ -16,7 +16,7 @@ logging.basicConfig(level=logging.DEBUG)
16
 
17
  with gr.Blocks() as demo:
18
  with gr.Tab("Inference"):
19
- image_input, gcp_input, city_name,user_crs, score_th, run_button, output, download_file = get_inference_widgets(run_inference,georefImg)
20
  with gr.Tab("Annotation"):
21
  get_annotation_widgets()
22
  with gr.Tab("Map"):
 
16
 
17
  with gr.Blocks() as demo:
18
  with gr.Tab("Inference"):
19
+ image_input, gcp_input, city_name,user_crs, score_th, hist_th, hist_dic, run_button, output, download_file = get_inference_widgets(run_inference,georefImg)
20
  with gr.Tab("Annotation"):
21
  get_annotation_widgets()
22
  with gr.Tab("Map"):
inference_tab/inference_logic.py CHANGED
@@ -34,7 +34,7 @@ _trocr_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34
 
35
 
36
 
37
- def run_inference(tile_dict, gcp_path, city_name, score_th):
38
  IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
39
  CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
40
  MASK_FILE = os.path.join(OUTPUT_DIR, "mask.tif")
@@ -110,6 +110,18 @@ def run_inference(tile_dict, gcp_path, city_name, score_th):
110
  log += msg + "\n"
111
  yield log, None
112
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
  def load_trocr_model():
@@ -556,7 +568,6 @@ def georefImg(image_path, gcp_path, user_crs):
556
  format='GTiff'
557
  )
558
 
559
-
560
 
561
  yield "The map is georeferenced."
562
 
@@ -607,6 +618,69 @@ def extractStreetNet(city_name):
607
 
608
 
609
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  def fuzzyMatch(score_th,tile_dict):
611
  COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
612
  OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")
 
34
 
35
 
36
 
37
+ def run_inference(tile_dict, gcp_path, city_name, score_th, hist_th, hist_dic):
38
  IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
39
  CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
40
  MASK_FILE = os.path.join(OUTPUT_DIR, "mask.tif")
 
110
  log += msg + "\n"
111
  yield log, None
112
 
113
+ if hist_dic is not None:
114
+ # Run fuzzy match against historic street names
115
+ for msg in fuzzyMatchHist(hist_dic, hist_th):
116
+ if isinstance(msg, list):
117
+ log += "Historic fuzzy matching finished! CSVs saved at:\n"
118
+ for f in msg:
119
+ log += f" - {f}\n"
120
+ yield log, msg
121
+ else:
122
+ log += msg + "\n"
123
+ yield log, None
124
+
125
 
126
 
127
  def load_trocr_model():
 
568
  format='GTiff'
569
  )
570
 
 
571
 
572
  yield "The map is georeferenced."
573
 
 
618
 
619
 
620
 
621
def fuzzyMatchHist(hist_dic, hist_th):
    """Fuzzy-match OCR-detected street names against a historic name list.

    Reads the uploaded historic-names CSV (one name per row, no header) and
    the OCR predictions at OUTPUT_DIR/ocr.csv, finds the best fuzzy match for
    every OCR prediction, saves all matches to OUTPUT_DIR/historic_matches.csv,
    and deletes the blob images whose score reaches the threshold (trusted
    matches no longer need manual annotation).

    Args:
        hist_dic: path to a CSV file with one historic street name per line.
        hist_th: score threshold (textbox string or int); rows scoring
            >= hist_th are treated as matched and their blob images removed.

    Yields:
        Progress messages (str), then a one-element list with the path of the
        results CSV.
    """
    # The Gradio textbox delivers the threshold as text.
    hist_th = int(hist_th)

    # === Load Data ===
    hist_df = pd.read_csv(hist_dic, header=None, names=["hist_name"])
    OCR_PATH = os.path.join(OUTPUT_DIR, "ocr.csv")

    names_df = pd.read_csv(
        OCR_PATH,
        names=['blob_id', 'pred_text'],
        dtype={"blob_id": "int64", "pred_text": "string"}
    )

    historic_names = hist_df["hist_name"].dropna().astype(str).tolist()

    # === Fuzzy Match ===
    results = []
    # Guard: extractOne over an empty choice list cannot produce a match.
    if historic_names:
        for _, row in names_df.iterrows():
            ocr_name = row["pred_text"]
            if pd.isna(ocr_name):
                continue

            # Index the result instead of tuple-unpacking it: fuzzywuzzy's
            # extractOne returns (match, score) while rapidfuzz returns
            # (match, score, index), so unpacking into two names breaks
            # under rapidfuzz.
            match = process.extractOne(
                ocr_name,
                historic_names,
                scorer=fuzz.token_sort_ratio
            )

            results.append({
                "blob_id": row["blob_id"],
                "ocr_name": ocr_name,
                "best_hist_match": match[0],
                "match_score": match[1]
            })

    # Pin the columns so an empty result set still yields a valid CSV and the
    # score filter below does not raise a KeyError on a missing column.
    results_df = pd.DataFrame(
        results,
        columns=["blob_id", "ocr_name", "best_hist_match", "match_score"]
    )

    # === Save all results ===
    all_results_path = os.path.join(OUTPUT_DIR, "historic_matches.csv")
    results_df.to_csv(all_results_path, index=False)

    # === Remove blobs that matched well enough ===
    # Scores >= hist_th are trusted matches; delete their images so that only
    # the low-confidence blobs remain for manual annotation.
    matched_df = results_df[results_df["match_score"] >= hist_th]
    for blob_id in matched_df['blob_id']:
        # Remove both the original and the marginalized blob image.
        for fname in (f"{blob_id}.png", f"{blob_id}_margin.png"):
            blob_path = os.path.join(OUTPUT_DIR, "blobs", fname)
            if os.path.exists(blob_path):
                os.remove(blob_path)

    yield "Historic fuzzy matching complete."
    yield [all_results_path]
678
+
679
+
680
+
681
+
682
+
683
+
684
  def fuzzyMatch(score_th,tile_dict):
685
  COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
686
  OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")
inference_tab/inference_setup.py CHANGED
@@ -73,7 +73,8 @@ def select_tile(evt: gr.SelectData,state):
73
  return None, gr.update(interactive=False), state
74
 
75
 
76
-
 
77
 
78
 
79
  def get_inference_widgets(run_inference,georefImg):
@@ -101,7 +102,18 @@ def get_inference_widgets(run_inference,georefImg):
101
  type="numpy", label="Selected Tile",
102
  height=500, width=500
103
  )
104
- score_th = gr.Textbox(label="Enter a score threshold below which to annotate manually")
 
 
 
 
 
 
 
 
 
 
 
105
  run_button = gr.Button("Run Inference", interactive=False)
106
  output = gr.Textbox(label="Progress", lines=5, interactive=False)
107
  download_file = gr.File(label="Download CSV",
@@ -122,7 +134,7 @@ def get_inference_widgets(run_inference,georefImg):
122
  )
123
  run_button.click(
124
  fn=run_inference,
125
- inputs=[selected_tile_path, gcp_input, city_name, score_th],
126
  outputs=[output, download_file]
127
  )
128
 
@@ -133,4 +145,4 @@ def get_inference_widgets(run_inference,georefImg):
133
  )
134
 
135
 
136
- return image_input, gcp_input, city_name, user_crs, score_th, run_button, output, download_file
 
73
  return None, gr.update(interactive=False), state
74
 
75
 
76
def enable_textbox(file):
    """Make the paired textbox editable only once a file has been uploaded."""
    has_file = bool(file)
    return gr.update(interactive=has_file)
78
 
79
 
80
  def get_inference_widgets(run_inference,georefImg):
 
102
  type="numpy", label="Selected Tile",
103
  height=500, width=500
104
  )
105
+ score_th = gr.Textbox(label="Score threshold below which to annotate manually (OSM)",
106
+ info="Computes fuzzy match of the detected street names with OSM street names within 100m buffer")
107
+
108
+ # Historic dictionary of street names and matching score threshold
109
+ hist_dic = gr.File(label="Upload csv with historic street names",file_types=[".csv"])
110
+ hist_th = gr.Textbox(label="Score threshold below which to annotate manually (Directory)",
111
+ info="Computes fuzzy match of the detected street names with the historic street names",
112
+ interactive=False)
113
+ hist_dic.change(enable_textbox, inputs=hist_dic, outputs=hist_th)
114
+
115
+
116
+
117
  run_button = gr.Button("Run Inference", interactive=False)
118
  output = gr.Textbox(label="Progress", lines=5, interactive=False)
119
  download_file = gr.File(label="Download CSV",
 
134
  )
135
  run_button.click(
136
  fn=run_inference,
137
+ inputs=[selected_tile_path, gcp_input, city_name, score_th, hist_th,hist_dic],
138
  outputs=[output, download_file]
139
  )
140
 
 
145
  )
146
 
147
 
148
+ return image_input, gcp_input, city_name, user_crs, score_th, hist_th,hist_dic, run_button, output, download_file