Tryfonas committed on
Commit
ea2d95b
·
verified ·
1 Parent(s): d901fb6

Upload location_standardization.py

Browse files
Files changed (1) hide show
  1. location_standardization.py +20 -0
location_standardization.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # location_standardization.py
2
+
3
+ from rapidfuzz import process, fuzz
4
+
5
def group_similar_locations(locations, threshold=85):
    """Map each location string to a canonical spelling using fuzzy matching.

    Locations are processed in input order. The first spelling seen for a
    group becomes that group's canonical name; each later string is compared
    against the existing canonical names with ``fuzz.ratio`` and is assigned
    to the best-scoring one when the score reaches ``threshold``. Otherwise
    the string starts a new group of its own.

    Args:
        locations: Iterable of location names. Entries that are not strings,
            or that are empty once surrounding whitespace is stripped, are
            skipped.
        threshold: Minimum ``fuzz.ratio`` score for a string to be merged
            into an existing group instead of starting a new one.

    Returns:
        A dict mapping every accepted (stripped) location string to its
        canonical spelling. Canonical spellings map to themselves.
    """
    # Canonical names in first-seen order; these are the fuzzy-match choices.
    canonicals = []
    canonical_map = {}

    for loc in locations:
        if not isinstance(loc, str):
            continue
        # Strip before the emptiness check so whitespace-only entries are
        # dropped instead of becoming an empty-string "location".
        loc = loc.strip()
        if not loc:
            continue

        # Already a canonical name: an identical string is always its own
        # best match, so the fuzzy lookup can be skipped.
        if canonical_map.get(loc) == loc:
            continue

        # extractOne returns None when there is nothing to compare against
        # (e.g. for the very first location), so never unpack it blindly.
        best = None
        if canonicals:
            best = process.extractOne(loc, canonicals, scorer=fuzz.ratio)

        if best is not None and best[1] >= threshold:
            canonical_map[loc] = best[0]
        else:
            canonicals.append(loc)
            canonical_map[loc] = loc

    return canonical_map