OttoYu committed on
Commit
83be062
·
verified ·
1 Parent(s): 18a8050

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -27
app.py CHANGED
@@ -3,6 +3,7 @@ from difflib import SequenceMatcher
3
  import requests
4
  import xml.etree.ElementTree as ET
5
  import gradio as gr
 
6
 
7
  areaData = {
8
  "Hong Kong": {
@@ -80,15 +81,14 @@ areaData = {
80
  }
81
  }
82
 
 
83
  def normalize_text(text):
84
- normalized = text.lower().strip()
85
- standardized = re.sub(r'\s+', ' ', normalized)
86
- return standardized
87
 
88
  def normalize_address(address):
89
- address = re.sub(r'[^\w\s]', '', address)
90
- address = re.sub(r'\s+', ' ', address)
91
- return address.strip().upper()
92
 
93
  def load_and_normalize_address_pool(file_paths):
94
  address_pool = []
@@ -106,15 +106,19 @@ def load_and_normalize_address_pool(file_paths):
106
  print(f"Error reading file {file_path}: {e}")
107
  return address_pool
108
 
 
109
  def similarity(a, b):
 
110
  return SequenceMatcher(None, a, b).ratio()
111
 
 
112
  def extract_relevant_part(user_input):
113
  number_part = re.findall(r'\d+', user_input)
114
  number_part = number_part[0] if number_part else ''
115
  address_part = re.sub(r'^\d+', '', user_input).strip()
116
  return number_part, address_part
117
 
 
118
  def match_address(user_input, address_pool):
119
  number_part, address_part = extract_relevant_part(user_input)
120
  normalized_input = normalize_address(address_part)
@@ -129,6 +133,7 @@ def match_address(user_input, address_pool):
129
  best_match = f"{number_part} {best_match}".strip() if number_part else best_match
130
  return best_match, highest_similarity
131
 
 
132
  def fetch_address_from_als_api(user_input):
133
  api_url = f"https://www.als.gov.hk/lookup?q={requests.utils.quote(user_input)}"
134
  try:
@@ -175,27 +180,34 @@ def fetch_address_from_als_api(user_input):
175
  def extract_building_from_address(user_input):
176
  normalized_input = normalize_text(user_input)
177
  match = re.match(r'([^,]+)', normalized_input)
178
- building_part = match.group(1).strip() if match else normalized_input
 
179
 
180
- return building_part
 
 
181
 
 
 
 
 
 
182
 
183
- def address_search(user_input):
184
- building_part = extract_building_from_address(user_input)
185
- normalized_input = normalize_address(building_part)
186
- best_match, similarity_score = match_address(normalized_input, address_pool)
187
- als_result = fetch_address_from_als_api(best_match) if best_match else "No match found."
 
 
 
188
 
189
- result_str = f"Best match: {best_match} (Similarity: {similarity_score:.2f})\n"
190
- if isinstance(als_result, dict):
191
- for address_type, details in als_result.items():
192
- result_str += f"\n{address_type}:\n"
193
- for key, value in details.items():
194
- result_str += f"{key}: {value}\n"
195
- else:
196
- result_str += als_result
197
 
198
- return result_str
 
 
 
199
 
200
 
201
  def clean_area_data(area_data):
@@ -203,11 +215,13 @@ def clean_area_data(area_data):
203
  for region, districts in area_data.items():
204
  cleaned_districts = {}
205
  for district, subdistricts in districts.items():
206
- valid_subdistricts = [normalize_text(name) for name in subdistricts if not re.search(r'Non-Building|Invalid|Other', name, re.I)]
 
207
  cleaned_districts[normalize_text(district)] = valid_subdistricts
208
  cleaned_area_data[normalize_text(region)] = cleaned_districts
209
  return cleaned_area_data
210
 
 
211
  cleaned_area_data = clean_area_data(areaData)
212
  file_paths = [
213
  'EngBuilding.txt',
@@ -219,10 +233,10 @@ address_pool = load_and_normalize_address_pool(file_paths)
219
 
220
  interface = gr.Interface(
221
  fn=address_search,
222
- inputs=gr.Textbox(label="Enter Address"),
223
- outputs=gr.Textbox(label="ALS API Result"),
224
  title="Address Lookup and Matching (English)",
225
- description="Enter an address to find the closest match and fetch details from the ALS API."
226
  )
227
 
228
- interface.launch()
 
3
  import requests
4
  import xml.etree.ElementTree as ET
5
  import gradio as gr
6
+ from concurrent.futures import ThreadPoolExecutor
7
 
8
  areaData = {
9
  "Hong Kong": {
 
81
  }
82
  }
83
 
84
+
85
  def normalize_text(text):
86
+ return re.sub(r'\s+', ' ', text.lower().strip())
87
+
 
88
 
89
  def normalize_address(address):
90
+ return re.sub(r'[^\w\s]', '', re.sub(r'\s+', ' ', address)).strip().upper()
91
+
 
92
 
93
  def load_and_normalize_address_pool(file_paths):
94
  address_pool = []
 
106
  print(f"Error reading file {file_path}: {e}")
107
  return address_pool
108
 
109
+
110
  def similarity(a, b):
111
+ a, b = a.replace(' ', ''), b.replace(' ', '')
112
  return SequenceMatcher(None, a, b).ratio()
113
 
114
+
115
  def extract_relevant_part(user_input):
116
  number_part = re.findall(r'\d+', user_input)
117
  number_part = number_part[0] if number_part else ''
118
  address_part = re.sub(r'^\d+', '', user_input).strip()
119
  return number_part, address_part
120
 
121
+
122
  def match_address(user_input, address_pool):
123
  number_part, address_part = extract_relevant_part(user_input)
124
  normalized_input = normalize_address(address_part)
 
133
  best_match = f"{number_part} {best_match}".strip() if number_part else best_match
134
  return best_match, highest_similarity
135
 
136
+
137
  def fetch_address_from_als_api(user_input):
138
  api_url = f"https://www.als.gov.hk/lookup?q={requests.utils.quote(user_input)}"
139
  try:
 
180
  def extract_building_from_address(user_input):
181
  normalized_input = normalize_text(user_input)
182
  match = re.match(r'([^,]+)', normalized_input)
183
+ return match.group(1).strip() if match else normalized_input
184
+
185
 
186
+ def address_search(user_inputs):
187
+ results = []
188
+ user_inputs_list = user_inputs.splitlines()
189
 
190
+ def process_input(user_input):
191
+ building_part = extract_building_from_address(user_input)
192
+ normalized_input = normalize_address(building_part)
193
+ best_match, similarity_score = match_address(normalized_input, address_pool)
194
+ als_result = fetch_address_from_als_api(best_match) if best_match else "No match found."
195
 
196
+ result_str = f"Best match: {best_match} (Similarity: {similarity_score:.2f})\n"
197
+ if isinstance(als_result, dict):
198
+ for address_type, details in als_result.items():
199
+ result_str += f"\n{address_type}:\n"
200
+ for key, value in details.items():
201
+ result_str += f"{key}: {value}\n"
202
+ else:
203
+ result_str += als_result
204
 
205
+ return result_str
 
 
 
 
 
 
 
206
 
207
+ with ThreadPoolExecutor() as executor:
208
+ results = list(executor.map(process_input, user_inputs_list))
209
+
210
+ return "\n\n".join(results)
211
 
212
 
213
  def clean_area_data(area_data):
 
215
  for region, districts in area_data.items():
216
  cleaned_districts = {}
217
  for district, subdistricts in districts.items():
218
+ valid_subdistricts = [normalize_text(name) for name in subdistricts if
219
+ not re.search(r'Non-Building|Invalid|Other', name, re.I)]
220
  cleaned_districts[normalize_text(district)] = valid_subdistricts
221
  cleaned_area_data[normalize_text(region)] = cleaned_districts
222
  return cleaned_area_data
223
 
224
+
225
  cleaned_area_data = clean_area_data(areaData)
226
  file_paths = [
227
  'EngBuilding.txt',
 
233
 
234
  interface = gr.Interface(
235
  fn=address_search,
236
+ inputs=gr.Textbox(label="Enter Addresses (one per line, allow Batch Processing)", lines=10),
237
+ outputs=gr.Textbox(label="ALS API Results"),
238
  title="Address Lookup and Matching (English)",
239
+ description="Enter addresses to find the closest matches and fetch details from the ALS API."
240
  )
241
 
242
+ interface.launch()