Spaces:
Build error
Build error
File size: 9,149 Bytes
18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 18a8050 83be062 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
import re
from difflib import SequenceMatcher
import requests
import xml.etree.ElementTree as ET
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
# Region -> district -> list of sub-district names.
# NOTE(review): some names are listed more than once ("Tsing Lung Bridge"
# twice under "Tsuen Wan", "Ma On Shan" twice under "Sha Tin"), and
# "Kowloon Tong" / "Tsz Wan Shan" each appear under two districts. The
# entries are kept exactly as they were; confirm whether that is intended.
areaData = {
    "Hong Kong": {
        "Central and Western": [
            "Sai Ying Pun", "Kennedy Town", "Shek Tong Tsui", "Sai Wan",
            "Sheung Wan", "Central", "Admiralty", "Mid-Levels West",
            "Mid-Levels", "The Peak",
        ],
        "Wan Chai": [
            "Wan Chai", "Causeway Bay", "Happy Valley", "Tai Hang",
            "Stubbs Road", "Jardine's Lookout",
        ],
        "Eastern": [
            "Tin Hau", "Braemar Hill", "North Point", "Quarry Bay",
            "Sai Wan Ho", "Shau Kei Wan", "Chai Wan", "Siu Sai Wan",
        ],
        "Southern": [
            "Pok Fu Lam", "Aberdeen", "Ap Lei Chau", "Wong Chuk Hang",
            "Shouson Hill", "Repulse Bay", "Chung Hom Kok", "Stanley",
            "Tai Tam", "Shek O", "Telegraph Bay",
        ],
    },
    "Kowloon": {
        "Yau Tsim Mong": [
            "Tsim Sha Tsui", "Yau Ma Tei", "West Kowloon", "Kowloon Tong",
            "Mong Kok", "Tai Kok Tsui", "Jordan", "Prince Edward",
        ],
        "Sham Shui Po": [
            "Mei Foo", "Lai Chi Kok", "Cheung Sha Wan", "Sham Shui Po",
            "Shek Kip Mei", "Tai Wo Ping", "Stonecutters Island",
        ],
        "Kowloon City": [
            "Hung Hom", "To Kwa Wan", "Ma Tau Kok", "Ma Tau Wai", "Kai Tak",
            "Kowloon City", "Ho Man Tin", "Kowloon Tong", "Beacon Hill",
        ],
        "Wong Tai Sin": [
            "San Po Kong", "Wong Tai Sin", "Tung Tau", "Wang Tau Hom",
            "Lok Fu", "Diamond Hill", "Tsz Wan Shan", "Ngau Chi Wan",
        ],
        "Kwun Tong": [
            "Ping Shek", "Kowloon Bay", "Ngau Tau Kok", "Tsz Wan Shan",
            "Kwun Tong", "Sau Mau Ping", "Lam Tin", "Yau Tong",
            "Lei Yue Mun",
        ],
    },
    "New Territories": {
        "Kwai Tsing": [
            "Kwai Chung", "Tsing Yi", "Kwai Fong",
        ],
        "Tsuen Wan": [
            "Tsuen Wan", "Tsing Lung Bridge", "Tsing Hung Bridge",
            "Shen Tsuen", "Tsing Chung Koon", "Ma Wan", "Tsing Lung Bridge",
        ],
        "Tuen Mun": [
            "Tai Lam Chung", "Siu Lam", "Tuen Mun", "Lam Tei",
        ],
        "Yuen Long": [
            "Hung Shui Kiu", "Ha Tsuen", "Lau Fau Shan", "Tin Shui Wai",
            "Yuen Long", "San Tin", "Lok Ma Chau", "Kam Tin", "Shek Kong",
            "Pat Heung",
        ],
        "North": [
            "Fanling", "Luen Wo Hui", "Sheung Shui", "Shek Wu Hui",
            "Sha Tau Kok", "Lok Keng", "Wu Kau Tang",
        ],
        "Tai Po": [
            "Tai Po Market", "Tai Po", "Tai Po Kau", "Tai Mei Tuk",
            "Plover Cove", "Cheung Uk Tau", "Tai Wo",
        ],
        "Sha Tin": [
            "Tai Wai", "Sha Tin", "Fo Tan", "Ma On Shan", "Shui Chuen O",
            "Ma On Shan",
        ],
        "Sai Kung": [
            "Clear Water Bay", "Sai Kung", "Tai Mong Tsai", "Tseung Kwan O",
            "Hang Hau", "Tiu Keng Leng", "Ma Yau Tong",
        ],
        "Islands": [
            "Cheung Chau", "Peng Chau", "Lantau Island", "Tung Chung",
            "Lamma Island",
        ],
    },
}
def normalize_text(text):
    """Return *text* lower-cased and trimmed, with whitespace runs collapsed.

    Each run of one or more whitespace characters inside the string is
    replaced by a single space.
    """
    lowered = text.lower()
    trimmed = lowered.strip()
    return re.sub(r'\s+', ' ', trimmed)
def normalize_address(address):
    """Return a canonical upper-case form of *address* for comparison.

    Every character that is neither a word character nor whitespace is
    dropped, whitespace runs are collapsed to one space, the result is
    trimmed and upper-cased.

    Punctuation is removed *before* whitespace is collapsed. Doing it the
    other way round leaves a double space wherever a punctuation mark stood
    between two spaces (e.g. ``"A , B"``), so the result would not be
    canonical.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', address)
    collapsed = re.sub(r'\s+', ' ', without_punctuation)
    return collapsed.strip().upper()
def load_and_normalize_address_pool(file_paths):
    """Read address lists from text files into a pool of lookup pairs.

    Each non-blank line of each file is one address. Files that cannot be
    read are reported on stdout and skipped, so one bad file does not stop
    the others from loading.

    Args:
        file_paths: Iterable of paths to text files with one address per line.

    Returns:
        A list of ``(original, normalized)`` tuples, where ``original`` is
        the stripped line and ``normalized`` is ``normalize_address`` applied
        to it.
    """
    address_pool = []
    for file_path in file_paths:
        try:
            # An explicit encoding keeps decoding independent of the host
            # locale. NOTE(review): assumes the lists are UTF-8 - confirm.
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    address = line.strip()
                    if address:
                        address_pool.append((address, normalize_address(address)))
        except FileNotFoundError:
            print(f"File not found: {file_path}")
        except (OSError, UnicodeError) as e:
            # Other I/O or decoding failure. Entries read before the failure
            # stay in the pool.
            print(f"Error reading file {file_path}: {e}")
    return address_pool
def similarity(a, b):
    """Return the ``SequenceMatcher`` ratio of *a* and *b*, ignoring spaces.

    All space characters are removed from both strings before they are
    compared, so word spacing has no influence on the score.
    """
    compact_a = a.replace(' ', '')
    compact_b = b.replace(' ', '')
    matcher = SequenceMatcher(None, compact_a, compact_b)
    return matcher.ratio()
def extract_relevant_part(user_input):
    """Split a leading number off *user_input*.

    The number and the remaining text are derived with the same rule: only
    a run of digits at the very start (after optional whitespace) counts.
    Previously the number was taken from anywhere in the string while only
    leading digits were removed from the text, so an input such as
    ``"Tower 5 Foo"`` yielded number ``"5"`` with the text left unchanged.

    Args:
        user_input: Raw or normalised address text.

    Returns:
        ``(number_part, address_part)``. ``number_part`` is the leading
        digit run, or ``''`` when the text does not start with a digit;
        ``address_part`` is the rest of the text, stripped.
    """
    stripped = user_input.strip()
    leading_number = re.match(r'\d+', stripped)
    if leading_number is None:
        return '', stripped
    return leading_number.group(0), stripped[leading_number.end():].strip()
def match_address(user_input, address_pool):
    """Find the pool entry that is most similar to *user_input*.

    The input is split with ``extract_relevant_part``; its text part is
    normalised and scored against the normalised form of every pool entry
    with ``similarity``. When a match is found and the input had a number
    part, that number is prefixed to the returned address.

    Args:
        user_input: Address text to look up.
        address_pool: Iterable of ``(original, normalized)`` pairs.

    Returns:
        ``(best_match, score)``. ``best_match`` is ``None`` and ``score``
        is ``0`` when no entry scores above zero (including an empty pool).
    """
    number_part, address_part = extract_relevant_part(user_input)
    query = normalize_address(address_part)

    top_score = 0
    top_address = None
    for candidate, candidate_key in address_pool:
        score = similarity(query, candidate_key)
        if score <= top_score:
            # Only a strictly better score replaces the current best, so
            # the earliest entry wins a tie.
            continue
        top_score, top_address = score, candidate

    if top_address and number_part:
        top_address = f"{number_part} {top_address}".strip()
    return top_address, top_score
def _als_premises_fields(premises):
    """Collect the address fields read from one premises-address element."""
    return {
        'Estate': premises.findtext(".//EstateName", ''),
        'Street': premises.findtext(".//StreetName", ''),
        'Building No': premises.findtext(".//BuildingNoFrom", ''),
        'District': premises.findtext(".//DcDistrict", ''),
        'Region': premises.findtext(".//Region", ''),
    }


def fetch_address_from_als_api(user_input, timeout=10):
    """Query the ALS lookup endpoint and extract address details.

    Sends ``user_input`` as the ``q`` parameter to
    ``https://www.als.gov.hk/lookup``, parses the response body as XML and
    reads the first English premises address, the first Chinese premises
    address and the first geospatial-information element found in it.

    Args:
        user_input: Address text to look up.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        A dict with up to three keys - ``'English Address'``,
        ``'Chinese Address'`` and ``'Geospatial Information'`` - each
        mapping to a dict of field name to text (``''`` for a missing
        field). Sections absent from the response are omitted. If the
        request fails or the body is not well-formed XML, an error message
        string is returned instead.
    """
    api_url = f"https://www.als.gov.hk/lookup?q={requests.utils.quote(user_input)}"
    try:
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()
        root = ET.fromstring(response.content)
    except requests.RequestException as e:
        return f"Error fetching data from ALS API: {e}"
    except ET.ParseError as e:
        # A malformed body would otherwise propagate and abort the caller.
        return f"Error parsing ALS API response: {e}"

    result = {}
    premises_sections = (
        ('English Address', ".//EngPremisesAddress"),
        ('Chinese Address', ".//ChiPremisesAddress"),
    )
    for label, path in premises_sections:
        premises = root.find(path)
        if premises is not None:
            result[label] = _als_premises_fields(premises)

    geo_info = root.find(".//GeospatialInformation")
    if geo_info is not None:
        result['Geospatial Information'] = {
            'Latitude': geo_info.findtext(".//Latitude", ''),
            'Longitude': geo_info.findtext(".//Longitude", ''),
            'Northing': geo_info.findtext(".//Northing", ''),
            'Easting': geo_info.findtext(".//Easting", ''),
        }
    return result
def extract_building_from_address(user_input):
    """Return the part of *user_input* that precedes the first comma.

    The input is first passed through ``normalize_text``. The text up to
    the first comma is then returned, stripped. When the normalised text is
    empty or begins with a comma there is no such leading segment, and the
    whole normalised text is returned as is.
    """
    normalized_input = normalize_text(user_input)
    leading_segment = re.match(r'([^,]+)', normalized_input)
    if leading_segment is None:
        return normalized_input
    return leading_segment.group(1).strip()
def address_search(user_inputs):
    """Look up every address in a multi-line string and format the results.

    Each non-blank line is treated as one query: its leading comma-separated
    segment is normalised, matched against the module-level ``address_pool``
    and, when a match exists, sent to ``fetch_address_from_als_api``.
    Queries run concurrently in a thread pool; results keep input order.

    Blank or whitespace-only lines are skipped, since they would only
    produce "Best match: None" entries.

    Args:
        user_inputs: Text with one address per line. ``None`` or an empty
            string yields an empty result.

    Returns:
        The formatted per-address reports joined by blank lines, or ``""``
        when there is nothing to look up.
    """
    def process_input(user_input):
        # Build the report for a single query line.
        building_part = extract_building_from_address(user_input)
        normalized_input = normalize_address(building_part)
        best_match, similarity_score = match_address(normalized_input, address_pool)
        als_result = fetch_address_from_als_api(best_match) if best_match else "No match found."

        parts = [f"Best match: {best_match} (Similarity: {similarity_score:.2f})\n"]
        if isinstance(als_result, dict):
            for address_type, details in als_result.items():
                parts.append(f"\n{address_type}:\n")
                parts.extend(f"{key}: {value}\n" for key, value in details.items())
        else:
            # A plain string is either "No match found." or an error message.
            parts.append(als_result)
        return "".join(parts)

    queries = [line for line in (user_inputs or "").splitlines() if line.strip()]
    if not queries:
        return ""

    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_input, queries))
    return "\n\n".join(results)
def clean_area_data(area_data):
    """Return a normalised copy of a region -> district -> names mapping.

    Region names, district names and sub-district names are all passed
    through ``normalize_text``. Sub-district names containing
    "Non-Building", "Invalid" or "Other" (case-insensitive) are left out.
    The input mapping is not modified.
    """
    def is_valid(name):
        # Names carrying one of the placeholder markers are excluded.
        return re.search(r'Non-Building|Invalid|Other', name, re.I) is None

    def clean_districts(districts):
        return {
            normalize_text(district): [
                normalize_text(name) for name in subdistricts if is_valid(name)
            ]
            for district, subdistricts in districts.items()
        }

    return {
        normalize_text(region): clean_districts(districts)
        for region, districts in area_data.items()
    }
# Normalised copy of the district table.
# NOTE(review): nothing in the visible part of this file reads
# cleaned_area_data - confirm whether it is still needed.
cleaned_area_data = clean_area_data(areaData)

# Text files with one address per line. The paths are relative, so they are
# resolved against the current working directory.
file_paths = [
    'EngBuilding.txt',
    'EngEstate.txt',
    'EngStreet.txt',
    'EngVillage.txt'
]
# Loaded once at import time; address_search reads this module-level pool.
address_pool = load_and_normalize_address_pool(file_paths)

interface = gr.Interface(
    fn=address_search,
    inputs=gr.Textbox(label="Enter Addresses (one per line, allow Batch Processing)", lines=10),
    outputs=gr.Textbox(label="ALS API Results"),
    title="Address Lookup and Matching (English)",
    description="Enter addresses to find the closest matches and fetch details from the ALS API."
)
# A stray trailing "|" after this call made the statement a syntax error.
interface.launch()