# Page header captured along with the code: "Spaces: Build error"
| import re | |
| from difflib import SequenceMatcher | |
| import requests | |
| import xml.etree.ElementTree as ET | |
| import gradio as gr | |
| from concurrent.futures import ThreadPoolExecutor | |
# Static lookup table: region -> district -> list of sub-area names.
# Each list holds unique names (duplicate entries inside a single district
# list were removed).
# NOTE(review): "Kowloon Tong" appears under both "Yau Tsim Mong" and
# "Kowloon City", and "Tsz Wan Shan" under both "Wong Tai Sin" and
# "Kwun Tong" -- left as-is; confirm which district each belongs to.
areaData = {
    "Hong Kong": {
        "Central and Western": [
            "Sai Ying Pun", "Kennedy Town", "Shek Tong Tsui", "Sai Wan", "Sheung Wan",
            "Central", "Admiralty", "Mid-Levels West", "Mid-Levels", "The Peak",
        ],
        "Wan Chai": [
            "Wan Chai", "Causeway Bay", "Happy Valley", "Tai Hang", "Stubbs Road",
            "Jardine's Lookout",
        ],
        "Eastern": [
            "Tin Hau", "Braemar Hill", "North Point", "Quarry Bay", "Sai Wan Ho",
            "Shau Kei Wan", "Chai Wan", "Siu Sai Wan",
        ],
        "Southern": [
            "Pok Fu Lam", "Aberdeen", "Ap Lei Chau", "Wong Chuk Hang", "Shouson Hill",
            "Repulse Bay", "Chung Hom Kok", "Stanley", "Tai Tam", "Shek O", "Telegraph Bay",
        ],
    },
    "Kowloon": {
        "Yau Tsim Mong": [
            "Tsim Sha Tsui", "Yau Ma Tei", "West Kowloon", "Kowloon Tong", "Mong Kok",
            "Tai Kok Tsui", "Jordan", "Prince Edward",
        ],
        "Sham Shui Po": [
            "Mei Foo", "Lai Chi Kok", "Cheung Sha Wan", "Sham Shui Po", "Shek Kip Mei",
            "Tai Wo Ping", "Stonecutters Island",
        ],
        "Kowloon City": [
            "Hung Hom", "To Kwa Wan", "Ma Tau Kok", "Ma Tau Wai", "Kai Tak", "Kowloon City",
            "Ho Man Tin", "Kowloon Tong", "Beacon Hill",
        ],
        "Wong Tai Sin": [
            "San Po Kong", "Wong Tai Sin", "Tung Tau", "Wang Tau Hom", "Lok Fu", "Diamond Hill",
            "Tsz Wan Shan", "Ngau Chi Wan",
        ],
        "Kwun Tong": [
            "Ping Shek", "Kowloon Bay", "Ngau Tau Kok", "Tsz Wan Shan", "Kwun Tong",
            "Sau Mau Ping", "Lam Tin", "Yau Tong", "Lei Yue Mun",
        ],
    },
    "New Territories": {
        "Kwai Tsing": [
            "Kwai Chung", "Tsing Yi", "Kwai Fong",
        ],
        "Tsuen Wan": [
            "Tsuen Wan", "Tsing Lung Bridge", "Tsing Hung Bridge", "Shen Tsuen", "Tsing Chung Koon",
            "Ma Wan",
        ],
        "Tuen Mun": [
            "Tai Lam Chung", "Siu Lam", "Tuen Mun", "Lam Tei",
        ],
        "Yuen Long": [
            "Hung Shui Kiu", "Ha Tsuen", "Lau Fau Shan", "Tin Shui Wai", "Yuen Long", "San Tin",
            "Lok Ma Chau", "Kam Tin", "Shek Kong", "Pat Heung",
        ],
        "North": [
            "Fanling", "Luen Wo Hui", "Sheung Shui", "Shek Wu Hui", "Sha Tau Kok", "Lok Keng",
            "Wu Kau Tang",
        ],
        "Tai Po": [
            "Tai Po Market", "Tai Po", "Tai Po Kau", "Tai Mei Tuk", "Plover Cove", "Cheung Uk Tau",
            "Tai Wo",
        ],
        "Sha Tin": [
            "Tai Wai", "Sha Tin", "Fo Tan", "Ma On Shan", "Shui Chuen O",
        ],
        "Sai Kung": [
            "Clear Water Bay", "Sai Kung", "Tai Mong Tsai", "Tseung Kwan O", "Hang Hau",
            "Tiu Keng Leng", "Ma Yau Tong",
        ],
        "Islands": [
            "Cheung Chau", "Peng Chau", "Lantau Island", "Tung Chung", "Lamma Island",
        ],
    },
}
def normalize_text(text):
    """Lower-case *text*, trim its ends and squeeze whitespace runs to one space."""
    trimmed = text.lower().strip()
    # Any run of whitespace (tabs, newlines, repeated spaces) becomes one space.
    return re.sub(r'\s+', ' ', trimmed)
def normalize_address(address):
    """Return *address* in a canonical form for fuzzy comparison.

    Punctuation (anything that is neither a word character nor whitespace)
    is removed, whitespace runs are collapsed to a single space, the ends
    are trimmed and the result is upper-cased.

    Punctuation is stripped *before* whitespace is collapsed so that input
    such as ``"A , B"`` does not leave a double space behind; this also
    makes the function idempotent.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', address)
    collapsed = re.sub(r'\s+', ' ', without_punctuation)
    return collapsed.strip().upper()
def load_and_normalize_address_pool(file_paths):
    """Read address lists from text files and pair each entry with its normalized form.

    Args:
        file_paths: iterable of paths to text files holding one address per line.

    Returns:
        A list of ``(original_address, normalized_address)`` tuples, in file
        and line order. Blank lines are skipped.

    Loading is best-effort: a file that is missing or cannot be read is
    reported on stdout and skipped, and the remaining files are still loaded.
    """
    address_pool = []
    for file_path in file_paths:
        try:
            # An explicit encoding keeps decoding independent of the platform
            # locale; "utf-8-sig" additionally drops a leading BOM so it cannot
            # end up glued to the first address.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                for line in f:
                    address = line.strip()
                    if address:
                        address_pool.append((address, normalize_address(address)))
        except FileNotFoundError:
            print(f"File not found: {file_path}")
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the load.
            print(f"Error reading file {file_path}: {e}")
    return address_pool
def similarity(a, b):
    """Return the ``SequenceMatcher`` ratio of *a* and *b*, ignoring space characters."""
    compact_a = a.replace(' ', '')
    compact_b = b.replace(' ', '')
    matcher = SequenceMatcher(None, compact_a, compact_b)
    return matcher.ratio()
def extract_relevant_part(user_input):
    """Split *user_input* into a number and the remaining address text.

    Returns:
        ``(number_part, address_part)`` where ``number_part`` is the first run
        of digits found anywhere in the input (``''`` if there is none) and
        ``address_part`` is the input with a digit run at its very start
        removed, then stripped of surrounding whitespace.
    """
    first_digits = re.search(r'\d+', user_input)
    number_part = first_digits.group() if first_digits else ''

    # Only digits sitting at position 0 are cut from the address text.
    leading_digits = re.match(r'\d+', user_input)
    remainder = user_input[leading_digits.end():] if leading_digits else user_input
    return number_part, remainder.strip()
def match_address(user_input, address_pool):
    """Find the pool entry most similar to *user_input*.

    Args:
        user_input: free-text address to look up.
        address_pool: iterable of ``(original_address, normalized_address)`` pairs.

    Returns:
        ``(best_match, score)``. ``best_match`` is the original text of the
        highest-scoring entry, prefixed with the number extracted from the
        input when there is one; it is ``None`` when no entry scores above 0.
        On ties the earliest entry wins.
    """
    number_part, address_part = extract_relevant_part(user_input)
    query = normalize_address(address_part)

    top_address, top_score = None, 0
    for candidate, candidate_normalized in address_pool:
        score = similarity(query, candidate_normalized)
        if score <= top_score:
            continue
        top_address, top_score = candidate, score

    if top_address and number_part:
        top_address = f"{number_part} {top_address}".strip()
    return top_address, top_score
# (output key, XML tag) pairs read from each premises-address element.
_ALS_PREMISES_FIELDS = (
    ('Estate', 'EstateName'),
    ('Street', 'StreetName'),
    ('Building No', 'BuildingNoFrom'),
    ('District', 'DcDistrict'),
    ('Region', 'Region'),
)

# (output key, XML tag) pairs read from the geospatial element.
_ALS_GEO_FIELDS = (
    ('Latitude', 'Latitude'),
    ('Longitude', 'Longitude'),
    ('Northing', 'Northing'),
    ('Easting', 'Easting'),
)

# (result section, XML tag of the section element, fields to read from it).
_ALS_SECTIONS = (
    ('English Address', 'EngPremisesAddress', _ALS_PREMISES_FIELDS),
    ('Chinese Address', 'ChiPremisesAddress', _ALS_PREMISES_FIELDS),
    ('Geospatial Information', 'GeospatialInformation', _ALS_GEO_FIELDS),
)


def _collect_als_fields(element, fields):
    """Map each output key to the text of its descendant tag ('' when absent)."""
    return {key: element.findtext(f".//{tag}", '') for key, tag in fields}


def fetch_address_from_als_api(user_input, timeout=10):
    """Query the ALS lookup endpoint and extract address details from its XML reply.

    Args:
        user_input: address text sent as the ``q`` query parameter.
        timeout: seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block a worker indefinitely.

    Returns:
        On success, a dict that may contain the sections ``'English Address'``,
        ``'Chinese Address'`` and ``'Geospatial Information'`` (each present
        only if the corresponding element occurs in the response); every
        section maps field names to strings. On an HTTP/network failure or an
        unparseable response, a human-readable error string instead.
    """
    api_url = f"https://www.als.gov.hk/lookup?q={requests.utils.quote(user_input)}"
    try:
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()
        root = ET.fromstring(response.content)
    except requests.RequestException as e:
        return f"Error fetching data from ALS API: {e}"
    except ET.ParseError as e:
        # A malformed body is reported like any other lookup failure rather
        # than propagating and aborting the caller's whole batch.
        return f"Error parsing ALS API response: {e}"

    result = {}
    for section, section_tag, fields in _ALS_SECTIONS:
        # Only the first matching element of each kind is used.
        element = root.find(f".//{section_tag}")
        if element is not None:
            result[section] = _collect_als_fields(element, fields)
    return result
def extract_building_from_address(user_input):
    """Return the normalized text that precedes the first comma of *user_input*.

    The input is first passed through ``normalize_text``. If the normalized
    text is empty or begins with a comma, it is returned unchanged.
    """
    normalized_input = normalize_text(user_input)
    before_comma, _, _ = normalized_input.partition(',')
    if not before_comma:
        # Nothing precedes a comma: fall back to the whole normalized text.
        return normalized_input
    return before_comma.strip()
def address_search(user_inputs):
    """Match each input line against the address pool and look the match up via ALS.

    Args:
        user_inputs: text containing one address per line. Blank or
            whitespace-only lines are ignored.

    Returns:
        One report per non-blank line, in input order, separated by a blank
        line. Each report starts with the best match and its similarity
        score, followed by the ALS details or the lookup's message. Returns
        ``''`` when there is nothing to process.
    """
    def process_input(user_input):
        """Build the report for a single address line."""
        building_part = extract_building_from_address(user_input)
        normalized_input = normalize_address(building_part)
        best_match, similarity_score = match_address(normalized_input, address_pool)
        als_result = fetch_address_from_als_api(best_match) if best_match else "No match found."

        parts = [f"Best match: {best_match} (Similarity: {similarity_score:.2f})\n"]
        if isinstance(als_result, dict):
            for address_type, details in als_result.items():
                parts.append(f"\n{address_type}:\n")
                parts.extend(f"{key}: {value}\n" for key, value in details.items())
        else:
            # Error / "no match" messages come back as plain strings.
            parts.append(als_result)
        return "".join(parts)

    # Skip blank lines so they do not produce spurious "Best match: None" reports.
    user_inputs_list = [line for line in (user_inputs or "").splitlines() if line.strip()]
    if not user_inputs_list:
        return ""

    # Lines are processed concurrently; executor.map keeps the input order.
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_input, user_inputs_list))
    return "\n\n".join(results)
def clean_area_data(area_data):
    """Return a normalized copy of a region -> district -> names mapping.

    Region keys, district keys and sub-district names are passed through
    ``normalize_text``. Names containing "Non-Building", "Invalid" or
    "Other" (case-insensitive) are dropped.
    """
    excluded = re.compile(r'Non-Building|Invalid|Other', re.I)
    return {
        normalize_text(region): {
            normalize_text(district): [
                normalize_text(name)
                for name in subdistricts
                if not excluded.search(name)
            ]
            for district, subdistricts in districts.items()
        }
        for region, districts in area_data.items()
    }
# Text files that supply the address pool; each is read line by line.
file_paths = ['EngBuilding.txt', 'EngEstate.txt', 'EngStreet.txt', 'EngVillage.txt']

# (original, normalized) address pairs used for fuzzy matching.
address_pool = load_and_normalize_address_pool(file_paths)

# Normalized copy of the static region/district table.
cleaned_area_data = clean_area_data(areaData)
# Gradio UI: a multi-line text box in, a text box with the combined reports out.
interface = gr.Interface(
    address_search,
    gr.Textbox(lines=10, label="Enter Addresses (one per line, allow Batch Processing)"),
    gr.Textbox(label="ALS API Results"),
    description="Enter addresses to find the closest matches and fetch details from the ALS API.",
    title="Address Lookup and Matching (English)",
)

# Start serving the interface.
interface.launch()