"""Count OpenStreetMap points of interest (POIs) around a coordinate.

POIs are fetched from the Overpass API for a bounding box around the point,
their raw OSM tag values are collapsed into a small set of feature categories
through ``CATEGORY_MAPPING``, and the number of POIs per category is returned.
"""

from collections import Counter
import asyncio
import logging
import math

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

try:
    import requests
except ImportError:  # pragma: no cover - only needed for the network call
    # Keep the pure helpers (bbox_from_point, count_poi_categories) importable
    # without the HTTP stack; _fetch_overpass_elements reports the problem.
    requests = None

logger = logging.getLogger(__name__)

OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# Server-side time budget embedded in the query (``[timeout:...]``), seconds.
OVERPASS_QUERY_TIMEOUT_S = 120
# Client-side timeout, slightly above the server budget so that the server's
# own timeout answer can still be received instead of hanging forever.
OVERPASS_HTTP_TIMEOUT_S = OVERPASS_QUERY_TIMEOUT_S + 10

# OSM tag keys that identify a POI, in the order used to pick its category.
_POI_TAG_KEYS = ("amenity", "tourism", "shop", "leisure", "sport")
_OSM_ELEMENT_TYPES = ("node", "way", "relation")

# Maps a lower-cased raw OSM tag value to a feature category.
# A value of None means "known tag, deliberately not counted".
CATEGORY_MAPPING = {
    # --- Mapped to 'supermarket' ---
    'supermarket': 'supermarket',
    'swalayan': 'supermarket',  # Indonesian for supermarket/grocery
    'greengrocer': 'supermarket',
    'grocery': 'supermarket',
    'frozen_food': 'supermarket',
    'wholesale': 'supermarket',
    'rice': 'supermarket',
    'water;rice': 'supermarket',
    'butcher': 'supermarket',
    'dairy': 'supermarket',
    'beverages': 'supermarket',

    # --- Mapped to 'convenience_store' ---
    'convenience': 'convenience_store',
    'kiosk': 'convenience_store',
    'variety_store': 'convenience_store',

    # --- Mapped to 'shopping_mall' ---
    'mall': 'shopping_mall',
    'taman anggrek': 'shopping_mall',  # A known mall name, assuming it's a category match

    # --- Mapped to 'electronics_store' ---
    'electronics': 'electronics_store',
    'mobile_phone': 'electronics_store',
    'computer': 'electronics_store',
    'hifi': 'electronics_store',
    'radiotechnics': 'electronics_store',
    # NOTE: 'device_charging_station' was also listed here, but the duplicate
    # key under 'charging_station' below always overrode it.

    # --- Mapped to 'clothing_store' ---
    'clothes': 'clothing_store',
    'shoes': 'clothing_store',
    'tailor': 'clothing_store',
    'boutique': 'clothing_store',
    'fashion_accessories': 'clothing_store',
    'second_hand': 'clothing_store',
    # NOTE(review): 'bag' was also listed here, but a later duplicate key
    # mapped it to None, which is what took effect at runtime. It is kept as
    # unmatched below; move it back here if bags should count as clothing.

    # --- Mapped to 'jewelry_store' ---
    'jewelry': 'jewelry_store',
    'watches': 'jewelry_store',

    # --- Mapped to 'bookstore' ---
    'books': 'bookstore',
    'copyshop': 'bookstore',  # Often linked to bookstores/stationery
    'stationery': 'bookstore',

    # --- Mapped to 'department_store' ---
    'department_store': 'department_store',

    # --- Mapped to 'cafe' ---
    'cafe': 'cafe',
    'coffee': 'cafe',
    'tea': 'cafe',
    'fast_food;cafe': 'cafe',  # Primary includes cafe
    'internet_cafe': 'cafe',
    'biergarten': 'cafe',  # Often functions as a casual cafe/bar space

    # --- Mapped to 'restaurant' ---
    'restaurant': 'restaurant',
    'nightclub;restaurant': 'restaurant',  # Primary includes restaurant
    'seafood': 'restaurant',  # Specific type of restaurant
    'food_court': 'restaurant',  # Collection of eating places
    'outdoor_seating': 'restaurant',  # Implies a place that serves food

    # --- Mapped to 'fast_food' ---
    'fast_food': 'fast_food',
    'snack': 'fast_food',  # Closest general food item
    'deli': 'fast_food',  # Quick-service food counter

    # --- Mapped to 'bakery' ---
    'bakery': 'bakery',
    'confectionery': 'bakery',
    'ice_cream': 'bakery',
    'pastry': 'bakery',
    'pastry;seafood': 'bakery',  # Primary includes pastry

    # --- Mapped to 'clinic' ---
    'clinic': 'clinic',
    'midwife': 'clinic',
    'bidan': 'clinic',  # Indonesian for midwife
    'posyandu': 'clinic',  # Indonesian for community health post
    'medical_supply': 'clinic',
    'doctors': 'clinic',
    'veterinary': 'clinic',

    # --- Mapped to 'dentist' ---
    'dentist': 'dentist',

    # --- Mapped to 'hospital' ---
    'hospital': 'hospital',
    'nursing_home': 'hospital',
    'mortuary': 'hospital',
    'funeral_hall': 'hospital',
    'crematorium': 'hospital',

    # --- Mapped to 'pharmacy' ---
    'pharmacy': 'pharmacy',
    'chemist': 'pharmacy',
    'herbalist': 'pharmacy',

    # --- Mapped to 'gym' ---
    'gym': 'gym',  # Not in the original list, but 'fitness_centre' and 'sports_centre' are close
    'sports_centre': 'gym',
    'fitness_centre': 'gym',
    'fitness_station': 'gym',

    # --- Mapped to 'yoga' ---
    'yoga': 'yoga',  # Not in the original list, so mapping will be None unless a similar tag exists

    # --- Mapped to 'school' ---
    'school': 'school',
    'kindergarten': 'school',
    'music_school': 'school',
    'driving_school': 'school',
    'prep_school': 'school',
    'tuition': 'school',
    'language_school': 'school',
    'dancing_school': 'school',

    # --- Mapped to 'college' ---
    'college': 'college',

    # --- Mapped to 'university' ---
    'university': 'university',

    # --- Mapped to 'hotel' ---
    'hotel': 'hotel',
    'motel': 'hotel',
    'guest_house': 'hotel',
    'resort': 'hotel',
    'beach_resort': 'hotel',
    'chalet': 'hotel',
    'apartment': 'hotel',

    # --- Mapped to 'hostel' ---
    'hostel': 'hostel',

    # --- Mapped to 'attraction' ---
    'attraction': 'attraction',
    'museum': 'attraction',
    'cinema': 'attraction',
    'arts_centre': 'attraction',
    'theme_park': 'attraction',
    'amusement_arcade': 'attraction',
    'aquarium': 'attraction',
    'water_park': 'attraction',
    'gallery': 'attraction',
    'theatre': 'attraction',
    'casino': 'attraction',
    'zoo': 'attraction',
    'planetarium': 'attraction',
    'events_venue': 'attraction',

    # --- Mapped to 'viewpoint' ---
    'viewpoint': 'viewpoint',
    'outpost': 'viewpoint',
    'observation_deck': 'viewpoint',  # Not in list, but a close match to viewpoint

    # --- Mapped to 'bank' ---
    'bank': 'bank',
    'bureau_de_change': 'bank',

    # --- Mapped to 'atm' ---
    'atm': 'atm',
    'money_transfer': 'atm',
    'payment_centre': 'atm',
    'payment_terminal': 'atm',

    # --- Mapped to 'co_working' ---
    'office': 'co_working',  # General office space, often includes co-working
    'studio': 'co_working',  # Can be co-working

    # --- Mapped to 'charging_station' ---
    'charging_station': 'charging_station',
    'device_charging_station': 'charging_station',

    # --- Mapped to None (Unmatched) ---
    'information': None, 'nightclub': None, 'post_office': None, 'fuel': None,
    'taxi': None, 'bar': None, 'parking': None, 'pawnbroker': None, 'yes': None,
    'swimming_pool': None, 'motorcycle_parking': None, 'clock': None,
    'hairdresser': None, 'community_centre': None, 'police': None,
    'nature_reserve': None, 'artwork': None, 'playground': None,
    'car_repair': None, 'car_wash': None, 'telephone': None, 'laundry': None,
    'dry_cleaning': None, 'bicycle': None, 'bicycle_rental': None, 'park': None,
    'pub': None, 'parking_entrance': None, 'fountain': None, 'basket': None,
    'waste_disposal': None, 'marketplace': None, 'townhall': None,
    'doityourself': None, 'furniture': None, 'parking_space': None,
    'grave_yard': None, 'place_of_worship': None, 'ferry_terminal': None,
    'ngo': None, 'trade': None, 'florist': None, 'tyres': None, 'car': None,
    'car_parts': None, 'toys': None, 'sports': None, 'kitchen': None,
    'pet': None, 'travel_agency': None, 'interior_decoration': None,
    'frame': None, 'outdoor': None, 'hardware': None, 'beauty': None,
    'anime': None, 'motorcycle': None, 'bus_station': None, 'ticket': None,
    'photo': None, 'library': None, 'fishing': None, 'waste_basket': None,
    'scuba_diving': None, 'farm': None, 'social_facility': None,
    'bicycle_parking': None, 'garden': None, 'shelter': None, 'bench': None,
    'car_rental': None, 'gift': None, 'courthouse': None, 'toilets': None,
    'pitch': None, 'drinking_water': None, 'grit_bin': None, 'massage': None,
    'alcohol': None, 'optician': None, 'houseware': None, 'chocolate': None,
    'ranger_station': None, 'water_point': None, 'smoking_area': None,
    'wine': None, 'animal_breeding': None, 'marina': None, 'karaoke_box': None,
    'vacant': None, 'vending_machine': None, 'telecommunication': None,
    'post_box': None, 'wilderness_hut': None, 'cosmetics': None,
    'childcare': None, 'distributor minuman dan makana': None, 'rental': None,
    'bag': None, 'chair': None, 'motorcycle_repair': None, 'slipway': None,
    'general': None, 'photo_booth': None, 'water': None, 'camera': None,
    'gas': None, 'horse_riding': None, 'parcel_locker': None, 'training': None,
    'ticket_validator': None, 'nutrition_supplements': None, 'carpet': None,
    'antiques': None, 'recycling': None, 'archive': None, 'table': None,
    'bicycle_repair': None, 'fixme': None, 'hot_tub': None, 'letter_box': None,
    'musical_instrument': None, 'pet_grooming': None, 'flooring': None,
    'art': None, 'printing': None, 'baby_goods': None, 'video': None,
    'hearing_aids': None, 'e-cigarette': None, 'repair': None, 'soccer': None,
    'pawn_shop': None, 'appliance': None, 'bed': None, 'sewing': None,
    'party': None, 'pottery': None, 'indoor_play': None, 'video_games': None,
    'boat_rental': None, 'rak besi': None, 'padel': None, 'golf_course': None,
    'swimming': None, 'prison': None, 'tennis': None, 'basketball': None,
    'car_pooling': None, 'posko banjir': None, 'common': None, 'multi': None,
    'taman': None, 'rth': None, 'ruko': None, 'gedung': None,
    'lapangan bulu tangkis dan sepak bola': None, 'rumah kantor': None,
    'tempat pemakaman': None, 'tpu': None, 'camp_site': None,
    'volleyball': None, 'climbing': None, 'sauna': None,
    'bicycle_repair_station': None, 'no': None, 'perfumery': None,
    'storage': None, 'animal_boarding': None, 'sports_hall': None,
    'bicycle_wash': None, 'badminton': None, 'agrarian': None,
    'nature_reserve;beach_resort': None, 'track': None, 'dog_park': None,
    'leisure': None, 'motor': None, 'bandstand': None, 'picnic_site': None,
    'waste_transfer_station': None, 'compressed_air': None,
    'medical;cosmetics': None, 'conference_centre': None,
    'security_booth': None, 'dressing_room': None, 'bbq': None,
}


def bbox_from_point(lat: float, lon: float, radius_m: float = 500) -> tuple:
    """Return the ``(south, west, north, east)`` box around a point.

    The box extends ``radius_m`` metres from the point in each cardinal
    direction, using 111,320 m per degree of latitude and scaling the
    longitude span by ``cos(lat)``.

    Args:
        lat: Latitude of the centre, in degrees.
        lon: Longitude of the centre, in degrees.
        radius_m: Half of the box side length, in metres.

    Returns:
        A ``(south, west, north, east)`` tuple in degrees.
    """
    lat_delta = radius_m / 111_320
    # A degree of longitude shrinks with cos(latitude), so more degrees are
    # needed to cover the same distance away from the equator.
    lon_delta = radius_m / (111_320 * math.cos(math.radians(lat)))
    return lat - lat_delta, lon - lon_delta, lat + lat_delta, lon + lon_delta


def _build_overpass_query(bbox: str) -> str:
    """Return the Overpass QL query selecting all tagged POIs inside *bbox*.

    *bbox* is a ``"south,west,north,east"`` string, for example
    ``"-6.21,106.82,-6.15,106.85"``.
    """
    filters = "\n".join(
        f'  {kind}["{key}"]({bbox});'
        for key in _POI_TAG_KEYS
        for kind in _OSM_ELEMENT_TYPES
    )
    # 'out center' makes Overpass attach a lat/lon to ways and relations too.
    return (
        f"[out:json][timeout:{OVERPASS_QUERY_TIMEOUT_S}];\n"
        f"(\n{filters}\n);\n"
        "out center;\n"
    )


def _fetch_overpass_elements(query: str) -> list:
    """POST *query* to the Overpass API and return the ``elements`` list.

    Raises:
        ImportError: If the ``requests`` package is not installed.
        requests.RequestException: On timeouts, connection problems, HTTP
            error statuses or a non-JSON response body.
    """
    if requests is None:
        raise ImportError(
            "The 'requests' package is required to query the Overpass API"
        )
    response = requests.post(
        OVERPASS_URL,
        data={"data": query},
        timeout=OVERPASS_HTTP_TIMEOUT_S,
    )
    # Surface rate limiting / gateway errors as HTTP errors instead of an
    # obscure JSON decoding failure on the error page.
    response.raise_for_status()
    return response.json().get("elements", [])


def _primary_category(tags: dict):
    """Return the lower-cased raw category for one element, or None to skip it.

    ``amenity`` wins when present; otherwise the first available of
    ``tourism``, ``shop``, ``leisure`` and ``sport`` is used.
    """
    amenity = tags.get("amenity")
    if amenity is not None:
        if tags.get("tourism") is not None:
            # NOTE(review): elements carrying both an amenity and a tourism
            # tag were discarded by the previous implementation; preserved
            # here - confirm this is intended rather than counting them by
            # their amenity.
            return None
        raw = amenity
    else:
        raw = next(
            (tags[key] for key in _POI_TAG_KEYS[1:] if tags.get(key) is not None),
            None,
        )
    return raw.lower() if isinstance(raw, str) else None


def count_poi_categories(elements) -> dict:
    """Count Overpass elements per mapped feature category.

    Elements without usable tags, and elements whose raw category is unknown
    or mapped to None in ``CATEGORY_MAPPING``, are ignored.

    Args:
        elements: Iterable of Overpass JSON elements (dicts with an optional
            ``"tags"`` dict).

    Returns:
        A dict mapping category name to count, sorted by category name. The
        ``'atm'`` count is reported under ``'num_banks_in_radius'`` (0 when
        there is none) instead of under ``'atm'``.
    """
    counts = Counter()
    for element in elements:
        raw = _primary_category(element.get("tags") or {})
        if raw is None:
            continue
        category = CATEGORY_MAPPING.get(raw)
        if category is not None:
            counts[category] += 1

    result = dict(sorted(counts.items()))
    # NOTE(review): ATMs are exposed under the 'num_banks_in_radius' feature
    # name while 'bank' keeps its own key - presumably the name expected by a
    # downstream consumer; confirm.
    result["num_banks_in_radius"] = result.pop("atm", 0)
    return result


def compute_features(lat: float, lon: float, radius: float = 500) -> dict:
    """Return POI counts per category around ``(lat, lon)``.

    Queries the Overpass API for every node/way/relation tagged with
    ``amenity``, ``tourism``, ``shop``, ``leisure`` or ``sport`` inside the
    bounding box produced by ``bbox_from_point`` (a box, not a circle), then
    aggregates them with ``count_poi_categories``.

    Args:
        lat: Latitude of the centre, in degrees.
        lon: Longitude of the centre, in degrees.
        radius: Half of the box side length, in metres.

    Returns:
        A dict of category name to POI count, plus ``'num_banks_in_radius'``
        (see ``count_poi_categories``).

    Raises:
        ImportError: If the ``requests`` package is not installed.
        requests.RequestException: If the Overpass request fails.
    """
    bbox = ",".join(str(value) for value in bbox_from_point(lat, lon, radius))
    logger.debug("Overpass bbox: %s", bbox)

    elements = _fetch_overpass_elements(_build_overpass_query(bbox))
    logger.debug("Overpass returned %d elements", len(elements))

    return count_poi_categories(elements)