Spaces:
Sleeping
Sleeping
| from collections import Counter | |
| import pandas as pd | |
| import numpy as np | |
| from scipy.spatial import cKDTree | |
| import requests | |
| import asyncio | |
# Maps a raw, lower-cased OSM tag value (taken from the amenity / tourism /
# shop / leisure / sport tags) to the feature bucket it is counted under.
# A value of None means the tag is recognised but deliberately not counted.
#
# NOTE: every key must appear exactly once. Python silently keeps the LAST
# value of a repeated key in a dict literal, which previously hid three
# conflicting/duplicate entries ('bag', 'device_charging_station', 'resort').
# The values below are the ones that were effective at runtime.
CATEGORY_MAPPING = {
    # --- Mapped to 'supermarket' ---
    'supermarket': 'supermarket',
    'swalayan': 'supermarket',  # Indonesian for supermarket/grocery
    'greengrocer': 'supermarket',
    'grocery': 'supermarket',
    'frozen_food': 'supermarket',
    'wholesale': 'supermarket',
    'rice': 'supermarket',
    'water;rice': 'supermarket',
    'butcher': 'supermarket',
    'dairy': 'supermarket',
    'beverages': 'supermarket',
    # --- Mapped to 'convenience_store' ---
    'convenience': 'convenience_store',
    'kiosk': 'convenience_store',
    'variety_store': 'convenience_store',
    # --- Mapped to 'shopping_mall' ---
    'mall': 'shopping_mall',
    'taman anggrek': 'shopping_mall',  # A known mall name that shows up as a tag value
    # --- Mapped to 'electronics_store' ---
    'electronics': 'electronics_store',
    'mobile_phone': 'electronics_store',
    'computer': 'electronics_store',
    'hifi': 'electronics_store',
    'radiotechnics': 'electronics_store',
    # --- Mapped to 'clothing_store' ---
    'clothes': 'clothing_store',
    'shoes': 'clothing_store',
    'tailor': 'clothing_store',
    'boutique': 'clothing_store',
    'fashion_accessories': 'clothing_store',
    'second_hand': 'clothing_store',
    # --- Mapped to 'jewelry_store' ---
    'jewelry': 'jewelry_store',
    'watches': 'jewelry_store',
    # --- Mapped to 'bookstore' ---
    'books': 'bookstore',
    'copyshop': 'bookstore',  # Often linked to bookstores/stationery
    'stationery': 'bookstore',
    # --- Mapped to 'department_store' ---
    'department_store': 'department_store',
    # --- Mapped to 'cafe' ---
    'cafe': 'cafe',
    'coffee': 'cafe',
    'tea': 'cafe',
    'fast_food;cafe': 'cafe',  # Multi-value tag that includes cafe
    'internet_cafe': 'cafe',
    'biergarten': 'cafe',  # Often functions as a casual cafe/bar space
    # --- Mapped to 'restaurant' ---
    'restaurant': 'restaurant',
    'nightclub;restaurant': 'restaurant',  # Multi-value tag that includes restaurant
    'seafood': 'restaurant',  # Specific type of restaurant
    'food_court': 'restaurant',  # Collection of eating places
    'outdoor_seating': 'restaurant',  # Implies a place that serves food
    # --- Mapped to 'fast_food' ---
    'fast_food': 'fast_food',
    'snack': 'fast_food',  # Closest general food item
    'deli': 'fast_food',  # Quick-service food counter
    # --- Mapped to 'bakery' ---
    'bakery': 'bakery',
    'confectionery': 'bakery',
    'ice_cream': 'bakery',
    'pastry': 'bakery',
    'pastry;seafood': 'bakery',  # Multi-value tag that includes pastry
    # --- Mapped to 'clinic' ---
    'clinic': 'clinic',
    'midwife': 'clinic',
    'bidan': 'clinic',  # Indonesian for midwife
    'posyandu': 'clinic',  # Indonesian for community health post
    'medical_supply': 'clinic',
    'doctors': 'clinic',
    'veterinary': 'clinic',
    # --- Mapped to 'dentist' ---
    'dentist': 'dentist',
    # --- Mapped to 'hospital' ---
    'hospital': 'hospital',
    'nursing_home': 'hospital',
    'mortuary': 'hospital',
    'funeral_hall': 'hospital',
    'crematorium': 'hospital',
    # --- Mapped to 'pharmacy' ---
    'pharmacy': 'pharmacy',
    'chemist': 'pharmacy',
    'herbalist': 'pharmacy',
    # --- Mapped to 'gym' ---
    'gym': 'gym',  # Identity entry; 'fitness_centre' / 'sports_centre' are the usual tags
    'sports_centre': 'gym',
    'fitness_centre': 'gym',
    'fitness_station': 'gym',
    # --- Mapped to 'yoga' ---
    'yoga': 'yoga',  # Identity entry; only counted when a tag value is literally 'yoga'
    # --- Mapped to 'school' ---
    'school': 'school',
    'kindergarten': 'school',
    'music_school': 'school',
    'driving_school': 'school',
    'prep_school': 'school',
    'tuition': 'school',
    'language_school': 'school',
    'dancing_school': 'school',
    # --- Mapped to 'college' ---
    'college': 'college',
    # --- Mapped to 'university' ---
    'university': 'university',
    # --- Mapped to 'hotel' ---
    'hotel': 'hotel',
    'motel': 'hotel',
    'guest_house': 'hotel',
    'resort': 'hotel',
    'beach_resort': 'hotel',
    'chalet': 'hotel',
    'apartment': 'hotel',
    # --- Mapped to 'hostel' ---
    'hostel': 'hostel',
    # --- Mapped to 'attraction' ---
    'attraction': 'attraction',
    'museum': 'attraction',
    'cinema': 'attraction',
    'arts_centre': 'attraction',
    'theme_park': 'attraction',
    'amusement_arcade': 'attraction',
    'aquarium': 'attraction',
    'water_park': 'attraction',
    'gallery': 'attraction',
    'theatre': 'attraction',
    'casino': 'attraction',
    'zoo': 'attraction',
    'planetarium': 'attraction',
    'events_venue': 'attraction',
    # --- Mapped to 'viewpoint' ---
    'viewpoint': 'viewpoint',
    'outpost': 'viewpoint',
    'observation_deck': 'viewpoint',  # Close match to viewpoint
    # --- Mapped to 'bank' ---
    'bank': 'bank',
    'bureau_de_change': 'bank',
    # --- Mapped to 'atm' ---
    'atm': 'atm',
    'money_transfer': 'atm',
    'payment_centre': 'atm',
    'payment_terminal': 'atm',
    # --- Mapped to 'co_working' ---
    'office': 'co_working',  # General office space, often includes co-working
    'studio': 'co_working',  # Can be co-working
    # --- Mapped to 'charging_station' ---
    'charging_station': 'charging_station',
    # NOTE(review): this key was also listed under 'electronics_store'; the
    # 'charging_station' value was the one in effect and is kept.
    'device_charging_station': 'charging_station',
    # --- Mapped to None (Unmatched) ---
    'information': None, 'nightclub': None, 'post_office': None, 'fuel': None,
    'taxi': None, 'bar': None, 'parking': None, 'pawnbroker': None, 'yes': None,
    'swimming_pool': None, 'motorcycle_parking': None, 'clock': None,
    'hairdresser': None, 'community_centre': None, 'police': None,
    'nature_reserve': None, 'artwork': None, 'playground': None,
    'car_repair': None, 'car_wash': None, 'telephone': None, 'laundry': None,
    'dry_cleaning': None, 'bicycle': None, 'bicycle_rental': None, 'park': None,
    'pub': None, 'parking_entrance': None, 'fountain': None, 'basket': None,
    'waste_disposal': None, 'marketplace': None, 'townhall': None,
    'doityourself': None, 'furniture': None, 'parking_space': None,
    'grave_yard': None, 'place_of_worship': None, 'ferry_terminal': None,
    'ngo': None, 'trade': None, 'florist': None, 'tyres': None, 'car': None,
    'car_parts': None, 'toys': None, 'sports': None, 'kitchen': None, 'pet': None,
    'travel_agency': None, 'interior_decoration': None, 'frame': None,
    'outdoor': None, 'hardware': None, 'beauty': None, 'anime': None,
    'motorcycle': None, 'bus_station': None, 'ticket': None, 'photo': None,
    'library': None, 'fishing': None, 'waste_basket': None, 'scuba_diving': None,
    'farm': None, 'social_facility': None, 'bicycle_parking': None, 'garden': None,
    'shelter': None, 'bench': None, 'car_rental': None, 'gift': None,
    'courthouse': None, 'toilets': None, 'pitch': None, 'drinking_water': None,
    'grit_bin': None, 'massage': None, 'alcohol': None, 'optician': None,
    'houseware': None, 'chocolate': None, 'ranger_station': None,
    'water_point': None, 'smoking_area': None, 'wine': None, 'animal_breeding': None,
    'marina': None, 'karaoke_box': None, 'vacant': None, 'vending_machine': None,
    'telecommunication': None, 'post_box': None, 'wilderness_hut': None,
    'cosmetics': None, 'childcare': None, 'distributor minuman dan makana': None,
    # NOTE(review): 'bag' was also listed under 'clothing_store'; None was the
    # value in effect and is kept.
    'rental': None, 'bag': None, 'chair': None, 'motorcycle_repair': None,
    'slipway': None, 'general': None, 'photo_booth': None, 'water': None,
    'camera': None, 'gas': None, 'horse_riding': None, 'parcel_locker': None,
    'training': None, 'ticket_validator': None, 'nutrition_supplements': None,
    'carpet': None, 'antiques': None, 'recycling': None, 'archive': None,
    'table': None, 'bicycle_repair': None, 'fixme': None, 'hot_tub': None,
    'letter_box': None, 'musical_instrument': None, 'pet_grooming': None,
    'flooring': None, 'art': None, 'printing': None, 'baby_goods': None,
    'video': None, 'hearing_aids': None, 'e-cigarette': None, 'repair': None,
    'soccer': None, 'pawn_shop': None, 'appliance': None, 'bed': None,
    'sewing': None, 'party': None, 'pottery': None, 'indoor_play': None,
    'video_games': None, 'boat_rental': None, 'rak besi': None, 'padel': None,
    'golf_course': None, 'swimming': None, 'prison': None, 'tennis': None,
    'basketball': None, 'car_pooling': None, 'posko banjir': None, 'common': None,
    'multi': None, 'taman': None, 'rth': None, 'ruko': None, 'gedung': None,
    'lapangan bulu tangkis dan sepak bola': None, 'rumah kantor': None,
    'tempat pemakaman': None, 'tpu': None, 'camp_site': None, 'volleyball': None,
    'climbing': None, 'sauna': None, 'bicycle_repair_station': None, 'no': None,
    'perfumery': None, 'storage': None, 'animal_boarding': None, 'sports_hall': None,
    'bicycle_wash': None, 'badminton': None, 'agrarian': None,
    'nature_reserve;beach_resort': None, 'track': None, 'dog_park': None,
    'leisure': None, 'motor': None, 'bandstand': None, 'picnic_site': None,
    'waste_transfer_station': None, 'compressed_air': None,
    'medical;cosmetics': None, 'conference_centre': None, 'security_booth': None,
    'dressing_room': None, 'bbq': None,
}
| import math | |
def bbox_from_point(lat, lon, radius_m=500):
    """Return a bounding box roughly ``radius_m`` metres around a point.

    Uses the flat approximation of 111,320 metres per degree of latitude,
    and scales the longitude span by ``cos(lat)``.

    Args:
        lat: Latitude of the centre point, in decimal degrees.
        lon: Longitude of the centre point, in decimal degrees.
        radius_m: Half-width of the box, in metres.

    Returns:
        A ``(south, west, north, east)`` tuple in decimal degrees, clamped to
        the valid ranges [-90, 90] for latitude and [-180, 180] for longitude.
        A box that would cross the antimeridian is truncated at +/-180 rather
        than wrapped.
    """
    metres_per_degree = 111_320
    lat_delta = radius_m / metres_per_degree

    # At (or numerically next to) the poles cos(lat) collapses to ~0, which
    # would make the longitude span explode; every longitude is within reach
    # there, so fall back to the full range instead.
    cos_lat = math.cos(math.radians(lat))
    if cos_lat > 1e-12:
        lon_delta = radius_m / (metres_per_degree * cos_lat)
    else:
        lon_delta = 360.0

    south = max(-90.0, lat - lat_delta)
    north = min(90.0, lat + lat_delta)
    west = max(-180.0, lon - lon_delta)
    east = min(180.0, lon + lon_delta)
    return south, west, north, east
_OVERPASS_URL = "https://overpass-api.de/api/interpreter"
_OVERPASS_ELEMENT_TYPES = ("node", "way", "relation")
_OVERPASS_TAG_KEYS = ("amenity", "tourism", "shop", "leisure", "sport")
# Server-side budget (seconds) written into the query's [timeout:...] setting.
_OVERPASS_QUERY_TIMEOUT_S = 120
# (connect, read) timeouts for the HTTP call; the read timeout leaves some
# headroom over the server-side budget so the server gets to answer first.
_OVERPASS_HTTP_TIMEOUT_S = (10, _OVERPASS_QUERY_TIMEOUT_S + 30)


def _build_overpass_query(bbox):
    """Return an Overpass QL query for every element carrying one of the POI tags inside *bbox*.

    *bbox* is the already-formatted ``"south,west,north,east"`` string.
    ``out center`` makes Overpass attach a representative lat/lon to ways and
    relations as well as nodes.
    """
    selectors = "\n".join(
        f'  {element_type}["{key}"]({bbox});'
        for key in _OVERPASS_TAG_KEYS
        for element_type in _OVERPASS_ELEMENT_TYPES
    )
    return (
        f"[out:json][timeout:{_OVERPASS_QUERY_TIMEOUT_S}];\n"
        f"(\n{selectors}\n);\n"
        "out center;\n"
    )


def _raw_category(element):
    """Return the raw tag value used to categorise an Overpass element, or None to skip it."""
    tags = element.get("tags") or {}
    amenity = tags.get("amenity")
    if amenity is not None:
        # NOTE(review): elements tagged with BOTH amenity and tourism have
        # always been excluded from the counts. Kept as-is to leave existing
        # feature values unchanged - confirm whether this is intentional.
        if tags.get("tourism") is not None:
            return None
        return amenity
    # No amenity tag: fall back through the remaining tags in priority order.
    for key in ("tourism", "shop", "leisure", "sport"):
        value = tags.get(key)
        if value is not None:
            return value
    return None


def compute_features(lat, lon, radius=500):
    """Count nearby points of interest per category using the Overpass API.

    Queries OpenStreetMap (via Overpass) for every node/way/relation tagged
    with amenity, tourism, shop, leisure or sport inside the bounding box
    returned by ``bbox_from_point(lat, lon, radius)``, maps each element's
    lower-cased tag value through ``CATEGORY_MAPPING`` and counts the
    elements per mapped category. Elements whose tag value is unknown or
    mapped to ``None`` are ignored.

    Args:
        lat: Latitude of the centre point, in decimal degrees.
        lon: Longitude of the centre point, in decimal degrees.
        radius: Half-width of the search box in metres.

    Returns:
        A dict mapping category name to element count, with keys in
        alphabetical order. The ``'atm'`` count is not returned under its own
        name; it is moved to the key ``'num_banks_in_radius'`` (0 when no
        such element was found), which is always present and always last.
        An area without any POIs yields ``{'num_banks_in_radius': 0}``.

    Raises:
        requests.exceptions.RequestException: If the request times out, the
            server answers with an HTTP error status, or the body is not
            valid JSON.
    """
    bbox = ",".join(str(value) for value in bbox_from_point(lat, lon, radius))
    print("BBOX", bbox)

    response = requests.post(
        _OVERPASS_URL,
        data={"data": _build_overpass_query(bbox)},
        timeout=_OVERPASS_HTTP_TIMEOUT_S,
    )
    # Overpass answers with 429/504 when rate-limited or overloaded; surface
    # that as an HTTP error instead of failing later while decoding JSON.
    response.raise_for_status()
    elements = response.json().get("elements", [])

    counts = Counter()
    for element in elements:
        raw = _raw_category(element)
        if raw is None:
            continue
        category = CATEGORY_MAPPING.get(raw.lower())
        if category is not None:
            counts[category] += 1

    # Sorted to keep the key order stable across calls.
    res = dict(sorted(counts.items()))
    res['num_banks_in_radius'] = res.pop('atm', 0)
    return res
| # print(f"Total POI found in the area: {len(df_pois)}") | |
| # df_amenities = pd.read_csv("df_indonesia.csv").rename( | |
| # columns={"latitude":"lat", "longitude":"lon"} | |
| # ) | |
| # df_banks = pd.read_csv("df_bank_indonesia.csv").rename( | |
| # columns={"latitude":"lat", "longitude":"lon"} | |
| # ) | |
| # df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply( | |
| # lambda x: eval(x) | |
| # ) | |
| # bank_coords = df_banks[['lat','lon']].values | |
| # tree_banks = cKDTree(bank_coords) | |
| # amenity_coords = df_amenities[['lat','lon']].values | |
| # tree_amenities = cKDTree(amenity_coords) | |
| # DATASET_COLUMNS = [ | |
| # 'Dining and Drinking', 'Community and Government', 'Retail', | |
| # 'Business and Professional Services', 'Landmarks and Outdoors', | |
| # 'Arts and Entertainment', 'Health and Medicine', | |
| # 'Travel and Transportation', 'Sports and Recreation', | |
| # 'Event' | |
| # ] | |
| # def compute_features(candidate_point, radius=0.005): | |
| # candidates = df_amenities[['lat','lon']].copy() | |
| # candidates['id'] = range(len(candidates)) | |
| # bank_coords = df_atm[['lat','lon']].values | |
| # tree_banks = cKDTree(bank_coords) | |
| # amenity_coords = df_amenities[['lat','lon']].values | |
| # tree_amenities = cKDTree(amenity_coords) | |
| # lat, lon = candidate_point | |
| # # Banks | |
| # bank_idxs = tree_banks.query_ball_point([lat, lon], r=radius) | |
| # print("[BANK]", bank_idxs) | |
| # n_banks = len(bank_idxs) | |
| # if n_banks > 0: | |
| # neighbors = df_banks.iloc[bank_idxs] | |
| # mean_dist_banks = np.mean(np.sqrt((neighbors['lat']-lat)**2 + (neighbors['lon']-lon)**2)) | |
| # min_dist_bank = np.min(np.sqrt((neighbors['lat']-lat)**2 + (neighbors['lon']-lon)**2)) | |
| # else: | |
| # mean_dist_banks = radius | |
| # min_dist_bank = radius | |
| # # Amenities | |
| # amenity_idxs = tree_amenities.query_ball_point([lat, lon], r=radius) | |
| # amenities = df_amenities.iloc[amenity_idxs] | |
| # total_amenities = len(amenities) | |
| # # Flatten all category IDs | |
| # # for cats in amenities['fsq_category_labels']: | |
| # # all_category_ids = [cats[0].split(">")[0].strip() for cats in amenities['fsq_category_labels'] if len(cats)>0] | |
| # all_category_ids = amenities["category"].tolist() | |
| # category_diversity = len(set(all_category_ids)) | |
| # features = { | |
| # 'num_banks_in_radius': n_banks, | |
| # # 'mean_dist_banks': mean_dist_banks, | |
| # # 'min_dist_bank': min_dist_bank, | |
| # 'total_amenities': total_amenities, | |
| # 'category_diversity': category_diversity | |
| # } | |
| # # Count occurrences per category | |
| # print("[CATEGORIES]", all_category_ids) | |
| # count_per_category = Counter(all_category_ids) | |
| # for feat in DATASET_COLUMNS: | |
| # print("[FEAT]",feat) | |
| # # for cat, cnt in count_per_category.items(): | |
| # features[f'num_{feat}'] = count_per_category.get(feat, 0) | |
| # # # Count occurrences of first category | |
| # # first_categories = [cats[0] for cats in amenities['fsq_category_ids'] if len(cats)>0] | |
| # # count_first_category = Counter(first_categories) | |
| # # for cat, cnt in count_first_category.items(): | |
| # # features[f'num_first_{cat}'] = cnt | |
| # return features |