demo-atm-location / utils.py
jonathanjordan21's picture
Update utils.py
d4a2e37 verified
from collections import Counter
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree
import requests
import asyncio
# Maps a lower-cased raw tag value to the normalised category it is counted
# under. A value of None means the tag is recognised but deliberately not
# counted. Every key must appear exactly once: a dict literal silently keeps
# only the last value of a repeated key.
CATEGORY_MAPPING = {
    # --- Mapped to 'supermarket' ---
    'supermarket': 'supermarket',
    'swalayan': 'supermarket',  # Indonesian for supermarket/grocery
    'greengrocer': 'supermarket',
    'grocery': 'supermarket',
    'frozen_food': 'supermarket',
    'wholesale': 'supermarket',
    'rice': 'supermarket',
    'water;rice': 'supermarket',
    'butcher': 'supermarket',
    'dairy': 'supermarket',
    'beverages': 'supermarket',
    # --- Mapped to 'convenience_store' ---
    'convenience': 'convenience_store',
    'kiosk': 'convenience_store',
    'variety_store': 'convenience_store',
    # --- Mapped to 'shopping_mall' ---
    'mall': 'shopping_mall',
    'taman anggrek': 'shopping_mall',  # A known mall name, assuming it's a category match
    # --- Mapped to 'electronics_store' ---
    'electronics': 'electronics_store',
    'mobile_phone': 'electronics_store',
    'computer': 'electronics_store',
    'hifi': 'electronics_store',
    'radiotechnics': 'electronics_store',
    # --- Mapped to 'clothing_store' ---
    'clothes': 'clothing_store',
    'shoes': 'clothing_store',
    'tailor': 'clothing_store',
    'boutique': 'clothing_store',
    'fashion_accessories': 'clothing_store',
    'second_hand': 'clothing_store',
    # --- Mapped to 'jewelry_store' ---
    'jewelry': 'jewelry_store',
    'watches': 'jewelry_store',
    # --- Mapped to 'bookstore' ---
    'books': 'bookstore',
    'copyshop': 'bookstore',  # Often linked to bookstores/stationery
    'stationery': 'bookstore',
    # --- Mapped to 'department_store' ---
    'department_store': 'department_store',
    # --- Mapped to 'cafe' ---
    'cafe': 'cafe',
    'coffee': 'cafe',
    'tea': 'cafe',
    'fast_food;cafe': 'cafe',  # Primary includes cafe
    'internet_cafe': 'cafe',
    'biergarten': 'cafe',  # Often functions as a casual cafe/bar space
    # --- Mapped to 'restaurant' ---
    'restaurant': 'restaurant',
    'nightclub;restaurant': 'restaurant',  # Primary includes restaurant
    'seafood': 'restaurant',  # Specific type of restaurant
    'food_court': 'restaurant',  # Collection of eating places
    'outdoor_seating': 'restaurant',  # Implies a place that serves food
    # --- Mapped to 'fast_food' ---
    'fast_food': 'fast_food',
    'snack': 'fast_food',  # Closest general food item
    'deli': 'fast_food',  # Quick-service food counter
    # --- Mapped to 'bakery' ---
    'bakery': 'bakery',
    'confectionery': 'bakery',
    'ice_cream': 'bakery',
    'pastry': 'bakery',
    'pastry;seafood': 'bakery',  # Primary includes pastry
    # --- Mapped to 'clinic' ---
    'clinic': 'clinic',
    'midwife': 'clinic',
    'bidan': 'clinic',  # Indonesian for midwife
    'posyandu': 'clinic',  # Indonesian for community health post
    'medical_supply': 'clinic',
    'doctors': 'clinic',
    'veterinary': 'clinic',
    # --- Mapped to 'dentist' ---
    'dentist': 'dentist',
    # --- Mapped to 'hospital' ---
    'hospital': 'hospital',
    'nursing_home': 'hospital',
    'mortuary': 'hospital',
    'funeral_hall': 'hospital',
    'crematorium': 'hospital',
    # --- Mapped to 'pharmacy' ---
    'pharmacy': 'pharmacy',
    'chemist': 'pharmacy',
    'herbalist': 'pharmacy',
    # --- Mapped to 'gym' ---
    'gym': 'gym',  # Not in the original list, but 'fitness_centre' and 'sports_centre' are close
    'sports_centre': 'gym',
    'fitness_centre': 'gym',
    'fitness_station': 'gym',
    # --- Mapped to 'yoga' ---
    'yoga': 'yoga',  # Not in the original list; only matches a tag value of exactly 'yoga'
    # --- Mapped to 'school' ---
    'school': 'school',
    'kindergarten': 'school',
    'music_school': 'school',
    'driving_school': 'school',
    'prep_school': 'school',
    'tuition': 'school',
    'language_school': 'school',
    'dancing_school': 'school',
    # --- Mapped to 'college' ---
    'college': 'college',
    # --- Mapped to 'university' ---
    'university': 'university',
    # --- Mapped to 'hotel' ---
    'hotel': 'hotel',
    'motel': 'hotel',
    'guest_house': 'hotel',
    'resort': 'hotel',
    'beach_resort': 'hotel',
    'chalet': 'hotel',
    'apartment': 'hotel',
    # --- Mapped to 'hostel' ---
    'hostel': 'hostel',
    # --- Mapped to 'attraction' ---
    'attraction': 'attraction',
    'museum': 'attraction',
    'cinema': 'attraction',
    'arts_centre': 'attraction',
    'theme_park': 'attraction',
    'amusement_arcade': 'attraction',
    'aquarium': 'attraction',
    'water_park': 'attraction',
    'gallery': 'attraction',
    'theatre': 'attraction',
    'casino': 'attraction',
    'zoo': 'attraction',
    'planetarium': 'attraction',
    'events_venue': 'attraction',
    # --- Mapped to 'viewpoint' ---
    'viewpoint': 'viewpoint',
    'outpost': 'viewpoint',
    'observation_deck': 'viewpoint',  # Not in list, but a close match to viewpoint
    # --- Mapped to 'bank' ---
    'bank': 'bank',
    'bureau_de_change': 'bank',
    # --- Mapped to 'atm' ---
    'atm': 'atm',
    'money_transfer': 'atm',
    'payment_centre': 'atm',
    'payment_terminal': 'atm',
    # --- Mapped to 'co_working' ---
    'office': 'co_working',  # General office space, often includes co-working
    'studio': 'co_working',  # Can be co-working
    # --- Mapped to 'charging_station' ---
    'charging_station': 'charging_station',
    # This key used to be listed under 'electronics_store' as well; the
    # 'charging_station' entry was the one in effect at runtime.
    'device_charging_station': 'charging_station',
    # --- Mapped to None (Unmatched) ---
    # NOTE(review): 'bag' used to be listed under 'clothing_store' as well, but
    # the None entry below was the one in effect at runtime, so it is the one
    # kept. Confirm whether bag shops should be counted as 'clothing_store'.
    'information': None, 'nightclub': None, 'post_office': None, 'fuel': None,
    'taxi': None, 'bar': None, 'parking': None, 'pawnbroker': None, 'yes': None,
    'swimming_pool': None, 'motorcycle_parking': None, 'clock': None,
    'hairdresser': None, 'community_centre': None, 'police': None,
    'nature_reserve': None, 'artwork': None, 'playground': None,
    'car_repair': None, 'car_wash': None, 'telephone': None, 'laundry': None,
    'dry_cleaning': None, 'bicycle': None, 'bicycle_rental': None, 'park': None,
    'pub': None, 'parking_entrance': None, 'fountain': None, 'basket': None,
    'waste_disposal': None, 'marketplace': None, 'townhall': None,
    'doityourself': None, 'furniture': None, 'parking_space': None,
    'grave_yard': None, 'place_of_worship': None, 'ferry_terminal': None,
    'ngo': None, 'trade': None, 'florist': None, 'tyres': None, 'car': None,
    'car_parts': None, 'toys': None, 'sports': None, 'kitchen': None, 'pet': None,
    'travel_agency': None, 'interior_decoration': None, 'frame': None,
    'outdoor': None, 'hardware': None, 'beauty': None, 'anime': None,
    'motorcycle': None, 'bus_station': None, 'ticket': None, 'photo': None,
    'library': None, 'fishing': None, 'waste_basket': None, 'scuba_diving': None,
    'farm': None, 'social_facility': None, 'bicycle_parking': None, 'garden': None,
    'shelter': None, 'bench': None, 'car_rental': None, 'gift': None,
    'courthouse': None, 'toilets': None, 'pitch': None, 'drinking_water': None,
    'grit_bin': None, 'massage': None, 'alcohol': None, 'optician': None,
    'houseware': None, 'chocolate': None, 'ranger_station': None,
    'water_point': None, 'smoking_area': None, 'wine': None, 'animal_breeding': None,
    'marina': None, 'karaoke_box': None, 'vacant': None, 'vending_machine': None,
    'telecommunication': None, 'post_box': None, 'wilderness_hut': None,
    'cosmetics': None, 'childcare': None, 'distributor minuman dan makana': None,
    'rental': None, 'bag': None, 'chair': None, 'motorcycle_repair': None,
    'slipway': None, 'general': None, 'photo_booth': None, 'water': None,
    'camera': None, 'gas': None, 'horse_riding': None, 'parcel_locker': None,
    'training': None, 'ticket_validator': None, 'nutrition_supplements': None,
    'carpet': None, 'antiques': None, 'recycling': None, 'archive': None,
    'table': None, 'bicycle_repair': None, 'fixme': None, 'hot_tub': None,
    'letter_box': None, 'musical_instrument': None, 'pet_grooming': None,
    'flooring': None, 'art': None, 'printing': None, 'baby_goods': None,
    'video': None, 'hearing_aids': None, 'e-cigarette': None, 'repair': None,
    'soccer': None, 'pawn_shop': None, 'appliance': None, 'bed': None,
    'sewing': None, 'party': None, 'pottery': None, 'indoor_play': None,
    'video_games': None, 'boat_rental': None, 'rak besi': None, 'padel': None,
    'golf_course': None, 'swimming': None, 'prison': None, 'tennis': None,
    'basketball': None, 'car_pooling': None, 'posko banjir': None, 'common': None,
    'multi': None, 'taman': None, 'rth': None, 'ruko': None, 'gedung': None,
    'lapangan bulu tangkis dan sepak bola': None, 'rumah kantor': None,
    'tempat pemakaman': None, 'tpu': None, 'camp_site': None, 'volleyball': None,
    'climbing': None, 'sauna': None, 'bicycle_repair_station': None, 'no': None,
    'perfumery': None, 'storage': None, 'animal_boarding': None, 'sports_hall': None,
    'bicycle_wash': None, 'badminton': None, 'agrarian': None,
    'nature_reserve;beach_resort': None, 'track': None, 'dog_park': None,
    'leisure': None, 'motor': None, 'bandstand': None, 'picnic_site': None,
    'waste_transfer_station': None, 'compressed_air': None,
    'medical;cosmetics': None, 'conference_centre': None, 'security_booth': None,
    'dressing_room': None, 'bbq': None,
}
import math
def bbox_from_point(lat, lon, radius_m=500):
    """Return a ``(south, west, north, east)`` box centred on a point.

    The box extends ``radius_m`` metres from the point in each of the four
    directions. One degree of latitude is treated as 111,320 m; the longitude
    extent uses the same length scaled by ``cos(lat)``.

    Args:
        lat: Latitude of the centre, in degrees.
        lon: Longitude of the centre, in degrees.
        radius_m: Distance from the centre to each edge, in metres.

    Returns:
        Tuple ``(south, west, north, east)`` in degrees.
    """
    metres_per_degree = 111_320
    half_height = radius_m / metres_per_degree
    # Meridians converge away from the equator, so the same distance covers
    # more degrees of longitude at higher latitudes.
    half_width = radius_m / (metres_per_degree * math.cos(math.radians(lat)))
    return (
        lat - half_height,
        lon - half_width,
        lat + half_height,
        lon + half_width,
    )
def compute_features(lat, lon, radius=500):
    """Count nearby OpenStreetMap POIs per normalised category.

    Asks the Overpass API for every node, way and relation that carries an
    ``amenity``, ``tourism``, ``shop``, ``leisure`` or ``sport`` tag inside the
    bounding box ``bbox_from_point(lat, lon, radius)``. Each element gets one
    raw category (the first tag present, in that order), which is lower-cased,
    translated through ``CATEGORY_MAPPING`` and counted.

    Args:
        lat: Latitude of the centre point, in degrees.
        lon: Longitude of the centre point, in degrees.
        radius: Distance in metres from the centre to each edge of the
            search box (the search area is a box, not a circle).

    Returns:
        Dict mapping each normalised category to the number of matching
        elements. The count for ``'atm'`` is reported under the key
        ``'num_banks_in_radius'`` (0 when there are none); the dict never
        contains an ``'atm'`` key. Categories with no elements are absent.

    Raises:
        requests.RequestException: On network failure, timeout, an HTTP error
            status, or a response body that is not valid JSON.
    """
    tag_keys = ("amenity", "tourism", "shop", "leisure", "sport")
    element_types = ("node", "way", "relation")

    bbox = ",".join(str(bound) for bound in bbox_from_point(lat, lon, radius))
    print("BBOX", bbox)

    # One selector per (tag key, element type) pair, all limited to the bbox.
    selectors = "\n".join(
        f'  {element_type}["{key}"]({bbox});'
        for key in tag_keys
        for element_type in element_types
    )
    query = f"[out:json][timeout:120];\n(\n{selectors}\n);\nout center;\n"

    overpass_url = "http://overpass-api.de/api/interpreter"
    # Client-side timeout (seconds) sits just above the server-side
    # [timeout:120] so a stalled connection cannot hang the caller forever.
    response = requests.post(overpass_url, data={"data": query}, timeout=130)
    # Fail on HTTP error statuses (e.g. rate limiting) instead of trying to
    # parse an error page as JSON.
    response.raise_for_status()
    elements = response.json().get("elements", [])

    records = [
        {key: el.get("tags", {}).get(key) for key in tag_keys}
        for el in elements
    ]
    if not records:
        # Nothing found in the box: a frame built from no rows has no columns,
        # so return the empty result directly.
        return {"num_banks_in_radius": 0}

    df_tags = pd.DataFrame(records, columns=list(tag_keys))

    # Raw category = first tag present, in tag_keys order.
    category = df_tags["amenity"]
    for key in tag_keys[1:]:
        category = category.fillna(df_tags[key])

    # NOTE(review): elements carrying both an amenity and a tourism tag are
    # excluded from the counts entirely; confirm this is intended.
    has_amenity_and_tourism = df_tags["amenity"].notna() & df_tags["tourism"].notna()
    category = category[~has_amenity_and_tourism].dropna().str.lower()

    print("\nUnique Categories:")
    print(category.unique())

    # Tag values that name no specific kind of place are discarded.
    category = category[~category.isin(["yes", "fixme", "no", "shop", "general"])]

    # Unknown values and values mapped to None are both dropped here.
    mapped = category.map(CATEGORY_MAPPING.get).dropna()
    # sort_index keeps the keys in alphabetical order.
    res = mapped.value_counts().sort_index().to_dict()
    res["num_banks_in_radius"] = res.pop("atm", 0)
    return res
# print(f"Total POI found in the area: {len(df_pois)}")
# df_amenities = pd.read_csv("df_indonesia.csv").rename(
# columns={"latitude":"lat", "longitude":"lon"}
# )
# df_banks = pd.read_csv("df_bank_indonesia.csv").rename(
# columns={"latitude":"lat", "longitude":"lon"}
# )
# df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
# lambda x: eval(x)
# )
# bank_coords = df_banks[['lat','lon']].values
# tree_banks = cKDTree(bank_coords)
# amenity_coords = df_amenities[['lat','lon']].values
# tree_amenities = cKDTree(amenity_coords)
# DATASET_COLUMNS = [
# 'Dining and Drinking', 'Community and Government', 'Retail',
# 'Business and Professional Services', 'Landmarks and Outdoors',
# 'Arts and Entertainment', 'Health and Medicine',
# 'Travel and Transportation', 'Sports and Recreation',
# 'Event'
# ]
# def compute_features(candidate_point, radius=0.005):
# candidates = df_amenities[['lat','lon']].copy()
# candidates['id'] = range(len(candidates))
# bank_coords = df_atm[['lat','lon']].values
# tree_banks = cKDTree(bank_coords)
# amenity_coords = df_amenities[['lat','lon']].values
# tree_amenities = cKDTree(amenity_coords)
# lat, lon = candidate_point
# # Banks
# bank_idxs = tree_banks.query_ball_point([lat, lon], r=radius)
# print("[BANK]", bank_idxs)
# n_banks = len(bank_idxs)
# if n_banks > 0:
# neighbors = df_banks.iloc[bank_idxs]
# mean_dist_banks = np.mean(np.sqrt((neighbors['lat']-lat)**2 + (neighbors['lon']-lon)**2))
# min_dist_bank = np.min(np.sqrt((neighbors['lat']-lat)**2 + (neighbors['lon']-lon)**2))
# else:
# mean_dist_banks = radius
# min_dist_bank = radius
# # Amenities
# amenity_idxs = tree_amenities.query_ball_point([lat, lon], r=radius)
# amenities = df_amenities.iloc[amenity_idxs]
# total_amenities = len(amenities)
# # Flatten all category IDs
# # for cats in amenities['fsq_category_labels']:
# # all_category_ids = [cats[0].split(">")[0].strip() for cats in amenities['fsq_category_labels'] if len(cats)>0]
# all_category_ids = amenities["category"].tolist()
# category_diversity = len(set(all_category_ids))
# features = {
# 'num_banks_in_radius': n_banks,
# # 'mean_dist_banks': mean_dist_banks,
# # 'min_dist_bank': min_dist_bank,
# 'total_amenities': total_amenities,
# 'category_diversity': category_diversity
# }
# # Count occurrences per category
# print("[CATEGORIES]", all_category_ids)
# count_per_category = Counter(all_category_ids)
# for feat in DATASET_COLUMNS:
# print("[FEAT]",feat)
# # for cat, cnt in count_per_category.items():
# features[f'num_{feat}'] = count_per_category.get(feat, 0)
# # # Count occurrences of first category
# # first_categories = [cats[0] for cats in amenities['fsq_category_ids'] if len(cats)>0]
# # count_first_category = Counter(first_categories)
# # for cat, cnt in count_first_category.items():
# # features[f'num_first_{cat}'] = cnt
# return features