"""Spatial feature extraction around candidate points.

At import time this module loads the amenity and bank point datasets from
CSV, builds one KD-tree per dataset over the raw ``(lat, lon)`` columns, and
exposes :func:`compute_features`, which counts banks and amenities (overall
and per category) within a radius of a candidate point.
"""

import ast
import logging
from collections import Counter

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

logger = logging.getLogger(__name__)


def _load_points(csv_path):
    """Read a point dataset and rename latitude/longitude to lat/lon."""
    return pd.read_csv(csv_path).rename(
        columns={"latitude": "lat", "longitude": "lon"}
    )


def _category_key(label):
    """Truncate a ``"A > B > C"`` category label to its first two levels."""
    return ">".join(label.split(">")[:2]).strip()


df_amenities = _load_points("df_indonesia.csv")
df_banks = _load_points("df_bank_indonesia.csv")

# The label column is stored as text in the CSV. It is parsed with
# ast.literal_eval rather than eval so that file content is only ever treated
# as a Python literal and never executed as code.
# NOTE(review): assumes every cell is the repr of a list of strings — confirm
# against the dataset export.
df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
    ast.literal_eval
)

bank_coords = df_banks[["lat", "lon"]].values
tree_banks = cKDTree(bank_coords)

amenity_coords = df_amenities[["lat", "lon"]].values
tree_amenities = cKDTree(amenity_coords)

# Two-level category keys ("Top level > Second level", or a bare top-level
# name) for which compute_features always emits a ``num_<key>`` count, so the
# resulting feature vector has a fixed set of columns.
DATASET_COLUMNS = [
    "Community and Government > Education",
    "Landmarks and Outdoors > Structure",
    "Arts and Entertainment > Art Gallery",
    "Arts and Entertainment > Movie Theater",
    "Retail > Department Store",
    "Business and Professional Services > Office",
    "Travel and Transportation > Road",
    "Dining and Drinking > Restaurant",
    "Community and Government > Residential Building",
    "Dining and Drinking > Breakfast Spot",
    "Business and Professional Services",
    "Arts and Entertainment > Amusement Park",
    "Travel and Transportation > Lodging",
    "Arts and Entertainment",
    "Dining and Drinking > Food Truck",
    "Dining and Drinking > Bar",
    "Retail > Sporting Goods Retail",
    "Retail > Computers and Electronics Retail",
    "Arts and Entertainment > Arcade",
    "Dining and Drinking > Cafe, Coffee, and Tea House",
    "Travel and Transportation > Transport Hub",
    "Health and Medicine > Physician",
    "Community and Government > Government Building",
    "Business and Professional Services > Convention Center",
    "Arts and Entertainment > Performing Arts Venue",
    "Landmarks and Outdoors > Field",
    "Business and Professional Services > Financial Service",
    "Landmarks and Outdoors > Park",
    "Sports and Recreation > Water Sports",
    "Landmarks and Outdoors > Other Great Outdoors",
    "Event > Convention",
    "Retail > Shopping Mall",
    "Business and Professional Services > Distribution Center",
    "Business and Professional Services > Automotive Service",
    "Health and Medicine > Hospital",
    "Dining and Drinking > Snack Place",
    "Business and Professional Services > Event Space",
    "Dining and Drinking > Food Court",
    "Travel and Transportation > Bike Rental",
    "Travel and Transportation > Parking",
    "Business and Professional Services > Radio Station",
    "Health and Medicine > Dentist",
    "Landmarks and Outdoors > Beach",
    "Retail > Flea Market",
    "Retail > Fashion Retail",
    "Retail > Food and Beverage Retail",
    "Retail > Office Supply Store",
    "Community and Government > Spiritual Center",
    "Health and Medicine > Medical Center",
    "Dining and Drinking > Bakery",
    "Dining and Drinking > Cafeteria",
    "Retail > Convenience Store",
    "Arts and Entertainment > Public Art",
    "Retail > Newsstand",
    "Retail > Furniture and Home Store",
    "Business and Professional Services > Auditorium",
    "Landmarks and Outdoors > Garden",
    "Community and Government > Library",
    "Community and Government > Organization",
    "Business and Professional Services > Health and Beauty Service",
    "Dining and Drinking > Food Stand",
    "Retail > Pharmacy",
    "Retail > Record Store",
    "Arts and Entertainment > Night Club",
    "Landmarks and Outdoors > Farm",
    "Community and Government > Social Club",
    "Sports and Recreation > Gym and Studio",
    "Sports and Recreation > Racquet Sports",
    "Retail > Costume Store",
    "Sports and Recreation > Soccer",
    "Event > Entertainment Event",
    "Retail > Market",
    "Sports and Recreation",
    "Community and Government > Town Hall",
    "Arts and Entertainment > Water Park",
    "Landmarks and Outdoors",
    "Business and Professional Services > Shoe Repair Service",
    "Retail > Automotive Retail",
    "Dining and Drinking > Dessert Shop",
    "Retail > Music Store",
    "Business and Professional Services > Factory",
    "Community and Government > Assisted Living",
    "Travel and Transportation > Transportation Service",
    "Health and Medicine > Medical Lab",
    "Retail",
    "Travel and Transportation > Rest Area",
    "Landmarks and Outdoors > Hiking Trail",
    "Sports and Recreation > Martial Arts Dojo",
    "Business and Professional Services > Laundry Service",
    "Retail > Pet Supplies Store",
    "Business and Professional Services > Design Studio",
    "Community and Government > Housing Development",
    "Business and Professional Services > Construction",
    "Business and Professional Services > Real Estate Service",
    "Landmarks and Outdoors > Historic and Protected Site",
    "Business and Professional Services > Photography Service",
    "Landmarks and Outdoors > Plaza",
    "Business and Professional Services > Child Care Service",
    "Event > Other Event",
    "Arts and Entertainment > Comedy Club",
    "Arts and Entertainment > Strip Club",
    "Arts and Entertainment > Casino",
    "Business and Professional Services > Legal Service",
    "Retail > Miscellaneous Store",
    "Travel and Transportation",
    "Retail > Boutique",
    "Sports and Recreation > Race Track",
    "Event > Marketplace",
    "Sports and Recreation > Baseball",
    "Retail > Arts and Crafts Store",
    "Travel and Transportation > Moving Target",
    "Community and Government > Cemetery",
    "Business and Professional Services > Wholesaler",
    "Business and Professional Services > Advertising Agency",
    "Arts and Entertainment > Stadium",
    "Arts and Entertainment > Country Dance Club",
    "Landmarks and Outdoors > Bridge",
    "Health and Medicine > Alternative Medicine Clinic",
    "Arts and Entertainment > Zoo",
    "Business and Professional Services > Food and Beverage Service",
    "Business and Professional Services > Storage Facility",
    "Business and Professional Services > Recycling Facility",
    "Business and Professional Services > Funeral Home",
    "Arts and Entertainment > Bowling Alley",
    "Retail > Hardware Store",
    "Retail > Stationery Store",
    "Retail > Bookstore",
    "Retail > Smoke Shop",
    "Sports and Recreation > Running and Track",
    "Retail > Vape Store",
    "Landmarks and Outdoors > Campground",
    "Retail > Gift Store",
    "Landmarks and Outdoors > Palace",
    "Business and Professional Services > Recording Studio",
    "Landmarks and Outdoors > States and Municipalities",
    "Community and Government > Community Center",
    "Travel and Transportation > Fuel Station",
    "Retail > Cosmetics Store",
    "Retail > Hobby Store",
    "Travel and Transportation > Travel Lounge",
    "Community and Government > Polling Place",
    "Retail > Baby Store",
    "Business and Professional Services > Telecommunication Service",
    "Health and Medicine > Healthcare Clinic",
    "Dining and Drinking > Donut Shop",
    "Travel and Transportation > Tourist Information and Service",
    "Dining and Drinking > Juice Bar",
    "Retail > Antique Store",
    "Retail > Toy Store",
    "Business and Professional Services > Event Service",
    "Landmarks and Outdoors > Monument",
    "Sports and Recreation > Snow Sports",
    "Travel and Transportation > Travel Agency",
    "Landmarks and Outdoors > Stable",
    "Arts and Entertainment > Museum",
    "Business and Professional Services > Home Improvement Service",
    "Retail > Flower Store",
    "Business and Professional Services > Technology Business",
    "Travel and Transportation > Electric Vehicle Charging Station",
    "Business and Professional Services > Business Center",
    "Landmarks and Outdoors > Lake",
    "Business and Professional Services > Film Studio",
    "Dining and Drinking > Vineyard",
    "Dining and Drinking > Distillery",
    "Business and Professional Services > Warehouse",
    "Business and Professional Services > Pet Service",
    "Dining and Drinking > Brewery",
    "Retail > Eyecare Store",
    "Health and Medicine",
    "Business and Professional Services > Tailor",
    "Retail > Board Store",
    "Sports and Recreation > Golf",
    "Business and Professional Services > Audiovisual Service",
    "Business and Professional Services > Manufacturer",
    "Retail > Shopping Plaza",
    "Business and Professional Services > Publisher",
    "Retail > Print Store",
    "Sports and Recreation > Gymnastics",
    "Landmarks and Outdoors > Mountain",
    "Retail > Perfume Store",
    "Arts and Entertainment > Gaming Cafe",
    "Travel and Transportation > Boat or Ferry",
    "Community and Government > Cultural Center",
    "Business and Professional Services > Employment Agency",
    "Arts and Entertainment > Psychic and Astrologer",
    "Retail > Big Box Store",
    "Arts and Entertainment > Pool Hall",
    "Community and Government > Animal Shelter",
    "Landmarks and Outdoors > Surf Spot",
    "Landmarks and Outdoors > Castle",
    "Arts and Entertainment > Internet Cafe",
    "Health and Medicine > Emergency Service",
    "Sports and Recreation > Volleyball Court",
    "Business and Professional Services > Security and Safety",
    "Sports and Recreation > Basketball",
    "Business and Professional Services > Shipping, Freight, and Material Transportation Service",
    "Health and Medicine > Veterinarian",
    "Health and Medicine > Acupuncture Clinic",
    "Dining and Drinking > Creperie",
    "Retail > Outlet Store",
    "Dining and Drinking > Bagel Shop",
    "Landmarks and Outdoors > Scenic Lookout",
    "Dining and Drinking > Winery",
    "Retail > Garden Center",
    "Business and Professional Services > Import and Export Service",
    "Sports and Recreation > Skating",
    "Health and Medicine > Physical Therapy Clinic",
    "Landmarks and Outdoors > Botanical Garden",
    "Travel and Transportation > Train",
    "Retail > Warehouse or Wholesale Store",
    "Retail > Luggage Store",
    "Business and Professional Services > TV Station",
    "Retail > Pop-Up Store",
    "Business and Professional Services > Entertainment Service",
    "Landmarks and Outdoors > Sculpture Garden",
    "Landmarks and Outdoors > Harbor or Marina",
    "Health and Medicine > Home Health Care Service",
    "Retail > Adult Store",
    "Health and Medicine > Chiropractor",
    "Health and Medicine > Mental Health Service",
    "Business and Professional Services > Art Studio",
    "Business and Professional Services > Metals Supplier",
    "Landmarks and Outdoors > Island",
    "Travel and Transportation > RV Park",
    "Landmarks and Outdoors > Lighthouse",
    "Retail > Construction Supplies Store",
    "Business and Professional Services > Plastics Supplier",
    "Retail > Video Store",
    "Event > Conference",
    "Retail > Tobacco Store",
    "Business and Professional Services > Agriculture and Forestry Service",
    "Landmarks and Outdoors > River",
    "Business and Professional Services > Chemicals and Gasses Manufacturer",
    "Business and Professional Services > Repair Service",
    "Business and Professional Services > Rubber Supplier",
    "Landmarks and Outdoors > Roof Deck",
    "Retail > Vintage and Thrift Store",
    "Sports and Recreation > Fishing Area",
    "Sports and Recreation > Sports Club",
    "Business and Professional Services > Business Service",
    "Business and Professional Services > Media Agency",
    "Landmarks and Outdoors > Hot Spring",
    "Landmarks and Outdoors > Well",
    "Business and Professional Services > Insurance Agency",
    "Community and Government > Summer Camp",
    "Landmarks and Outdoors > Bay",
    "Business and Professional Services > Industrial Equipment Supplier",
    "Retail > Comic Book Store",
    "Travel and Transportation > Toll Booth",
    "Dining and Drinking",
    "Arts and Entertainment > Fair",
    "Retail > Souvenir Store",
    "Sports and Recreation > Paintball Field",
    "Retail > Drugstore",
    "Arts and Entertainment > Exhibit",
    "Retail > Supplement Store",
    "Retail > Outdoor Supply Store",
    "Arts and Entertainment > Escape Room",
    "Health and Medicine > Optometrist",
    "Business and Professional Services > Engineer",
    "Sports and Recreation > Indoor Play Area",
    "Retail > Leather Goods Store",
    "Community and Government > Public Bathroom",
    "Business and Professional Services > Electrical Equipment Supplier",
    "Travel and Transportation > Baggage Locker",
    "Arts and Entertainment > Go Kart Track",
    "Arts and Entertainment > Circus",
    "Sports and Recreation > Hockey",
    "Landmarks and Outdoors > Forest",
    "Business and Professional Services > Computer Repair Service",
    "Business and Professional Services > Waste Management Service",
    "Travel and Transportation > Platform",
    "Retail > Textiles Store",
    "Business and Professional Services > Scientific Equipment Supplier",
    "Landmarks and Outdoors > Pedestrian Plaza",
    "Community and Government > Utility Company",
    "Community and Government > Public and Social Service",
    "Business and Professional Services > Petroleum Supplier",
    "Business and Professional Services > Wedding Hall",
    "Landmarks and Outdoors > Nature Preserve",
    "Business and Professional Services > Ballroom",
    "Community and Government > Prison",
    "Sports and Recreation > Recreation Center",
    "Sports and Recreation > Gun Range",
    "Dining and Drinking > Smoothie Shop",
    "Dining and Drinking > Night Market",
    "Retail > Discount Store",
    "Business and Professional Services > Welding Service",
    "Travel and Transportation > Pier",
    "Landmarks and Outdoors > Bathing Area",
    "Business and Professional Services > Print, TV, Radio and Outdoor Advertising Service",
    "Business and Professional Services > Online Advertising Service",
    "Arts and Entertainment > Aquarium",
    "Arts and Entertainment > Roller Rink",
    "Community and Government > Trailer Park",
    "Business and Professional Services > Paper Supplier",
    "Retail > Framing Store",
    "Landmarks and Outdoors > Tunnel",
    "Health and Medicine > Urgent Care Center",
    "Community and Government > Rehabilitation Center",
    "Landmarks and Outdoors > Fountain",
    "Arts and Entertainment > Planetarium",
    "Sports and Recreation > Cricket Ground",
    "Landmarks and Outdoors > Volcano",
    "Business and Professional Services > Research Laboratory",
    "Business and Professional Services > Equipment Rental Service",
    "Community and Government",
    "Retail > Medical Supply Store",
    "Landmarks and Outdoors > Bike Trail",
    "Business and Professional Services > Outdoor Event Space",
    "Sports and Recreation > Rugby",
    "Business and Professional Services > Laboratory",
    "Business and Professional Services > Water Treatment Service",
    "Business and Professional Services > Entertainment Agency",
    "Retail > Pawn Shop",
    "Arts and Entertainment > Salsa Club",
    "Landmarks and Outdoors > Tree",
    "Travel and Transportation > Toll Plaza",
    "Travel and Transportation > Port",
    "Landmarks and Outdoors > Rock Climbing Spot",
    "Business and Professional Services > Creative Service",
    "Business and Professional Services > Research Station",
    "Business and Professional Services > Refrigeration and Ice Supplier",
    "Business and Professional Services > Rental Service",
    "Travel and Transportation > Border Crossing",
    "Business and Professional Services > Industrial Estate",
    "Business and Professional Services > Tutoring Service",
    "Business and Professional Services > Laundromat",
    "Arts and Entertainment > Mini Golf Course",
    "Retail > Packaging Supply Store",
    "Business and Professional Services > Translation Service",
    "Retail > Duty-free Store",
    "Community and Government > Observatory",
    "Retail > Knitting Store",
    "Travel and Transportation > Cruise",
    "Business and Professional Services > Management Consultant",
    "Retail > Betting Shop",
    "Retail > Outlet Mall",
    "Retail > Auction House",
    "Travel and Transportation > Cable Car",
    "Business and Professional Services > Power Plant",
    "Landmarks and Outdoors > Dive Spot",
    "Health and Medicine > Maternity Clinic",
    "Health and Medicine > Women's Health Clinic",
    "Health and Medicine > Nutritionist",
    "Retail > Mobility Store",
    "Business and Professional Services > Renewable Energy Service",
    "Landmarks and Outdoors > Hill",
    "Health and Medicine > Hospice",
    "Business and Professional Services > Machine Shop",
    "Landmarks and Outdoors > Memorial Site",
    "Landmarks and Outdoors > Cave",
    "Travel and Transportation > Truck Stop",
    "Business and Professional Services > Logging Service",
    "Landmarks and Outdoors > Waterfront",
    "Sports and Recreation > Skydiving Center",
    "Arts and Entertainment > Disc Golf",
    "Business and Professional Services > Promotional Item Service",
    "Business and Professional Services > Lottery Retailer",
]


def compute_features(candidate_point, radius=0.005):
    """Count banks and amenities within ``radius`` of a candidate point.

    Neighbours are found with the module-level KD-trees, which are built on
    the raw ``lat``/``lon`` columns. ``radius`` is therefore a plain Euclidean
    distance in the units of those columns (presumably decimal degrees —
    confirm against the datasets), not a distance in metres.

    Args:
        candidate_point: A ``(lat, lon)`` pair.
        radius: Search radius in coordinate units. Defaults to ``0.005``.

    Returns:
        A dict with, in order:

        * ``num_banks_in_radius``: number of banks inside the radius.
        * ``total_amenities``: number of amenities inside the radius.
        * ``category_diversity``: number of distinct two-level category keys
          among those amenities.
        * ``num_<key>`` for every key in ``DATASET_COLUMNS``: how many nearby
          amenities have that key (``0`` when none do).

        Only the first label of each amenity is considered, and amenities
        with an empty label list contribute to ``total_amenities`` only.
    """
    lat, lon = candidate_point
    query_point = [lat, lon]

    # Banks
    bank_idxs = tree_banks.query_ball_point(query_point, r=radius)
    logger.debug("[BANK] %s", bank_idxs)
    # NOTE: mean/min distance-to-bank features are currently disabled, so the
    # per-neighbour distances are intentionally not computed here.

    # Amenities
    amenity_idxs = tree_amenities.query_ball_point(query_point, r=radius)
    nearby_labels = df_amenities["fsq_category_labels"].iloc[amenity_idxs]

    # One two-level key per amenity, taken from its first (primary) label.
    category_keys = [
        _category_key(labels[0]) for labels in nearby_labels if len(labels) > 0
    ]
    logger.debug("[CATEGORIES] %s", category_keys)

    features = {
        "num_banks_in_radius": len(bank_idxs),
        "total_amenities": len(amenity_idxs),
        "category_diversity": len(set(category_keys)),
    }

    # Emit a count for every known column so the feature set is fixed-width;
    # a Counter yields 0 for categories that do not occur nearby.
    counts = Counter(category_keys)
    for column in DATASET_COLUMNS:
        features[f"num_{column}"] = counts[column]

    return features