# utils2.py -- commit 6826990 ("Update utils2.py"), author: jonathanjordan21
import ast
import logging
from collections import Counter

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
# Load the amenity and bank tables at import time and normalise the coordinate
# column names to "lat"/"lon" so both tables can be indexed the same way.
df_amenities = pd.read_csv("df_indonesia.csv").rename(
    columns={"latitude": "lat", "longitude": "lon"}
)
df_banks = pd.read_csv("df_bank_indonesia.csv").rename(
    columns={"latitude": "lat", "longitude": "lon"}
)

# Each cell of "fsq_category_labels" is presumably the text of a Python list
# literal (TODO confirm against the CSV). ast.literal_eval parses literals
# only, so a malformed or tampered cell raises instead of executing arbitrary
# code the way eval() would.
df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
    ast.literal_eval
)

# KD-trees over the raw (lat, lon) pairs for radius queries. Row i of each
# tree corresponds to positional row i of the matching DataFrame.
bank_coords = df_banks[["lat", "lon"]].values
tree_banks = cKDTree(bank_coords)
amenity_coords = df_amenities[["lat", "lon"]].values
tree_amenities = cKDTree(amenity_coords)
# Earlier, coarser feature set (top-level categories only), kept for reference:
# DATASET_COLUMNS = [
# 'Dining and Drinking', 'Community and Government', 'Retail',
# 'Business and Professional Services', 'Landmarks and Outdoors',
# 'Arts and Entertainment', 'Health and Medicine',
# 'Travel and Transportation', 'Sports and Recreation',
# 'Event'
# ]

# Category keys that compute_features() reports as "num_<key>" counts.
# Each entry is either a top-level category or "<top level> > <second level>",
# which is the form of the key compute_features() derives from the first label
# of every amenity. The list order is the order in which the per-category
# features are inserted into the returned dict; check consumers before
# reordering or removing entries.
DATASET_COLUMNS = [
"Community and Government > Education",
"Landmarks and Outdoors > Structure",
"Arts and Entertainment > Art Gallery",
"Arts and Entertainment > Movie Theater",
"Retail > Department Store",
"Business and Professional Services > Office",
"Travel and Transportation > Road",
"Dining and Drinking > Restaurant",
"Community and Government > Residential Building",
"Dining and Drinking > Breakfast Spot",
"Business and Professional Services",
"Arts and Entertainment > Amusement Park",
"Travel and Transportation > Lodging",
"Arts and Entertainment",
"Dining and Drinking > Food Truck",
"Dining and Drinking > Bar",
"Retail > Sporting Goods Retail",
"Retail > Computers and Electronics Retail",
"Arts and Entertainment > Arcade",
"Dining and Drinking > Cafe, Coffee, and Tea House",
"Travel and Transportation > Transport Hub",
"Health and Medicine > Physician",
"Community and Government > Government Building",
"Business and Professional Services > Convention Center",
"Arts and Entertainment > Performing Arts Venue",
"Landmarks and Outdoors > Field",
"Business and Professional Services > Financial Service",
"Landmarks and Outdoors > Park",
"Sports and Recreation > Water Sports",
"Landmarks and Outdoors > Other Great Outdoors",
"Event > Convention",
"Retail > Shopping Mall",
"Business and Professional Services > Distribution Center",
"Business and Professional Services > Automotive Service",
"Health and Medicine > Hospital",
"Dining and Drinking > Snack Place",
"Business and Professional Services > Event Space",
"Dining and Drinking > Food Court",
"Travel and Transportation > Bike Rental",
"Travel and Transportation > Parking",
"Business and Professional Services > Radio Station",
"Health and Medicine > Dentist",
"Landmarks and Outdoors > Beach",
"Retail > Flea Market",
"Retail > Fashion Retail",
"Retail > Food and Beverage Retail",
"Retail > Office Supply Store",
"Community and Government > Spiritual Center",
"Health and Medicine > Medical Center",
"Dining and Drinking > Bakery",
"Dining and Drinking > Cafeteria",
"Retail > Convenience Store",
"Arts and Entertainment > Public Art",
"Retail > Newsstand",
"Retail > Furniture and Home Store",
"Business and Professional Services > Auditorium",
"Landmarks and Outdoors > Garden",
"Community and Government > Library",
"Community and Government > Organization",
"Business and Professional Services > Health and Beauty Service",
"Dining and Drinking > Food Stand",
"Retail > Pharmacy",
"Retail > Record Store",
"Arts and Entertainment > Night Club",
"Landmarks and Outdoors > Farm",
"Community and Government > Social Club",
"Sports and Recreation > Gym and Studio",
"Sports and Recreation > Racquet Sports",
"Retail > Costume Store",
"Sports and Recreation > Soccer",
"Event > Entertainment Event",
"Retail > Market",
"Sports and Recreation",
"Community and Government > Town Hall",
"Arts and Entertainment > Water Park",
"Landmarks and Outdoors",
"Business and Professional Services > Shoe Repair Service",
"Retail > Automotive Retail",
"Dining and Drinking > Dessert Shop",
"Retail > Music Store",
"Business and Professional Services > Factory",
"Community and Government > Assisted Living",
"Travel and Transportation > Transportation Service",
"Health and Medicine > Medical Lab",
"Retail",
"Travel and Transportation > Rest Area",
"Landmarks and Outdoors > Hiking Trail",
"Sports and Recreation > Martial Arts Dojo",
"Business and Professional Services > Laundry Service",
"Retail > Pet Supplies Store",
"Business and Professional Services > Design Studio",
"Community and Government > Housing Development",
"Business and Professional Services > Construction",
"Business and Professional Services > Real Estate Service",
"Landmarks and Outdoors > Historic and Protected Site",
"Business and Professional Services > Photography Service",
"Landmarks and Outdoors > Plaza",
"Business and Professional Services > Child Care Service",
"Event > Other Event",
"Arts and Entertainment > Comedy Club",
"Arts and Entertainment > Strip Club",
"Arts and Entertainment > Casino",
"Business and Professional Services > Legal Service",
"Retail > Miscellaneous Store",
"Travel and Transportation",
"Retail > Boutique",
"Sports and Recreation > Race Track",
"Event > Marketplace",
"Sports and Recreation > Baseball",
"Retail > Arts and Crafts Store",
"Travel and Transportation > Moving Target",
"Community and Government > Cemetery",
"Business and Professional Services > Wholesaler",
"Business and Professional Services > Advertising Agency",
"Arts and Entertainment > Stadium",
"Arts and Entertainment > Country Dance Club",
"Landmarks and Outdoors > Bridge",
"Health and Medicine > Alternative Medicine Clinic",
"Arts and Entertainment > Zoo",
"Business and Professional Services > Food and Beverage Service",
"Business and Professional Services > Storage Facility",
"Business and Professional Services > Recycling Facility",
"Business and Professional Services > Funeral Home",
"Arts and Entertainment > Bowling Alley",
"Retail > Hardware Store",
"Retail > Stationery Store",
"Retail > Bookstore",
"Retail > Smoke Shop",
"Sports and Recreation > Running and Track",
"Retail > Vape Store",
"Landmarks and Outdoors > Campground",
"Retail > Gift Store",
"Landmarks and Outdoors > Palace",
"Business and Professional Services > Recording Studio",
"Landmarks and Outdoors > States and Municipalities",
"Community and Government > Community Center",
"Travel and Transportation > Fuel Station",
"Retail > Cosmetics Store",
"Retail > Hobby Store",
"Travel and Transportation > Travel Lounge",
"Community and Government > Polling Place",
"Retail > Baby Store",
"Business and Professional Services > Telecommunication Service",
"Health and Medicine > Healthcare Clinic",
"Dining and Drinking > Donut Shop",
"Travel and Transportation > Tourist Information and Service",
"Dining and Drinking > Juice Bar",
"Retail > Antique Store",
"Retail > Toy Store",
"Business and Professional Services > Event Service",
"Landmarks and Outdoors > Monument",
"Sports and Recreation > Snow Sports",
"Travel and Transportation > Travel Agency",
"Landmarks and Outdoors > Stable",
"Arts and Entertainment > Museum",
"Business and Professional Services > Home Improvement Service",
"Retail > Flower Store",
"Business and Professional Services > Technology Business",
"Travel and Transportation > Electric Vehicle Charging Station",
"Business and Professional Services > Business Center",
"Landmarks and Outdoors > Lake",
"Business and Professional Services > Film Studio",
"Dining and Drinking > Vineyard",
"Dining and Drinking > Distillery",
"Business and Professional Services > Warehouse",
"Business and Professional Services > Pet Service",
"Dining and Drinking > Brewery",
"Retail > Eyecare Store",
"Health and Medicine",
"Business and Professional Services > Tailor",
"Retail > Board Store",
"Sports and Recreation > Golf",
"Business and Professional Services > Audiovisual Service",
"Business and Professional Services > Manufacturer",
"Retail > Shopping Plaza",
"Business and Professional Services > Publisher",
"Retail > Print Store",
"Sports and Recreation > Gymnastics",
"Landmarks and Outdoors > Mountain",
"Retail > Perfume Store",
"Arts and Entertainment > Gaming Cafe",
"Travel and Transportation > Boat or Ferry",
"Community and Government > Cultural Center",
"Business and Professional Services > Employment Agency",
"Arts and Entertainment > Psychic and Astrologer",
"Retail > Big Box Store",
"Arts and Entertainment > Pool Hall",
"Community and Government > Animal Shelter",
"Landmarks and Outdoors > Surf Spot",
"Landmarks and Outdoors > Castle",
"Arts and Entertainment > Internet Cafe",
"Health and Medicine > Emergency Service",
"Sports and Recreation > Volleyball Court",
"Business and Professional Services > Security and Safety",
"Sports and Recreation > Basketball",
"Business and Professional Services > Shipping, Freight, and Material Transportation Service",
"Health and Medicine > Veterinarian",
"Health and Medicine > Acupuncture Clinic",
"Dining and Drinking > Creperie",
"Retail > Outlet Store",
"Dining and Drinking > Bagel Shop",
"Landmarks and Outdoors > Scenic Lookout",
"Dining and Drinking > Winery",
"Retail > Garden Center",
"Business and Professional Services > Import and Export Service",
"Sports and Recreation > Skating",
"Health and Medicine > Physical Therapy Clinic",
"Landmarks and Outdoors > Botanical Garden",
"Travel and Transportation > Train",
"Retail > Warehouse or Wholesale Store",
"Retail > Luggage Store",
"Business and Professional Services > TV Station",
"Retail > Pop-Up Store",
"Business and Professional Services > Entertainment Service",
"Landmarks and Outdoors > Sculpture Garden",
"Landmarks and Outdoors > Harbor or Marina",
"Health and Medicine > Home Health Care Service",
"Retail > Adult Store",
"Health and Medicine > Chiropractor",
"Health and Medicine > Mental Health Service",
"Business and Professional Services > Art Studio",
"Business and Professional Services > Metals Supplier",
"Landmarks and Outdoors > Island",
"Travel and Transportation > RV Park",
"Landmarks and Outdoors > Lighthouse",
"Retail > Construction Supplies Store",
"Business and Professional Services > Plastics Supplier",
"Retail > Video Store",
"Event > Conference",
"Retail > Tobacco Store",
"Business and Professional Services > Agriculture and Forestry Service",
"Landmarks and Outdoors > River",
"Business and Professional Services > Chemicals and Gasses Manufacturer",
"Business and Professional Services > Repair Service",
"Business and Professional Services > Rubber Supplier",
"Landmarks and Outdoors > Roof Deck",
"Retail > Vintage and Thrift Store",
"Sports and Recreation > Fishing Area",
"Sports and Recreation > Sports Club",
"Business and Professional Services > Business Service",
"Business and Professional Services > Media Agency",
"Landmarks and Outdoors > Hot Spring",
"Landmarks and Outdoors > Well",
"Business and Professional Services > Insurance Agency",
"Community and Government > Summer Camp",
"Landmarks and Outdoors > Bay",
"Business and Professional Services > Industrial Equipment Supplier",
"Retail > Comic Book Store",
"Travel and Transportation > Toll Booth",
"Dining and Drinking",
"Arts and Entertainment > Fair",
"Retail > Souvenir Store",
"Sports and Recreation > Paintball Field",
"Retail > Drugstore",
"Arts and Entertainment > Exhibit",
"Retail > Supplement Store",
"Retail > Outdoor Supply Store",
"Arts and Entertainment > Escape Room",
"Health and Medicine > Optometrist",
"Business and Professional Services > Engineer",
"Sports and Recreation > Indoor Play Area",
"Retail > Leather Goods Store",
"Community and Government > Public Bathroom",
"Business and Professional Services > Electrical Equipment Supplier",
"Travel and Transportation > Baggage Locker",
"Arts and Entertainment > Go Kart Track",
"Arts and Entertainment > Circus",
"Sports and Recreation > Hockey",
"Landmarks and Outdoors > Forest",
"Business and Professional Services > Computer Repair Service",
"Business and Professional Services > Waste Management Service",
"Travel and Transportation > Platform",
"Retail > Textiles Store",
"Business and Professional Services > Scientific Equipment Supplier",
"Landmarks and Outdoors > Pedestrian Plaza",
"Community and Government > Utility Company",
"Community and Government > Public and Social Service",
"Business and Professional Services > Petroleum Supplier",
"Business and Professional Services > Wedding Hall",
"Landmarks and Outdoors > Nature Preserve",
"Business and Professional Services > Ballroom",
"Community and Government > Prison",
"Sports and Recreation > Recreation Center",
"Sports and Recreation > Gun Range",
"Dining and Drinking > Smoothie Shop",
"Dining and Drinking > Night Market",
"Retail > Discount Store",
"Business and Professional Services > Welding Service",
"Travel and Transportation > Pier",
"Landmarks and Outdoors > Bathing Area",
"Business and Professional Services > Print, TV, Radio and Outdoor Advertising Service",
"Business and Professional Services > Online Advertising Service",
"Arts and Entertainment > Aquarium",
"Arts and Entertainment > Roller Rink",
"Community and Government > Trailer Park",
"Business and Professional Services > Paper Supplier",
"Retail > Framing Store",
"Landmarks and Outdoors > Tunnel",
"Health and Medicine > Urgent Care Center",
"Community and Government > Rehabilitation Center",
"Landmarks and Outdoors > Fountain",
"Arts and Entertainment > Planetarium",
"Sports and Recreation > Cricket Ground",
"Landmarks and Outdoors > Volcano",
"Business and Professional Services > Research Laboratory",
"Business and Professional Services > Equipment Rental Service",
"Community and Government",
"Retail > Medical Supply Store",
"Landmarks and Outdoors > Bike Trail",
"Business and Professional Services > Outdoor Event Space",
"Sports and Recreation > Rugby",
"Business and Professional Services > Laboratory",
"Business and Professional Services > Water Treatment Service",
"Business and Professional Services > Entertainment Agency",
"Retail > Pawn Shop",
"Arts and Entertainment > Salsa Club",
"Landmarks and Outdoors > Tree",
"Travel and Transportation > Toll Plaza",
"Travel and Transportation > Port",
"Landmarks and Outdoors > Rock Climbing Spot",
"Business and Professional Services > Creative Service",
"Business and Professional Services > Research Station",
"Business and Professional Services > Refrigeration and Ice Supplier",
"Business and Professional Services > Rental Service",
"Travel and Transportation > Border Crossing",
"Business and Professional Services > Industrial Estate",
"Business and Professional Services > Tutoring Service",
"Business and Professional Services > Laundromat",
"Arts and Entertainment > Mini Golf Course",
"Retail > Packaging Supply Store",
"Business and Professional Services > Translation Service",
"Retail > Duty-free Store",
"Community and Government > Observatory",
"Retail > Knitting Store",
"Travel and Transportation > Cruise",
"Business and Professional Services > Management Consultant",
"Retail > Betting Shop",
"Retail > Outlet Mall",
"Retail > Auction House",
"Travel and Transportation > Cable Car",
"Business and Professional Services > Power Plant",
"Landmarks and Outdoors > Dive Spot",
"Health and Medicine > Maternity Clinic",
"Health and Medicine > Women's Health Clinic",
"Health and Medicine > Nutritionist",
"Retail > Mobility Store",
"Business and Professional Services > Renewable Energy Service",
"Landmarks and Outdoors > Hill",
"Health and Medicine > Hospice",
"Business and Professional Services > Machine Shop",
"Landmarks and Outdoors > Memorial Site",
"Landmarks and Outdoors > Cave",
"Travel and Transportation > Truck Stop",
"Business and Professional Services > Logging Service",
"Landmarks and Outdoors > Waterfront",
"Sports and Recreation > Skydiving Center",
"Arts and Entertainment > Disc Golf",
"Business and Professional Services > Promotional Item Service",
"Business and Professional Services > Lottery Retailer",
]
def compute_features(candidate_point, radius: float = 0.005) -> dict:
    """Build the feature dict for one candidate location.

    Counts the banks and amenities within ``radius`` of the point and breaks
    the amenities down by category.

    Args:
        candidate_point: ``(lat, lon)`` pair.
        radius: Search radius passed to ``cKDTree.query_ball_point``. It is
            expressed in the same units as the coordinates the trees were
            built from (raw lat/lon values, so presumably degrees), not in
            metres.

    Returns:
        A dict with, in this order:
          * ``num_banks_in_radius``: number of banks inside the radius.
          * ``total_amenities``: number of amenities inside the radius.
          * ``category_diversity``: number of distinct category keys among
            those amenities.
          * ``num_<column>`` for every entry of ``DATASET_COLUMNS``: how many
            amenities map to that category key (0 when none do).
    """
    log = logging.getLogger(__name__)
    lat, lon = candidate_point
    query_point = [lat, lon]

    # Banks: only the neighbour count is used as a feature.
    bank_idxs = tree_banks.query_ball_point(query_point, r=radius)
    log.debug("[BANK] %s", bank_idxs)
    n_banks = len(bank_idxs)

    # Amenities: the tree returns positional row indices into df_amenities.
    amenity_idxs = tree_amenities.query_ball_point(query_point, r=radius)
    amenities = df_amenities.iloc[amenity_idxs]
    total_amenities = len(amenities)

    # Reduce every amenity to one category key: the first (at most) two
    # ">"-separated levels of its first label, e.g. "A > B > C" -> "A > B".
    # Amenities with an empty label list are skipped, so they count towards
    # total_amenities but towards no category.
    all_category_ids = [
        ">".join(labels[0].split(">")[:2]).strip()
        for labels in amenities["fsq_category_labels"]
        if len(labels) > 0
    ]
    log.debug("[CATEGORIES] %s", all_category_ids)

    count_per_category = Counter(all_category_ids)

    features = {
        "num_banks_in_radius": n_banks,
        "total_amenities": total_amenities,
        "category_diversity": len(count_per_category),
    }
    # Emit every known column, even when its count is zero, so the returned
    # dict always has the same keys in the same order.
    for feat in DATASET_COLUMNS:
        features[f"num_{feat}"] = count_per_category.get(feat, 0)

    return features