# jonathanjordan21's picture
# Update utils2.py
# fa604ce verified
import ast
import logging
from collections import Counter

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
# Both CSVs expose "latitude"/"longitude"; the rest of this module works with
# the shorter "lat"/"lon" column names.
_COORD_RENAMES = {"latitude": "lat", "longitude": "lon"}

# Loaded once at import time from the current working directory.
df_amenities = pd.read_csv("df_indonesia.csv").rename(columns=_COORD_RENAMES)
df_banks = pd.read_csv("df_bank_indonesia.csv").rename(columns=_COORD_RENAMES)

# Each "fsq_category_labels" cell is parsed from text into a Python object
# (presumably the repr of a list of label strings -- confirm against the CSV).
# ast.literal_eval accepts only Python literals, so unlike the eval() call it
# replaces it cannot execute arbitrary code embedded in the data file.
df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
    ast.literal_eval
)

# KD-trees over the raw (lat, lon) pairs. Radius queries against them are
# therefore Euclidean distances in coordinate units, not metres.
bank_coords = df_banks[["lat", "lon"]].values
tree_banks = cKDTree(bank_coords)

amenity_coords = df_amenities[["lat", "lon"]].values
tree_amenities = cKDTree(amenity_coords)
# DATASET_COLUMNS = [
# 'Dining and Drinking', 'Community and Government', 'Retail',
# 'Business and Professional Services', 'Landmarks and Outdoors',
# 'Arts and Entertainment', 'Health and Medicine',
# 'Travel and Transportation', 'Sports and Recreation',
# 'Event'
# ]
# Category labels for which compute_features() emits a "num_<label>" count.
# Each entry is either a top-level category ("Retail") or a two-level
# "Parent > Child" path, matching the keys compute_features() derives from the
# "fsq_category_labels" column.
#
# NOTE: every entry must end with a comma. Without the commas Python joins
# adjacent string literals into ONE string, which silently turns this into a
# single-element list and collapses all per-category features into one key.
DATASET_COLUMNS = [
    "Community and Government > Education",
    "Landmarks and Outdoors > Structure",
    "Arts and Entertainment > Art Gallery",
    "Arts and Entertainment > Movie Theater",
    "Retail > Department Store",
    "Business and Professional Services > Office",
    "Travel and Transportation > Road",
    "Dining and Drinking > Restaurant",
    "Community and Government > Residential Building",
    "Dining and Drinking > Breakfast Spot",
    "Business and Professional Services",
    "Arts and Entertainment > Amusement Park",
    "Travel and Transportation > Lodging",
    "Arts and Entertainment",
    "Dining and Drinking > Food Truck",
    "Dining and Drinking > Bar",
    "Retail > Sporting Goods Retail",
    "Retail > Computers and Electronics Retail",
    "Arts and Entertainment > Arcade",
    "Dining and Drinking > Cafe, Coffee, and Tea House",
    "Travel and Transportation > Transport Hub",
    "Health and Medicine > Physician",
    "Community and Government > Government Building",
    "Business and Professional Services > Convention Center",
    "Arts and Entertainment > Performing Arts Venue",
    "Landmarks and Outdoors > Field",
    "Business and Professional Services > Financial Service",
    "Landmarks and Outdoors > Park",
    "Sports and Recreation > Water Sports",
    "Landmarks and Outdoors > Other Great Outdoors",
    "Event > Convention",
    "Retail > Shopping Mall",
    "Business and Professional Services > Distribution Center",
    "Business and Professional Services > Automotive Service",
    "Health and Medicine > Hospital",
    "Dining and Drinking > Snack Place",
    "Business and Professional Services > Event Space",
    "Dining and Drinking > Food Court",
    "Travel and Transportation > Bike Rental",
    "Travel and Transportation > Parking",
    "Business and Professional Services > Radio Station",
    "Health and Medicine > Dentist",
    "Landmarks and Outdoors > Beach",
    "Retail > Flea Market",
    "Retail > Fashion Retail",
    "Retail > Food and Beverage Retail",
    "Retail > Office Supply Store",
    "Community and Government > Spiritual Center",
    "Health and Medicine > Medical Center",
    "Dining and Drinking > Bakery",
    "Dining and Drinking > Cafeteria",
    "Retail > Convenience Store",
    "Arts and Entertainment > Public Art",
    "Retail > Newsstand",
    "Retail > Furniture and Home Store",
    "Business and Professional Services > Auditorium",
    "Landmarks and Outdoors > Garden",
    "Community and Government > Library",
    "Community and Government > Organization",
    "Business and Professional Services > Health and Beauty Service",
    "Dining and Drinking > Food Stand",
    "Retail > Pharmacy",
    "Retail > Record Store",
    "Arts and Entertainment > Night Club",
    "Landmarks and Outdoors > Farm",
    "Community and Government > Social Club",
    "Sports and Recreation > Gym and Studio",
    "Sports and Recreation > Racquet Sports",
    "Retail > Costume Store",
    "Sports and Recreation > Soccer",
    "Event > Entertainment Event",
    "Retail > Market",
    "Sports and Recreation",
    "Community and Government > Town Hall",
    "Arts and Entertainment > Water Park",
    "Landmarks and Outdoors",
    "Business and Professional Services > Shoe Repair Service",
    "Retail > Automotive Retail",
    "Dining and Drinking > Dessert Shop",
    "Retail > Music Store",
    "Business and Professional Services > Factory",
    "Community and Government > Assisted Living",
    "Travel and Transportation > Transportation Service",
    "Health and Medicine > Medical Lab",
    "Retail",
    "Travel and Transportation > Rest Area",
    "Landmarks and Outdoors > Hiking Trail",
    "Sports and Recreation > Martial Arts Dojo",
    "Business and Professional Services > Laundry Service",
    "Retail > Pet Supplies Store",
    "Business and Professional Services > Design Studio",
    "Community and Government > Housing Development",
    "Business and Professional Services > Construction",
    "Business and Professional Services > Real Estate Service",
    "Landmarks and Outdoors > Historic and Protected Site",
    "Business and Professional Services > Photography Service",
    "Landmarks and Outdoors > Plaza",
    "Business and Professional Services > Child Care Service",
    "Event > Other Event",
    "Arts and Entertainment > Comedy Club",
    "Arts and Entertainment > Strip Club",
    "Arts and Entertainment > Casino",
    "Business and Professional Services > Legal Service",
    "Retail > Miscellaneous Store",
    "Travel and Transportation",
    "Retail > Boutique",
    "Sports and Recreation > Race Track",
    "Event > Marketplace",
    "Sports and Recreation > Baseball",
    "Retail > Arts and Crafts Store",
    "Travel and Transportation > Moving Target",
    "Community and Government > Cemetery",
    "Business and Professional Services > Wholesaler",
    "Business and Professional Services > Advertising Agency",
    "Arts and Entertainment > Stadium",
    "Arts and Entertainment > Country Dance Club",
    "Landmarks and Outdoors > Bridge",
    "Health and Medicine > Alternative Medicine Clinic",
    "Arts and Entertainment > Zoo",
    "Business and Professional Services > Food and Beverage Service",
    "Business and Professional Services > Storage Facility",
    "Business and Professional Services > Recycling Facility",
    "Business and Professional Services > Funeral Home",
    "Arts and Entertainment > Bowling Alley",
    "Retail > Hardware Store",
    "Retail > Stationery Store",
    "Retail > Bookstore",
    "Retail > Smoke Shop",
    "Sports and Recreation > Running and Track",
    "Retail > Vape Store",
    "Landmarks and Outdoors > Campground",
    "Retail > Gift Store",
    "Landmarks and Outdoors > Palace",
    "Business and Professional Services > Recording Studio",
    "Landmarks and Outdoors > States and Municipalities",
    "Community and Government > Community Center",
    "Travel and Transportation > Fuel Station",
    "Retail > Cosmetics Store",
    "Retail > Hobby Store",
    "Travel and Transportation > Travel Lounge",
    "Community and Government > Polling Place",
    "Retail > Baby Store",
    "Business and Professional Services > Telecommunication Service",
    "Health and Medicine > Healthcare Clinic",
    "Dining and Drinking > Donut Shop",
    "Travel and Transportation > Tourist Information and Service",
    "Dining and Drinking > Juice Bar",
    "Retail > Antique Store",
    "Retail > Toy Store",
    "Business and Professional Services > Event Service",
    "Landmarks and Outdoors > Monument",
    "Sports and Recreation > Snow Sports",
    "Travel and Transportation > Travel Agency",
    "Landmarks and Outdoors > Stable",
    "Arts and Entertainment > Museum",
    "Business and Professional Services > Home Improvement Service",
    "Retail > Flower Store",
    "Business and Professional Services > Technology Business",
    "Travel and Transportation > Electric Vehicle Charging Station",
    "Business and Professional Services > Business Center",
    "Landmarks and Outdoors > Lake",
    "Business and Professional Services > Film Studio",
    "Dining and Drinking > Vineyard",
    "Dining and Drinking > Distillery",
    "Business and Professional Services > Warehouse",
    "Business and Professional Services > Pet Service",
    "Dining and Drinking > Brewery",
    "Retail > Eyecare Store",
    "Health and Medicine",
    "Business and Professional Services > Tailor",
    "Retail > Board Store",
    "Sports and Recreation > Golf",
    "Business and Professional Services > Audiovisual Service",
    "Business and Professional Services > Manufacturer",
    "Retail > Shopping Plaza",
    "Business and Professional Services > Publisher",
    "Retail > Print Store",
    "Sports and Recreation > Gymnastics",
    "Landmarks and Outdoors > Mountain",
    "Retail > Perfume Store",
    "Arts and Entertainment > Gaming Cafe",
    "Travel and Transportation > Boat or Ferry",
    "Community and Government > Cultural Center",
    "Business and Professional Services > Employment Agency",
    "Arts and Entertainment > Psychic and Astrologer",
    "Retail > Big Box Store",
    "Arts and Entertainment > Pool Hall",
    "Community and Government > Animal Shelter",
    "Landmarks and Outdoors > Surf Spot",
    "Landmarks and Outdoors > Castle",
    "Arts and Entertainment > Internet Cafe",
    "Health and Medicine > Emergency Service",
    "Sports and Recreation > Volleyball Court",
    "Business and Professional Services > Security and Safety",
    "Sports and Recreation > Basketball",
    "Business and Professional Services > Shipping, Freight, and Material Transportation Service",
    "Health and Medicine > Veterinarian",
    "Health and Medicine > Acupuncture Clinic",
    "Dining and Drinking > Creperie",
    "Retail > Outlet Store",
    "Dining and Drinking > Bagel Shop",
    "Landmarks and Outdoors > Scenic Lookout",
    "Dining and Drinking > Winery",
    "Retail > Garden Center",
    "Business and Professional Services > Import and Export Service",
    "Sports and Recreation > Skating",
    "Health and Medicine > Physical Therapy Clinic",
    "Landmarks and Outdoors > Botanical Garden",
    "Travel and Transportation > Train",
    "Retail > Warehouse or Wholesale Store",
    "Retail > Luggage Store",
    "Business and Professional Services > TV Station",
    "Retail > Pop-Up Store",
    "Business and Professional Services > Entertainment Service",
    "Landmarks and Outdoors > Sculpture Garden",
    "Landmarks and Outdoors > Harbor or Marina",
    "Health and Medicine > Home Health Care Service",
    "Retail > Adult Store",
    "Health and Medicine > Chiropractor",
    "Health and Medicine > Mental Health Service",
    "Business and Professional Services > Art Studio",
    "Business and Professional Services > Metals Supplier",
    "Landmarks and Outdoors > Island",
    "Travel and Transportation > RV Park",
    "Landmarks and Outdoors > Lighthouse",
    "Retail > Construction Supplies Store",
    "Business and Professional Services > Plastics Supplier",
    "Retail > Video Store",
    "Event > Conference",
    "Retail > Tobacco Store",
    "Business and Professional Services > Agriculture and Forestry Service",
    "Landmarks and Outdoors > River",
    "Business and Professional Services > Chemicals and Gasses Manufacturer",
    "Business and Professional Services > Repair Service",
    "Business and Professional Services > Rubber Supplier",
    "Landmarks and Outdoors > Roof Deck",
    "Retail > Vintage and Thrift Store",
    "Sports and Recreation > Fishing Area",
    "Sports and Recreation > Sports Club",
    "Business and Professional Services > Business Service",
    "Business and Professional Services > Media Agency",
    "Landmarks and Outdoors > Hot Spring",
    "Landmarks and Outdoors > Well",
    "Business and Professional Services > Insurance Agency",
    "Community and Government > Summer Camp",
    "Landmarks and Outdoors > Bay",
    "Business and Professional Services > Industrial Equipment Supplier",
    "Retail > Comic Book Store",
    "Travel and Transportation > Toll Booth",
    "Dining and Drinking",
    "Arts and Entertainment > Fair",
    "Retail > Souvenir Store",
    "Sports and Recreation > Paintball Field",
    "Retail > Drugstore",
    "Arts and Entertainment > Exhibit",
    "Retail > Supplement Store",
    "Retail > Outdoor Supply Store",
    "Arts and Entertainment > Escape Room",
    "Health and Medicine > Optometrist",
    "Business and Professional Services > Engineer",
    "Sports and Recreation > Indoor Play Area",
    "Retail > Leather Goods Store",
    "Community and Government > Public Bathroom",
    "Business and Professional Services > Electrical Equipment Supplier",
    "Travel and Transportation > Baggage Locker",
    "Arts and Entertainment > Go Kart Track",
    "Arts and Entertainment > Circus",
    "Sports and Recreation > Hockey",
    "Landmarks and Outdoors > Forest",
    "Business and Professional Services > Computer Repair Service",
    "Business and Professional Services > Waste Management Service",
    "Travel and Transportation > Platform",
    "Retail > Textiles Store",
    "Business and Professional Services > Scientific Equipment Supplier",
    "Landmarks and Outdoors > Pedestrian Plaza",
    "Community and Government > Utility Company",
    "Community and Government > Public and Social Service",
    "Business and Professional Services > Petroleum Supplier",
    "Business and Professional Services > Wedding Hall",
    "Landmarks and Outdoors > Nature Preserve",
    "Business and Professional Services > Ballroom",
    "Community and Government > Prison",
    "Sports and Recreation > Recreation Center",
    "Sports and Recreation > Gun Range",
    "Dining and Drinking > Smoothie Shop",
    "Dining and Drinking > Night Market",
    "Retail > Discount Store",
    "Business and Professional Services > Welding Service",
    "Travel and Transportation > Pier",
    "Landmarks and Outdoors > Bathing Area",
    "Business and Professional Services > Print, TV, Radio and Outdoor Advertising Service",
    "Business and Professional Services > Online Advertising Service",
    "Arts and Entertainment > Aquarium",
    "Arts and Entertainment > Roller Rink",
    "Community and Government > Trailer Park",
    "Business and Professional Services > Paper Supplier",
    "Retail > Framing Store",
    "Landmarks and Outdoors > Tunnel",
    "Health and Medicine > Urgent Care Center",
    "Community and Government > Rehabilitation Center",
    "Landmarks and Outdoors > Fountain",
    "Arts and Entertainment > Planetarium",
    "Sports and Recreation > Cricket Ground",
    "Landmarks and Outdoors > Volcano",
    "Business and Professional Services > Research Laboratory",
    "Business and Professional Services > Equipment Rental Service",
    "Community and Government",
    "Retail > Medical Supply Store",
    "Landmarks and Outdoors > Bike Trail",
    "Business and Professional Services > Outdoor Event Space",
    "Sports and Recreation > Rugby",
    "Business and Professional Services > Laboratory",
    "Business and Professional Services > Water Treatment Service",
    "Business and Professional Services > Entertainment Agency",
    "Retail > Pawn Shop",
    "Arts and Entertainment > Salsa Club",
    "Landmarks and Outdoors > Tree",
    "Travel and Transportation > Toll Plaza",
    "Travel and Transportation > Port",
    "Landmarks and Outdoors > Rock Climbing Spot",
    "Business and Professional Services > Creative Service",
    "Business and Professional Services > Research Station",
    "Business and Professional Services > Refrigeration and Ice Supplier",
    "Business and Professional Services > Rental Service",
    "Travel and Transportation > Border Crossing",
    "Business and Professional Services > Industrial Estate",
    "Business and Professional Services > Tutoring Service",
    "Business and Professional Services > Laundromat",
    "Arts and Entertainment > Mini Golf Course",
    "Retail > Packaging Supply Store",
    "Business and Professional Services > Translation Service",
    "Retail > Duty-free Store",
    "Community and Government > Observatory",
    "Retail > Knitting Store",
    "Travel and Transportation > Cruise",
    "Business and Professional Services > Management Consultant",
    "Retail > Betting Shop",
    "Retail > Outlet Mall",
    "Retail > Auction House",
    "Travel and Transportation > Cable Car",
    "Business and Professional Services > Power Plant",
    "Landmarks and Outdoors > Dive Spot",
    "Health and Medicine > Maternity Clinic",
    "Health and Medicine > Women's Health Clinic",
    "Health and Medicine > Nutritionist",
    "Retail > Mobility Store",
    "Business and Professional Services > Renewable Energy Service",
    "Landmarks and Outdoors > Hill",
    "Health and Medicine > Hospice",
    "Business and Professional Services > Machine Shop",
    "Landmarks and Outdoors > Memorial Site",
    "Landmarks and Outdoors > Cave",
    "Travel and Transportation > Truck Stop",
    "Business and Professional Services > Logging Service",
    "Landmarks and Outdoors > Waterfront",
    "Sports and Recreation > Skydiving Center",
    "Arts and Entertainment > Disc Golf",
    "Business and Professional Services > Promotional Item Service",
    "Business and Professional Services > Lottery Retailer",
]
def compute_features(candidate_point, radius=0.005):
    """Build the feature dictionary for one candidate location.

    Args:
        candidate_point: Pair unpacked as ``(lat, lon)``.
        radius: Search radius handed to ``query_ball_point``. The trees are
            built from raw ``lat``/``lon`` columns, so this is a Euclidean
            distance in coordinate units (presumably decimal degrees --
            confirm against the CSV data), not metres.

    Returns:
        dict with, in this order:

        * ``num_banks_in_radius`` -- number of banks within ``radius``;
        * ``total_amenities`` -- number of amenities within ``radius``;
        * ``category_diversity`` -- number of distinct category keys among
          those amenities;
        * ``num_<label>`` for every label in ``DATASET_COLUMNS`` -- how many
          nearby amenities have that category key (0 when none do).
    """
    lat, lon = candidate_point
    query = [lat, lon]
    log = logging.getLogger(__name__)

    # Banks: only the neighbour count is part of the feature set.
    # NOTE: mean/min bank distance used to be computed here but were never
    # returned; the dead computation was removed rather than kept running on
    # every call.
    bank_idxs = tree_banks.query_ball_point(query, r=radius)
    log.debug("[BANK] %s", bank_idxs)
    n_banks = len(bank_idxs)

    # Amenities inside the same radius.
    amenity_idxs = tree_amenities.query_ball_point(query, r=radius)
    amenities = df_amenities.iloc[amenity_idxs]

    # Category key = first two ">"-separated levels of each amenity's FIRST
    # label, e.g. "A > B > C" -> "A > B". Amenities with no labels are skipped.
    category_keys = [
        ">".join(labels[0].split(">")[:2]).strip()
        for labels in amenities["fsq_category_labels"]
        if len(labels) > 0
    ]
    log.debug("[CATEGORIES] %s", category_keys)
    counts = Counter(category_keys)

    features = {
        "num_banks_in_radius": n_banks,
        "total_amenities": len(amenities),
        "category_diversity": len(counts),
    }
    # Emit a fixed set of columns so every call returns the same keys,
    # regardless of which categories happen to be nearby.
    for label in DATASET_COLUMNS:
        features[f"num_{label}"] = counts.get(label, 0)
    return features