File size: 19,244 Bytes
99733d4 6826990 99733d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 |
import ast
import logging
from collections import Counter

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
def _parse_category_labels(raw):
    """Parse one ``fsq_category_labels`` CSV cell into a Python object.

    ``ast.literal_eval`` only accepts Python literals, so text stored in the
    CSV cannot execute code the way ``eval`` would. Non-string cells (for
    example the NaN pandas produces for an empty cell) are treated as
    "no labels" and yield an empty list.
    """
    if not isinstance(raw, str):
        return []
    return ast.literal_eval(raw)


# Load both point datasets and normalise the coordinate column names so the
# rest of the module can refer to ``lat`` / ``lon`` uniformly.
df_amenities = pd.read_csv("df_indonesia.csv").rename(
    columns={"latitude": "lat", "longitude": "lon"}
)
df_banks = pd.read_csv("df_bank_indonesia.csv").rename(
    columns={"latitude": "lat", "longitude": "lon"}
)

# The label column is stored as the text form of a Python list; turn each
# cell back into a real list.
df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
    _parse_category_labels
)

# KD-trees over (lat, lon) pairs for radius queries. Row order in each tree
# matches the row order of the corresponding DataFrame, so indices returned
# by the trees can be used with ``.iloc``.
bank_coords = df_banks[["lat", "lon"]].to_numpy()
tree_banks = cKDTree(bank_coords)
amenity_coords = df_amenities[["lat", "lon"]].to_numpy()
tree_amenities = cKDTree(amenity_coords)
# Category labels that compute_features reports as `num_<label>` count
# features, in the order the keys are added to the feature dict. Each entry is
# either a bare top-level category (e.g. "Retail") or a two-level
# "Top > Second" label; this mirrors the labels compute_features builds by
# keeping the first two ">"-separated levels of an amenity's first category
# label.
#
# Commented-out alternative that lists only the top-level categories:
# DATASET_COLUMNS = [
# 'Dining and Drinking', 'Community and Government', 'Retail',
# 'Business and Professional Services', 'Landmarks and Outdoors',
# 'Arts and Entertainment', 'Health and Medicine',
# 'Travel and Transportation', 'Sports and Recreation',
# 'Event'
# ]
DATASET_COLUMNS = [
"Community and Government > Education",
"Landmarks and Outdoors > Structure",
"Arts and Entertainment > Art Gallery",
"Arts and Entertainment > Movie Theater",
"Retail > Department Store",
"Business and Professional Services > Office",
"Travel and Transportation > Road",
"Dining and Drinking > Restaurant",
"Community and Government > Residential Building",
"Dining and Drinking > Breakfast Spot",
"Business and Professional Services",
"Arts and Entertainment > Amusement Park",
"Travel and Transportation > Lodging",
"Arts and Entertainment",
"Dining and Drinking > Food Truck",
"Dining and Drinking > Bar",
"Retail > Sporting Goods Retail",
"Retail > Computers and Electronics Retail",
"Arts and Entertainment > Arcade",
"Dining and Drinking > Cafe, Coffee, and Tea House",
"Travel and Transportation > Transport Hub",
"Health and Medicine > Physician",
"Community and Government > Government Building",
"Business and Professional Services > Convention Center",
"Arts and Entertainment > Performing Arts Venue",
"Landmarks and Outdoors > Field",
"Business and Professional Services > Financial Service",
"Landmarks and Outdoors > Park",
"Sports and Recreation > Water Sports",
"Landmarks and Outdoors > Other Great Outdoors",
"Event > Convention",
"Retail > Shopping Mall",
"Business and Professional Services > Distribution Center",
"Business and Professional Services > Automotive Service",
"Health and Medicine > Hospital",
"Dining and Drinking > Snack Place",
"Business and Professional Services > Event Space",
"Dining and Drinking > Food Court",
"Travel and Transportation > Bike Rental",
"Travel and Transportation > Parking",
"Business and Professional Services > Radio Station",
"Health and Medicine > Dentist",
"Landmarks and Outdoors > Beach",
"Retail > Flea Market",
"Retail > Fashion Retail",
"Retail > Food and Beverage Retail",
"Retail > Office Supply Store",
"Community and Government > Spiritual Center",
"Health and Medicine > Medical Center",
"Dining and Drinking > Bakery",
"Dining and Drinking > Cafeteria",
"Retail > Convenience Store",
"Arts and Entertainment > Public Art",
"Retail > Newsstand",
"Retail > Furniture and Home Store",
"Business and Professional Services > Auditorium",
"Landmarks and Outdoors > Garden",
"Community and Government > Library",
"Community and Government > Organization",
"Business and Professional Services > Health and Beauty Service",
"Dining and Drinking > Food Stand",
"Retail > Pharmacy",
"Retail > Record Store",
"Arts and Entertainment > Night Club",
"Landmarks and Outdoors > Farm",
"Community and Government > Social Club",
"Sports and Recreation > Gym and Studio",
"Sports and Recreation > Racquet Sports",
"Retail > Costume Store",
"Sports and Recreation > Soccer",
"Event > Entertainment Event",
"Retail > Market",
"Sports and Recreation",
"Community and Government > Town Hall",
"Arts and Entertainment > Water Park",
"Landmarks and Outdoors",
"Business and Professional Services > Shoe Repair Service",
"Retail > Automotive Retail",
"Dining and Drinking > Dessert Shop",
"Retail > Music Store",
"Business and Professional Services > Factory",
"Community and Government > Assisted Living",
"Travel and Transportation > Transportation Service",
"Health and Medicine > Medical Lab",
"Retail",
"Travel and Transportation > Rest Area",
"Landmarks and Outdoors > Hiking Trail",
"Sports and Recreation > Martial Arts Dojo",
"Business and Professional Services > Laundry Service",
"Retail > Pet Supplies Store",
"Business and Professional Services > Design Studio",
"Community and Government > Housing Development",
"Business and Professional Services > Construction",
"Business and Professional Services > Real Estate Service",
"Landmarks and Outdoors > Historic and Protected Site",
"Business and Professional Services > Photography Service",
"Landmarks and Outdoors > Plaza",
"Business and Professional Services > Child Care Service",
"Event > Other Event",
"Arts and Entertainment > Comedy Club",
"Arts and Entertainment > Strip Club",
"Arts and Entertainment > Casino",
"Business and Professional Services > Legal Service",
"Retail > Miscellaneous Store",
"Travel and Transportation",
"Retail > Boutique",
"Sports and Recreation > Race Track",
"Event > Marketplace",
"Sports and Recreation > Baseball",
"Retail > Arts and Crafts Store",
"Travel and Transportation > Moving Target",
"Community and Government > Cemetery",
"Business and Professional Services > Wholesaler",
"Business and Professional Services > Advertising Agency",
"Arts and Entertainment > Stadium",
"Arts and Entertainment > Country Dance Club",
"Landmarks and Outdoors > Bridge",
"Health and Medicine > Alternative Medicine Clinic",
"Arts and Entertainment > Zoo",
"Business and Professional Services > Food and Beverage Service",
"Business and Professional Services > Storage Facility",
"Business and Professional Services > Recycling Facility",
"Business and Professional Services > Funeral Home",
"Arts and Entertainment > Bowling Alley",
"Retail > Hardware Store",
"Retail > Stationery Store",
"Retail > Bookstore",
"Retail > Smoke Shop",
"Sports and Recreation > Running and Track",
"Retail > Vape Store",
"Landmarks and Outdoors > Campground",
"Retail > Gift Store",
"Landmarks and Outdoors > Palace",
"Business and Professional Services > Recording Studio",
"Landmarks and Outdoors > States and Municipalities",
"Community and Government > Community Center",
"Travel and Transportation > Fuel Station",
"Retail > Cosmetics Store",
"Retail > Hobby Store",
"Travel and Transportation > Travel Lounge",
"Community and Government > Polling Place",
"Retail > Baby Store",
"Business and Professional Services > Telecommunication Service",
"Health and Medicine > Healthcare Clinic",
"Dining and Drinking > Donut Shop",
"Travel and Transportation > Tourist Information and Service",
"Dining and Drinking > Juice Bar",
"Retail > Antique Store",
"Retail > Toy Store",
"Business and Professional Services > Event Service",
"Landmarks and Outdoors > Monument",
"Sports and Recreation > Snow Sports",
"Travel and Transportation > Travel Agency",
"Landmarks and Outdoors > Stable",
"Arts and Entertainment > Museum",
"Business and Professional Services > Home Improvement Service",
"Retail > Flower Store",
"Business and Professional Services > Technology Business",
"Travel and Transportation > Electric Vehicle Charging Station",
"Business and Professional Services > Business Center",
"Landmarks and Outdoors > Lake",
"Business and Professional Services > Film Studio",
"Dining and Drinking > Vineyard",
"Dining and Drinking > Distillery",
"Business and Professional Services > Warehouse",
"Business and Professional Services > Pet Service",
"Dining and Drinking > Brewery",
"Retail > Eyecare Store",
"Health and Medicine",
"Business and Professional Services > Tailor",
"Retail > Board Store",
"Sports and Recreation > Golf",
"Business and Professional Services > Audiovisual Service",
"Business and Professional Services > Manufacturer",
"Retail > Shopping Plaza",
"Business and Professional Services > Publisher",
"Retail > Print Store",
"Sports and Recreation > Gymnastics",
"Landmarks and Outdoors > Mountain",
"Retail > Perfume Store",
"Arts and Entertainment > Gaming Cafe",
"Travel and Transportation > Boat or Ferry",
"Community and Government > Cultural Center",
"Business and Professional Services > Employment Agency",
"Arts and Entertainment > Psychic and Astrologer",
"Retail > Big Box Store",
"Arts and Entertainment > Pool Hall",
"Community and Government > Animal Shelter",
"Landmarks and Outdoors > Surf Spot",
"Landmarks and Outdoors > Castle",
"Arts and Entertainment > Internet Cafe",
"Health and Medicine > Emergency Service",
"Sports and Recreation > Volleyball Court",
"Business and Professional Services > Security and Safety",
"Sports and Recreation > Basketball",
"Business and Professional Services > Shipping, Freight, and Material Transportation Service",
"Health and Medicine > Veterinarian",
"Health and Medicine > Acupuncture Clinic",
"Dining and Drinking > Creperie",
"Retail > Outlet Store",
"Dining and Drinking > Bagel Shop",
"Landmarks and Outdoors > Scenic Lookout",
"Dining and Drinking > Winery",
"Retail > Garden Center",
"Business and Professional Services > Import and Export Service",
"Sports and Recreation > Skating",
"Health and Medicine > Physical Therapy Clinic",
"Landmarks and Outdoors > Botanical Garden",
"Travel and Transportation > Train",
"Retail > Warehouse or Wholesale Store",
"Retail > Luggage Store",
"Business and Professional Services > TV Station",
"Retail > Pop-Up Store",
"Business and Professional Services > Entertainment Service",
"Landmarks and Outdoors > Sculpture Garden",
"Landmarks and Outdoors > Harbor or Marina",
"Health and Medicine > Home Health Care Service",
"Retail > Adult Store",
"Health and Medicine > Chiropractor",
"Health and Medicine > Mental Health Service",
"Business and Professional Services > Art Studio",
"Business and Professional Services > Metals Supplier",
"Landmarks and Outdoors > Island",
"Travel and Transportation > RV Park",
"Landmarks and Outdoors > Lighthouse",
"Retail > Construction Supplies Store",
"Business and Professional Services > Plastics Supplier",
"Retail > Video Store",
"Event > Conference",
"Retail > Tobacco Store",
"Business and Professional Services > Agriculture and Forestry Service",
"Landmarks and Outdoors > River",
"Business and Professional Services > Chemicals and Gasses Manufacturer",
"Business and Professional Services > Repair Service",
"Business and Professional Services > Rubber Supplier",
"Landmarks and Outdoors > Roof Deck",
"Retail > Vintage and Thrift Store",
"Sports and Recreation > Fishing Area",
"Sports and Recreation > Sports Club",
"Business and Professional Services > Business Service",
"Business and Professional Services > Media Agency",
"Landmarks and Outdoors > Hot Spring",
"Landmarks and Outdoors > Well",
"Business and Professional Services > Insurance Agency",
"Community and Government > Summer Camp",
"Landmarks and Outdoors > Bay",
"Business and Professional Services > Industrial Equipment Supplier",
"Retail > Comic Book Store",
"Travel and Transportation > Toll Booth",
"Dining and Drinking",
"Arts and Entertainment > Fair",
"Retail > Souvenir Store",
"Sports and Recreation > Paintball Field",
"Retail > Drugstore",
"Arts and Entertainment > Exhibit",
"Retail > Supplement Store",
"Retail > Outdoor Supply Store",
"Arts and Entertainment > Escape Room",
"Health and Medicine > Optometrist",
"Business and Professional Services > Engineer",
"Sports and Recreation > Indoor Play Area",
"Retail > Leather Goods Store",
"Community and Government > Public Bathroom",
"Business and Professional Services > Electrical Equipment Supplier",
"Travel and Transportation > Baggage Locker",
"Arts and Entertainment > Go Kart Track",
"Arts and Entertainment > Circus",
"Sports and Recreation > Hockey",
"Landmarks and Outdoors > Forest",
"Business and Professional Services > Computer Repair Service",
"Business and Professional Services > Waste Management Service",
"Travel and Transportation > Platform",
"Retail > Textiles Store",
"Business and Professional Services > Scientific Equipment Supplier",
"Landmarks and Outdoors > Pedestrian Plaza",
"Community and Government > Utility Company",
"Community and Government > Public and Social Service",
"Business and Professional Services > Petroleum Supplier",
"Business and Professional Services > Wedding Hall",
"Landmarks and Outdoors > Nature Preserve",
"Business and Professional Services > Ballroom",
"Community and Government > Prison",
"Sports and Recreation > Recreation Center",
"Sports and Recreation > Gun Range",
"Dining and Drinking > Smoothie Shop",
"Dining and Drinking > Night Market",
"Retail > Discount Store",
"Business and Professional Services > Welding Service",
"Travel and Transportation > Pier",
"Landmarks and Outdoors > Bathing Area",
"Business and Professional Services > Print, TV, Radio and Outdoor Advertising Service",
"Business and Professional Services > Online Advertising Service",
"Arts and Entertainment > Aquarium",
"Arts and Entertainment > Roller Rink",
"Community and Government > Trailer Park",
"Business and Professional Services > Paper Supplier",
"Retail > Framing Store",
"Landmarks and Outdoors > Tunnel",
"Health and Medicine > Urgent Care Center",
"Community and Government > Rehabilitation Center",
"Landmarks and Outdoors > Fountain",
"Arts and Entertainment > Planetarium",
"Sports and Recreation > Cricket Ground",
"Landmarks and Outdoors > Volcano",
"Business and Professional Services > Research Laboratory",
"Business and Professional Services > Equipment Rental Service",
"Community and Government",
"Retail > Medical Supply Store",
"Landmarks and Outdoors > Bike Trail",
"Business and Professional Services > Outdoor Event Space",
"Sports and Recreation > Rugby",
"Business and Professional Services > Laboratory",
"Business and Professional Services > Water Treatment Service",
"Business and Professional Services > Entertainment Agency",
"Retail > Pawn Shop",
"Arts and Entertainment > Salsa Club",
"Landmarks and Outdoors > Tree",
"Travel and Transportation > Toll Plaza",
"Travel and Transportation > Port",
"Landmarks and Outdoors > Rock Climbing Spot",
"Business and Professional Services > Creative Service",
"Business and Professional Services > Research Station",
"Business and Professional Services > Refrigeration and Ice Supplier",
"Business and Professional Services > Rental Service",
"Travel and Transportation > Border Crossing",
"Business and Professional Services > Industrial Estate",
"Business and Professional Services > Tutoring Service",
"Business and Professional Services > Laundromat",
"Arts and Entertainment > Mini Golf Course",
"Retail > Packaging Supply Store",
"Business and Professional Services > Translation Service",
"Retail > Duty-free Store",
"Community and Government > Observatory",
"Retail > Knitting Store",
"Travel and Transportation > Cruise",
"Business and Professional Services > Management Consultant",
"Retail > Betting Shop",
"Retail > Outlet Mall",
"Retail > Auction House",
"Travel and Transportation > Cable Car",
"Business and Professional Services > Power Plant",
"Landmarks and Outdoors > Dive Spot",
"Health and Medicine > Maternity Clinic",
"Health and Medicine > Women's Health Clinic",
"Health and Medicine > Nutritionist",
"Retail > Mobility Store",
"Business and Professional Services > Renewable Energy Service",
"Landmarks and Outdoors > Hill",
"Health and Medicine > Hospice",
"Business and Professional Services > Machine Shop",
"Landmarks and Outdoors > Memorial Site",
"Landmarks and Outdoors > Cave",
"Travel and Transportation > Truck Stop",
"Business and Professional Services > Logging Service",
"Landmarks and Outdoors > Waterfront",
"Sports and Recreation > Skydiving Center",
"Arts and Entertainment > Disc Golf",
"Business and Professional Services > Promotional Item Service",
"Business and Professional Services > Lottery Retailer",
]
def compute_features(candidate_point, radius=0.005):
    """Build neighbourhood features for one candidate location.

    Counts the banks and amenities whose stored coordinates lie within
    ``radius`` of ``candidate_point`` (via the module-level ``tree_banks``
    and ``tree_amenities`` KD-trees) and breaks the amenities down by
    category label.

    Args:
        candidate_point: ``(lat, lon)`` pair, in that order, matching the
            column order the KD-trees were built from.
        radius: Search radius expressed in the same units as the stored
            coordinates. The distance is plain Euclidean distance on the
            raw lat/lon values (presumably decimal degrees, not metres;
            confirm against the data).

    Returns:
        A dict with:
          * ``num_banks_in_radius``: number of banks within ``radius``.
          * ``total_amenities``: number of amenities within ``radius``.
          * ``category_diversity``: number of distinct category labels
            among those amenities (including labels that are not listed in
            ``DATASET_COLUMNS``).
          * ``num_<label>``: one entry per ``DATASET_COLUMNS`` label, 0 when
            no nearby amenity carries that label.
    """
    lat, lon = candidate_point
    query_point = [lat, lon]
    log = logging.getLogger(__name__)

    # Banks: only the count is emitted. Mean/min distance-to-bank values
    # were previously computed here but never added to the result, so that
    # work has been dropped.
    bank_idxs = tree_banks.query_ball_point(query_point, r=radius)
    log.debug("[BANK] %s", bank_idxs)

    # Amenities: tree indices line up with df_amenities row positions.
    amenity_idxs = tree_amenities.query_ball_point(query_point, r=radius)
    amenities = df_amenities.iloc[amenity_idxs]

    # Each amenity contributes only its first label, truncated to at most
    # two ">"-separated levels ("Top > Second"); amenities with no labels
    # are skipped.
    all_category_ids = [
        ">".join(cats[0].split(">")[:2]).strip()
        for cats in amenities['fsq_category_labels']
        if len(cats) > 0
    ]
    log.debug("[CATEGORIES] %s", all_category_ids)
    count_per_category = Counter(all_category_ids)

    features = {
        'num_banks_in_radius': len(bank_idxs),
        'total_amenities': len(amenities),
        # Distinct labels == number of Counter keys.
        'category_diversity': len(count_per_category),
    }
    # Emit a fixed set of count columns so every call returns the same keys.
    for feat in DATASET_COLUMNS:
        features[f'num_{feat}'] = count_per_category.get(feat, 0)
    return features