# NOTE: hosting-page residue captured with this file (not code): "Spaces: Runtime error".
import ast
from collections import Counter

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
# Load the two point datasets. Paths are relative to the working directory.
df_amenities = pd.read_csv("df_amenities.csv")
df_banks = pd.read_csv("df_banks.csv")

# Each cell of "fsq_category_labels" is the text of a Python literal
# (presumably a list of label strings -- confirm against the CSV) and must be
# parsed back into an object.
# SECURITY: this used to be eval(), which executes any expression found in the
# CSV. ast.literal_eval only accepts literals and raises ValueError/SyntaxError
# on anything else.
df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
    ast.literal_eval
)

# Spatial indexes over the raw (lat, lon) pairs. cKDTree measures plain
# Euclidean distance in the units of those columns, not ground distance.
bank_coords = df_banks[["lat", "lon"]].values
tree_banks = cKDTree(bank_coords)
amenity_coords = df_amenities[["lat", "lon"]].values
tree_amenities = cKDTree(amenity_coords)
# Top-level category names for which compute_features always emits a
# "num_<name>" count (0 when no nearby amenity has that category).
# Order matters: it fixes the order of those keys in the returned dict.
DATASET_COLUMNS = [
    "Dining and Drinking",
    "Community and Government",
    "Retail",
    "Business and Professional Services",
    "Landmarks and Outdoors",
    "Arts and Entertainment",
    "Health and Medicine",
    "Travel and Transportation",
    "Sports and Recreation",
    "Event",
]
def compute_features(candidate_point, radius=0.005):
    """Build neighbourhood features for one candidate location.

    Reads the module-level ``tree_banks``, ``tree_amenities``,
    ``df_amenities`` and ``DATASET_COLUMNS``.

    Args:
        candidate_point: ``(lat, lon)`` pair to evaluate.
        radius: Search radius as a Euclidean distance in the same units as
            the ``lat``/``lon`` columns the trees were built from. It is not
            a ground distance.

    Returns:
        A dict with, in this order:

        * ``num_banks_in_radius`` -- banks within ``radius``.
        * ``total_amenities`` -- amenities within ``radius``.
        * ``category_diversity`` -- number of distinct top-level categories
          among those amenities.
        * ``num_<category>`` for every entry of ``DATASET_COLUMNS`` -- how
          many nearby amenities have that top-level category (0 if none).
    """
    lat, lon = candidate_point
    query = [lat, lon]

    # Only the bank count feeds the feature set, so the matching rows are
    # never looked up.
    n_banks = len(tree_banks.query_ball_point(query, r=radius))

    amenity_idxs = tree_amenities.query_ball_point(query, r=radius)
    nearby_labels = df_amenities["fsq_category_labels"].iloc[amenity_idxs]

    # Each amenity contributes at most one category: the text before the
    # first ">" of its FIRST label. Amenities with no labels still count
    # toward total_amenities but not toward any category.
    count_per_category = Counter(
        cats[0].split(">")[0].strip()
        for cats in nearby_labels
        if len(cats) > 0
    )

    features = {
        "num_banks_in_radius": n_banks,
        "total_amenities": len(amenity_idxs),
        "category_diversity": len(count_per_category),
    }
    # Always emit every known category so the feature vector has a fixed shape.
    for feat in DATASET_COLUMNS:
        features[f"num_{feat}"] = count_per_category.get(feat, 0)
    return features