Spaces:
Runtime error
Runtime error
Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import Counter
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _top_level_category(labels):
    """Return the top-level category of the first label in *labels*.

    A label is split on ">" and the text before the first ">" is returned,
    stripped of surrounding whitespace. Returns ``None`` when *labels* is
    empty or is a missing value (``None`` / NaN) that has no length.
    """
    try:
        # len() rather than truthiness: array-like cells have no unambiguous
        # truth value.
        if len(labels) == 0:
            return None
    except TypeError:
        # Missing cells (None / float NaN) have no length; treat them as
        # "no category" instead of crashing the whole feature computation.
        return None
    return labels[0].split(">")[0].strip()


def compute_features(candidate_point, radius=0.005, *, bank_tree=None,
                     bank_df=None, amenity_tree=None, amenity_df=None):
    """Build a feature dict describing the surroundings of a candidate point.

    Args:
        candidate_point: ``(lat, lon)`` pair.
        radius: Search radius passed to ``query_ball_point``; also used as the
            fallback value for both bank distances when no bank is in range.
            It is in the same units as the coordinates the trees were built
            from (presumably decimal degrees -- confirm against the caller).
        bank_tree: Spatial index exposing ``query_ball_point`` (presumably a
            ``scipy.spatial`` KD-tree built from ``bank_df`` rows as
            ``[lat, lon]`` -- confirm). Defaults to the module-level
            ``tree_banks``.
        bank_df: DataFrame with ``lat`` and ``lon`` columns, positionally
            aligned with ``bank_tree``. Defaults to the module-level
            ``df_banks``.
        amenity_tree: Spatial index for amenities. Defaults to the
            module-level ``tree_amenities``.
        amenity_df: DataFrame with an ``fsq_category_labels`` column,
            positionally aligned with ``amenity_tree``. Defaults to the
            module-level ``df_amenities``.

    Returns:
        dict with the fixed keys ``num_banks_in_radius``, ``mean_dist_banks``,
        ``min_dist_bank``, ``total_amenities`` and ``category_diversity``,
        plus one ``num_<category>`` count for every top-level category found
        among the amenities in range.

    Raises:
        NameError: if an index/frame is not passed and the corresponding
            module-level global has not been defined.
    """
    # Explicit arguments win; otherwise keep the original behaviour of reading
    # the module-level globals (which this module itself does not define).
    if bank_tree is None:
        bank_tree = tree_banks
    if bank_df is None:
        bank_df = df_banks
    if amenity_tree is None:
        amenity_tree = tree_amenities
    if amenity_df is None:
        amenity_df = df_amenities

    lat, lon = candidate_point
    query_point = [lat, lon]

    # Banks
    bank_idxs = bank_tree.query_ball_point(query_point, r=radius)
    n_banks = len(bank_idxs)
    if n_banks > 0:
        neighbors = bank_df.iloc[bank_idxs]
        # Planar Euclidean distance on the raw coordinates, computed once and
        # reused for both statistics.
        dists = np.sqrt((neighbors['lat'] - lat) ** 2
                        + (neighbors['lon'] - lon) ** 2)
        mean_dist_banks = np.mean(dists)
        min_dist_bank = np.min(dists)
    else:
        # No bank in range: report the search radius as the distance.
        mean_dist_banks = radius
        min_dist_bank = radius

    # Amenities
    amenity_idxs = amenity_tree.query_ball_point(query_point, r=radius)
    amenities = amenity_df.iloc[amenity_idxs]

    # Top-level category of each amenity's first label; amenities without a
    # usable label are skipped (they still count towards total_amenities).
    top_levels = [
        category
        for category in map(_top_level_category,
                            amenities['fsq_category_labels'])
        if category is not None
    ]
    count_per_category = Counter(top_levels)

    features = {
        'num_banks_in_radius': n_banks,
        'mean_dist_banks': mean_dist_banks,
        'min_dist_bank': min_dist_bank,
        'total_amenities': len(amenities),
        'category_diversity': len(count_per_category),
    }

    # Count occurrences per category
    for cat, cnt in count_per_category.items():
        features[f'num_{cat}'] = cnt

    return features
|