jonathanjordan21 committed on
Commit
1c3d5f7
·
verified ·
1 Parent(s): d473063

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +49 -0
utils.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import Counter
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+
6
def _top_level_category(labels):
    """Return the top-level part of the first label in *labels*, or None.

    A label is split on ``">"`` and the first piece, stripped of surrounding
    whitespace, is returned. ``None`` is returned when *labels* is empty,
    has no length (e.g. a missing ``None``/NaN cell), or its first entry is
    not a string.
    """
    try:
        if len(labels) == 0:
            return None
    except TypeError:
        # Missing cell (None / NaN float): there are no labels to read.
        return None
    first = labels[0]
    if not isinstance(first, str):
        return None
    return first.split(">")[0].strip()


def compute_features(candidate_point, radius=0.005):
    """Build a feature dict for the surroundings of a candidate point.

    Reads four module-level names that are not defined in this function:
    ``tree_banks``, ``df_banks``, ``tree_amenities`` and ``df_amenities``.
    NOTE(review): presumably the trees are KD-trees built over the
    ``(lat, lon)`` columns of the matching frame, in the same row order --
    confirm against the code that creates them.

    Args:
        candidate_point: A ``(lat, lon)`` pair.
        radius: Search radius passed to ``query_ball_point``; it is also the
            fallback value for both bank distances when no bank is found.

    Returns:
        A dict with the keys ``num_banks_in_radius``, ``mean_dist_banks``,
        ``min_dist_bank``, ``total_amenities`` and ``category_diversity``,
        plus one ``num_<category>`` entry per top-level category seen among
        the amenities in range.
    """
    lat, lon = candidate_point
    query = [lat, lon]

    # Banks
    bank_idxs = tree_banks.query_ball_point(query, r=radius)
    n_banks = len(bank_idxs)
    if n_banks > 0:
        neighbors = df_banks.iloc[bank_idxs]
        # Plain Euclidean distance on the raw coordinates; computed once and
        # shared by the mean and the min.
        bank_dists = np.sqrt(
            (neighbors['lat'] - lat) ** 2 + (neighbors['lon'] - lon) ** 2
        )
        mean_dist_banks = np.mean(bank_dists)
        min_dist_bank = np.min(bank_dists)
    else:
        # No bank in range: fall back to the search radius for both values.
        mean_dist_banks = radius
        min_dist_bank = radius

    # Amenities
    amenity_idxs = tree_amenities.query_ball_point(query, r=radius)
    amenities = df_amenities.iloc[amenity_idxs]

    # Top-level category of each amenity's first label. Rows without a usable
    # label are skipped here but still counted in ``total_amenities``.
    top_level_categories = [
        category
        for category in map(_top_level_category, amenities['fsq_category_labels'])
        if category is not None
    ]
    count_per_category = Counter(top_level_categories)

    features = {
        'num_banks_in_radius': n_banks,
        'mean_dist_banks': mean_dist_banks,
        'min_dist_bank': min_dist_bank,
        'total_amenities': len(amenities),
        'category_diversity': len(count_per_category),
    }

    # One count feature per top-level category present in the neighbourhood.
    for cat, cnt in count_per_category.items():
        features[f'num_{cat}'] = cnt

    return features