jonathanjordan21 committed on
Commit
0f96a0a
·
verified ·
1 Parent(s): b9fb926

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +22 -2
utils.py CHANGED
@@ -1,13 +1,31 @@
1
  from collections import Counter
2
  import pandas as pd
3
  import numpy as np
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def compute_features(candidate_point, radius=0.005):
7
  lat, lon = candidate_point
8
 
9
  # Banks
10
  bank_idxs = tree_banks.query_ball_point([lat, lon], r=radius)
 
11
  n_banks = len(bank_idxs)
12
  if n_banks > 0:
13
  neighbors = df_banks.iloc[bank_idxs]
@@ -37,9 +55,11 @@ def compute_features(candidate_point, radius=0.005):
37
 
38
  # Count occurrences per category
39
  count_per_category = Counter(all_category_ids)
40
- for cat, cnt in count_per_category.items():
41
- features[f'num_{cat}'] = cnt
 
42
 
 
43
  # # Count occurrences of first category
44
  # first_categories = [cats[0] for cats in amenities['fsq_category_ids'] if len(cats)>0]
45
  # count_first_category = Counter(first_categories)
 
1
  from collections import Counter
2
  import pandas as pd
3
  import numpy as np
4
+ from scipy.spatial import cKDTree
5
 
6
+ df_amenities = pd.read_csv("df_amenities.csv")  # amenities table; must be bound to df_amenities, which the amenity KD-tree is built from
7
+ df_banks = pd.read_csv("df_banks.csv")
8
+
9
+ bank_coords = df_banks[['lat','lon']].values
10
+ tree_banks = cKDTree(bank_coords)
11
+
12
+ amenity_coords = df_amenities[['lat','lon']].values
13
+ tree_amenities = cKDTree(amenity_coords)
14
+
15
+ DATASET_COLUMNS = [
16
+ 'num_Dining and Drinking', 'num_Community and Government', 'num_Retail',
17
+ 'num_Business and Professional Services', 'num_Landmarks and Outdoors',
18
+ 'num_Arts and Entertainment', 'num_Health and Medicine',
19
+ 'num_Travel and Transportation', 'num_Sports and Recreation',
20
+ 'num_Event'
21
+ ]
22
 
23
  def compute_features(candidate_point, radius=0.005):
24
  lat, lon = candidate_point
25
 
26
  # Banks
27
  bank_idxs = tree_banks.query_ball_point([lat, lon], r=radius)
28
+
29
  n_banks = len(bank_idxs)
30
  if n_banks > 0:
31
  neighbors = df_banks.iloc[bank_idxs]
 
55
 
56
  # Count occurrences per category
57
  count_per_category = Counter(all_category_ids)
58
+ for feat in DATASET_COLUMNS:
59
+ # DATASET_COLUMNS entries already carry the 'num_' prefix: use them as-is for the feature key and strip the prefix for the Counter lookup (assumes the Counter is keyed by bare category, as the replaced loop implied)
60
+ features[feat] = count_per_category.get(feat[len('num_'):], 0)
61
 
62
+
63
  # # Count occurrences of first category
64
  # first_categories = [cats[0] for cats in amenities['fsq_category_ids'] if len(cats)>0]
65
  # count_first_category = Counter(first_categories)