jonathanjordan21 committed on
Commit
bede559
·
verified ·
1 Parent(s): 8424804

Update utils2.py

Browse files
Files changed (1) hide show
  1. utils2.py +90 -58
utils2.py CHANGED
@@ -1,25 +1,25 @@
1
  from collections import Counter
2
  import pandas as pd
3
  import numpy as np
4
- from scipy.spatial import cKDTree
5
 
6
 
7
- df_amenities = pd.read_csv("df_indonesia.csv").rename(
8
- columns={"latitude":"lat", "longitude":"lon"}
9
- )
10
- df_banks = pd.read_csv("df_bank_indonesia.csv").rename(
11
- columns={"latitude":"lat", "longitude":"lon"}
12
- )
13
 
14
- df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
15
- lambda x: eval(x)
16
- )
17
 
18
- bank_coords = df_banks[['lat','lon']].values
19
- tree_banks = cKDTree(bank_coords)
20
 
21
- amenity_coords = df_amenities[['lat','lon']].values
22
- tree_amenities = cKDTree(amenity_coords)
23
 
24
  DATASET_COLUMNS = [
25
  'Dining and Drinking', 'Community and Government', 'Retail',
@@ -29,55 +29,87 @@ DATASET_COLUMNS = [
29
  'Event'
30
  ]
31
 
32
- def compute_features(candidate_point, radius=0.005):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  lat, lon = candidate_point
34
 
35
- # Banks
36
- bank_idxs = tree_banks.query_ball_point([lat, lon], r=radius)
37
-
38
- print("[BANK]", bank_idxs)
39
-
40
- n_banks = len(bank_idxs)
41
- if n_banks > 0:
42
- neighbors = df_banks.iloc[bank_idxs]
43
- mean_dist_banks = np.mean(np.sqrt((neighbors['lat']-lat)**2 + (neighbors['lon']-lon)**2))
44
- min_dist_bank = np.min(np.sqrt((neighbors['lat']-lat)**2 + (neighbors['lon']-lon)**2))
45
- else:
46
- mean_dist_banks = radius
47
- min_dist_bank = radius
48
-
49
- # Amenities
50
- amenity_idxs = tree_amenities.query_ball_point([lat, lon], r=radius)
51
- amenities = df_amenities.iloc[amenity_idxs]
52
-
53
- total_amenities = len(amenities)
54
-
55
- # Flatten all category IDs
56
- # for cats in amenities['fsq_category_labels']:
57
- all_category_ids = [cats[0].split(">")[0].strip() for cats in amenities['fsq_category_labels'] if len(cats)>0]
58
- category_diversity = len(set(all_category_ids))
59
-
60
  features = {
61
- 'num_banks_in_radius': n_banks,
62
- # 'mean_dist_banks': mean_dist_banks,
63
- # 'min_dist_bank': min_dist_bank,
64
- 'total_amenities': total_amenities,
65
- 'category_diversity': category_diversity
66
  }
67
 
68
- # Count occurrences per category
69
- print("[CATEGORIES]", all_category_ids)
70
- count_per_category = Counter(all_category_ids)
71
- for feat in DATASET_COLUMNS:
72
- print("[FEAT]",feat)
73
- # for cat, cnt in count_per_category.items():
74
- features[f'num_{feat}'] = count_per_category.get(feat, 0)
75
-
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- # # Count occurrences of first category
78
- # first_categories = [cats[0] for cats in amenities['fsq_category_ids'] if len(cats)>0]
79
- # count_first_category = Counter(first_categories)
80
- # for cat, cnt in count_first_category.items():
81
- # features[f'num_first_{cat}'] = cnt
 
82
 
83
  return features
 
1
  from collections import Counter
2
  import pandas as pd
3
  import numpy as np
4
+ # from scipy.spatial import cKDTree
5
 
6
 
7
+ # df_amenities = pd.read_csv("df_indonesia.csv").rename(
8
+ # columns={"latitude":"lat", "longitude":"lon"}
9
+ # )
10
+ # df_banks = pd.read_csv("df_bank_indonesia.csv").rename(
11
+ # columns={"latitude":"lat", "longitude":"lon"}
12
+ # )
13
 
14
+ # df_amenities["fsq_category_labels"] = df_amenities["fsq_category_labels"].apply(
15
+ # lambda x: eval(x)
16
+ # )
17
 
18
+ # bank_coords = df_banks[['lat','lon']].values
19
+ # tree_banks = cKDTree(bank_coords)
20
 
21
+ # amenity_coords = df_amenities[['lat','lon']].values
22
+ # tree_amenities = cKDTree(amenity_coords)
23
 
24
  DATASET_COLUMNS = [
25
  'Dining and Drinking', 'Community and Government', 'Retail',
 
29
  'Event'
30
  ]
31
 
32
+ import os
33
+ from google.maps import areainsights_v1
34
+ from google.maps.areainsights_v1.types import ComputeInsightsRequest, Filter, LocationFilter, Insight
35
+ from google.type import latlng_pb2
36
+ import asyncio
37
+
38
+
39
async def compute_places_count_with_api_key(api_key, lat, lng, radius, place_type):
    """Count places of the given type(s) inside a circle via Area Insights.

    Builds a ``ComputeInsightsRequest`` asking for ``Insight.INSIGHT_COUNT``
    with a circular location filter and a type filter, sends it through an
    ``AreaInsightsAsyncClient`` authenticated with ``api_key``, and returns
    the count reported by the service.

    Args:
        api_key: API key handed to the client through ``client_options``.
        lat: Latitude of the circle centre.
        lng: Longitude of the circle centre.
        radius: Circle radius, forwarded unchanged to
            ``LocationFilter.Circle`` (presumably metres -- confirm against
            the API reference).
        place_type: Either a single place type string (``"atm"``) or an
            iterable of place type strings (``["atm"]``).

    Returns:
        The count as an ``int``, or ``None`` when anything went wrong. The
        failure is printed rather than raised, so callers must be prepared
        to receive ``None``.
    """
    try:
        # A bare string must be wrapped: passing it straight to a repeated
        # field would be interpreted as a sequence of single characters.
        if isinstance(place_type, str):
            included_types = [place_type]
        else:
            included_types = list(place_type)

        client = areainsights_v1.AreaInsightsAsyncClient(
            client_options={"api_key": api_key}
        )

        # 1. Geographic filter: a circle around the candidate point.
        location_filter = LocationFilter(
            circle=LocationFilter.Circle(
                lat_lng=latlng_pb2.LatLng(latitude=lat, longitude=lng),
                radius=radius,
            )
        )

        # 2. Place type filter. The previous code referenced an undefined
        #    name here, so every call fell into the except branch.
        type_filter = areainsights_v1.TypeFilter(
            included_types=included_types
        )

        # 3. Request body: only the total number of matching places.
        request = ComputeInsightsRequest(
            insights=[Insight.INSIGHT_COUNT],
            filter=Filter(
                location_filter=location_filter,
                type_filter=type_filter,
            ),
        )

        response = await client.compute_insights(request=request)
        return int(response.count)
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # decide what a missing count means.
        print(f"An error occurred: {e}")
        return None
77
+
78
+
79
async def _collect_place_counts(api_key, lat, lon, radius, type_groups):
    """Run one place-count lookup per entry of ``type_groups`` concurrently.

    Returns the results in the same order as ``type_groups``; an entry is
    ``None`` when its lookup failed.
    """
    return await asyncio.gather(*(
        compute_places_count_with_api_key(api_key, lat, lon, radius, types)
        for types in type_groups
    ))


def compute_features(candidate_point, api_key, radius=5000):
    """Build the feature dictionary for one candidate location.

    For every entry of ``GOOGLE_PLACE_TYPE_MAPPING`` the number of matching
    places within ``radius`` of the candidate point is looked up and stored
    as ``num_<column>``, where ``<column>`` is the ``DATASET_COLUMNS`` entry
    at the same position. One more lookup with the types ``['atm']`` fills
    ``num_banks_in_radius``.

    This function is synchronous: it drives the asynchronous lookups with
    ``asyncio.run`` and therefore must not be called from code that is
    already running inside an event loop (``asyncio.run`` raises
    ``RuntimeError`` in that case).

    Args:
        candidate_point: ``(lat, lon)`` pair.
        api_key: API key forwarded to every lookup.
        radius: Search radius forwarded to every lookup.

    Returns:
        A dict with ``num_banks_in_radius``, ``total_amenities``,
        ``category_diversity`` and one ``num_<column>`` entry per category.
        A count is ``None`` when its lookup failed; such entries are left
        out of ``total_amenities`` and ``category_diversity``.
    """
    lat, lon = candidate_point

    # NOTE(review): GOOGLE_PLACE_TYPE_MAPPING is not defined in the part of
    # the file visible here. It is assumed to be a sequence, index-aligned
    # with DATASET_COLUMNS, whose items are lists of place types -- confirm.
    type_groups = list(GOOGLE_PLACE_TYPE_MAPPING)

    # The ATM lookup rides along as the last request so that a single event
    # loop run serves every query.
    *category_counts, n_banks = asyncio.run(
        _collect_place_counts(api_key, lat, lon, radius, type_groups + [['atm']])
    )

    # zip stops at the shorter sequence, so a mapping longer than
    # DATASET_COLUMNS is truncated rather than raising IndexError.
    category_features = {
        f'num_{column}': count
        for column, count in zip(DATASET_COLUMNS, category_counts)
    }

    # Failed lookups come back as None; keep them visible per category but
    # exclude them from the aggregates so one failure cannot raise TypeError.
    known_counts = [c for c in category_features.values() if c is not None]

    features = {
        'num_banks_in_radius': n_banks,
        'total_amenities': sum(known_counts),
        'category_diversity': sum(1 for c in known_counts if c),
    }
    features.update(category_features)

    return features