File size: 1,696 Bytes
1c3d5f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from collections import Counter
import pandas as pd
import numpy as np


def _top_level_category(labels):
    """Return the top-level category of the first label in *labels*, or None.

    The first label is split on ">" and the text before the first ">" is
    kept, stripped of surrounding whitespace. ``None`` is returned when
    *labels* is empty or has no length at all (e.g. a missing value such as
    ``None`` or NaN), so the caller can skip rows without usable labels.
    """
    try:
        if len(labels) == 0:
            return None
    except TypeError:
        # Missing values (None / float NaN) have no len(); treat as "no label".
        return None
    return labels[0].split(">")[0].strip()


def compute_features(candidate_point, radius=0.005):
    """Build a feature dict describing the surroundings of a candidate point.

    Relies on the module-level ``tree_banks`` / ``df_banks`` and
    ``tree_amenities`` / ``df_amenities`` objects.
    NOTE(review): this assumes each tree was built from the ``lat``/``lon``
    columns of its DataFrame, in the same row order, because the indices
    returned by ``query_ball_point`` are used positionally with ``iloc`` —
    confirm where the trees are constructed.

    Args:
        candidate_point: ``(lat, lon)`` pair of the location to describe.
        radius: Search radius passed to ``query_ball_point``; it is in the
            same units as the coordinates (presumably degrees — confirm).

    Returns:
        A dict with the fixed keys ``num_banks_in_radius``,
        ``mean_dist_banks``, ``min_dist_bank``, ``total_amenities`` and
        ``category_diversity``, plus one ``num_<category>`` entry for every
        top-level amenity category found inside the radius. When no bank is
        within the radius, both bank distances are set to ``radius``.
    """
    lat, lon = candidate_point

    # Banks
    bank_idxs = tree_banks.query_ball_point([lat, lon], r=radius)
    n_banks = len(bank_idxs)
    if n_banks > 0:
        neighbors = df_banks.iloc[bank_idxs]
        # Plain Euclidean distance in coordinate space, computed once and
        # reused for both statistics.
        bank_dists = np.sqrt(
            (neighbors['lat'] - lat) ** 2 + (neighbors['lon'] - lon) ** 2
        )
        mean_dist_banks = np.mean(bank_dists)
        min_dist_bank = np.min(bank_dists)
    else:
        # No bank nearby: fall back to the search radius as the distance.
        mean_dist_banks = radius
        min_dist_bank = radius

    # Amenities
    amenity_idxs = tree_amenities.query_ball_point([lat, lon], r=radius)
    amenities = df_amenities.iloc[amenity_idxs]

    # Top-level category of each amenity's first label; rows with empty or
    # missing labels are skipped but still count towards total_amenities.
    top_categories = [
        category
        for category in map(_top_level_category, amenities['fsq_category_labels'])
        if category is not None
    ]
    count_per_category = Counter(top_categories)

    features = {
        'num_banks_in_radius': n_banks,
        'mean_dist_banks': mean_dist_banks,
        'min_dist_bank': min_dist_bank,
        'total_amenities': len(amenities),
        'category_diversity': len(count_per_category),
    }

    # One count feature per top-level category present in the radius.
    features.update(
        (f'num_{cat}', cnt) for cat, cnt in count_per_category.items()
    )

    return features