File size: 1,627 Bytes
f4642b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29d63ce
 
f4642b6
29d63ce
 
f4642b6
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
from scipy.stats import yeojohnson

# Adjustment rates for volunteer-supplied estimates. Each rate is based on the
# average estimate/actual ratio: volunteers usually enter values larger than
# the actual ones, so build_feature_row divides the raw inputs by these rates.
atnd_adj = 1.1  # divisor applied to inputs['atnd']
ppr_adj = 1.2  # divisor applied to inputs['ppr']
# Value emitted as the 'relative_inflation' feature; must be updated yearly
# with the January value.
current_to_2026_relative_inflation = 1.

def compute_ppp_rpp(country_code, state_code, ppp_df, rpp_df):
    """Return the price-parity offset (table value minus 1) for a location.

    When ``country_code`` is ``'US'`` the row labelled ``state_code`` is read
    from ``rpp_df``; for every other country the row labelled
    ``country_code`` is read from ``ppp_df``. Only the first value of the
    selected row is used.

    Args:
        country_code: Country label; ``'US'`` selects the state-level table.
        state_code: Row label in ``rpp_df``; ignored for non-US countries.
        ppp_df: Table indexed by country code.
        rpp_df: Table indexed by state code.

    Returns:
        The first value of the selected row, minus 1.

    Raises:
        KeyError: If the label is missing from the selected table's index.
    """
    # NOTE(review): presumably PPP = purchasing power parity and RPP = regional
    # price parity - confirm against the data source.
    if country_code == 'US':
        table, label = rpp_df, state_code
    else:
        table, label = ppp_df, country_code

    first_value = table.loc[label].values[0]
    return first_value - 1


def build_feature_row(inputs, ppp_rpp):
    """Assemble a one-row DataFrame of features from raw form inputs.

    Args:
        inputs: Mapping that provides the keys ``'atnd'``, ``'exh'``,
            ``'ppr'``, ``'duration'``, ``'academic'``, ``'country'``,
            ``'event'``, ``'imag'``, ``'autom'``, ``'wireless'`` and
            ``'embedded_system'``.
        ppp_rpp: Value stored unchanged in the ``'ppp_rpp'`` column.

    Returns:
        A ``pandas.DataFrame`` with exactly one row and one column per
        feature, in the order the features are added below.

    Raises:
        KeyError: If ``inputs`` lacks one of the required keys.
    """
    row = {}

    # Count-like inputs: 'atnd' and 'ppr' are scaled down by the module-level
    # adjustment rates; 'duration' is shifted down by one.
    row['act_atnd_tot_atnd_num'] = inputs['atnd'] / atnd_adj
    row['exh_num'] = inputs['exh']
    row['act_paprs_num'] = inputs['ppr'] / ppr_adj
    row['longevity'] = inputs['duration'] - 1

    # Economic context.
    row['ppp_rpp'] = ppp_rpp
    row['relative_inflation'] = current_to_2026_relative_inflation

    # Flags are stored as floats (0.0 / 1.0).
    row['academic_venue'] = float(inputs['academic'])
    row['us_flag'] = float(inputs['country'] == 'US')

    # One-hot encoding of the event type.
    event_columns = (
        ('conf_evnt_typ_nm_Workshop', "Workshop"),
        ('conf_evnt_typ_nm_Conference', "Conference"),
        ('conf_evnt_typ_nm_Other', "Other"),
    )
    for column, label in event_columns:
        row[column] = float(inputs['event'] == label)

    # Remaining flags; note that the last column name contains a space while
    # its input key uses an underscore.
    flag_columns = (
        ('imag', 'imag'),
        ('autom', 'autom'),
        ('wireless', 'wireless'),
        ('embedded system', 'embedded_system'),
    )
    for column, key in flag_columns:
        row[column] = float(inputs[key])

    return pd.DataFrame([row])


def apply_yeojohnson(df, lambdas):
    """Apply Yeo-Johnson transforms to columns of ``df`` in place.

    Every key of ``lambdas`` except the first one (in iteration order) names
    a column of ``df``; that column is replaced by its Yeo-Johnson transform
    computed with the lambda stored under the same key.

    Args:
        df: DataFrame whose columns are overwritten in place.
        lambdas: Mapping of column name to Yeo-Johnson lambda.

    Returns:
        The same ``df`` object, after modification.
    """
    # NOTE(review): the first key is skipped on purpose by position -
    # presumably it is a column that is absent from df; confirm with the
    # code that builds `lambdas`.
    for position, column in enumerate(list(lambdas.keys())):
        if position == 0:
            continue
        df[column] = yeojohnson(df[column], lambdas[column])
    return df