Spaces:
Running
Running
File size: 1,627 Bytes
f4642b6 29d63ce f4642b6 29d63ce f4642b6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | import pandas as pd
from scipy.stats import yeojohnson
# Adjustment rates, based on the average estimate/actual value: volunteers
# usually put bigger values than the actual ones.
atnd_adj = 1.1  # divides the 'atnd' input in build_feature_row
ppr_adj = 1.2  # divides the 'ppr' input in build_feature_row

# Must be updated yearly with the January value.
current_to_2026_relative_inflation = 1.0
def compute_ppp_rpp(country_code, state_code, ppp_df, rpp_df):
    """Return the price-parity value for a location, shifted down by 1.

    For ``country_code == 'US'`` the row labelled ``state_code`` is read
    from ``rpp_df``; for any other country the row labelled
    ``country_code`` is read from ``ppp_df``. In both cases the first
    value of that row is taken and 1 is subtracted from it.

    NOTE(review): presumably PPP / RPP stand for purchasing power parity
    and regional price parity -- confirm against the data source.

    Args:
        country_code: Index label in ``ppp_df``; ``'US'`` switches the
            lookup to ``rpp_df``.
        state_code: Index label in ``rpp_df``; only used for ``'US'``.
        ppp_df: Table indexed by country code.
        rpp_df: Table indexed by state code.

    Returns:
        The first value of the selected row minus 1.

    Raises:
        KeyError: If the label is not present in the selected table.
    """
    if country_code == 'US':
        table, label = rpp_df, state_code
    else:
        table, label = ppp_df, country_code

    first_value = table.loc[label].values[0]
    return first_value - 1
def build_feature_row(inputs, ppp_rpp):
    """Build the one-row feature DataFrame from the raw form inputs.

    Args:
        inputs: Mapping with the keys ``'atnd'``, ``'exh'``, ``'ppr'``,
            ``'duration'``, ``'academic'``, ``'country'``, ``'event'``,
            ``'imag'``, ``'autom'``, ``'wireless'`` and
            ``'embedded_system'``.
        ppp_rpp: Value stored unchanged in the ``'ppp_rpp'`` column.

    Returns:
        A ``pandas.DataFrame`` with a single row; columns appear in the
        order they are filled in below.

    Raises:
        KeyError: If ``inputs`` lacks one of the keys listed above.
    """
    row = {}

    # Attendance and paper counts are divided by the module-level
    # adjustment rates before being used as features.
    row['act_atnd_tot_atnd_num'] = inputs['atnd'] / atnd_adj
    row['exh_num'] = inputs['exh']
    row['act_paprs_num'] = inputs['ppr'] / ppr_adj
    row['longevity'] = inputs['duration'] - 1
    row['ppp_rpp'] = ppp_rpp
    row['relative_inflation'] = current_to_2026_relative_inflation
    row['academic_venue'] = float(inputs['academic'])
    row['us_flag'] = float(inputs['country'] == 'US')

    # One-hot encoding of the event type (all zeros for any other value).
    event_columns = (
        ('conf_evnt_typ_nm_Workshop', "Workshop"),
        ('conf_evnt_typ_nm_Conference', "Conference"),
        ('conf_evnt_typ_nm_Other', "Other"),
    )
    event_type = inputs['event']
    for column, label in event_columns:
        row[column] = float(event_type == label)

    # Flags copied from the inputs as floats. The last column name
    # contains a space while its input key uses an underscore; presumably
    # this matches the column name the model expects -- confirm.
    flag_columns = (
        ('imag', 'imag'),
        ('autom', 'autom'),
        ('wireless', 'wireless'),
        ('embedded system', 'embedded_system'),
    )
    for column, input_key in flag_columns:
        row[column] = float(inputs[input_key])

    return pd.DataFrame([row])
def apply_yeojohnson(df, lambdas):
    """Apply fixed-lambda Yeo-Johnson transforms to columns of ``df``.

    Every key of ``lambdas`` except the first one names a column of
    ``df``; that column is overwritten with
    ``yeojohnson(column, lambdas[key])``.

    NOTE(review): the first key of ``lambdas`` is always skipped --
    presumably it belongs to a column that is not part of ``df`` (e.g.
    the target); confirm against the code that produces ``lambdas``.

    Args:
        df: DataFrame holding the columns to transform; modified in place.
        lambdas: Mapping from column name to Yeo-Johnson lambda, in a
            meaningful order (the first entry is ignored).

    Returns:
        The same ``df`` object, after the in-place transformation.
    """
    for position, column in enumerate(lambdas.keys()):
        if position == 0:
            # The first entry is deliberately left untransformed.
            continue
        df[column] = yeojohnson(df[column], lambdas[column])
    return df