Spaces:
Running
Running
| import pandas as pd | |
| from scipy.stats import yeojohnson | |
# Attendance adjustment rate, based on the average estimate/actual ratio:
# volunteers usually report values larger than the actual ones.
atnd_adj = 1.1

# Paper-count adjustment rate, based on the average estimate/actual ratio:
# volunteers usually report values larger than the actual ones.
ppr_adj = 1.2

# Relative inflation from the current period to 2026.
# Must be updated yearly with the January value.
current_to_2026_relative_inflation = 1.0
def compute_ppp_rpp(country_code, state_code, ppp_df, rpp_df):
    """Return the first value stored for a location, minus 1.

    When ``country_code`` is ``'US'`` the row labelled ``state_code`` is read
    from ``rpp_df``; for every other country the row labelled
    ``country_code`` is read from ``ppp_df``. ``state_code`` is ignored for
    non-US countries.

    NOTE(review): presumably ``ppp_df`` / ``rpp_df`` hold purchasing-power and
    regional price parities indexed by country / state code - confirm
    against the caller.

    Raises:
        KeyError: if the selected label is not present in the table's index.
    """
    # Choose the lookup table and row label once, then do a single lookup.
    if country_code != 'US':
        table, label = ppp_df, country_code
    else:
        table, label = rpp_df, state_code

    first_value = table.loc[label].values[0]
    return first_value - 1
def build_feature_row(inputs, ppp_rpp):
    """Assemble a single-row DataFrame of features from raw inputs.

    Args:
        inputs: Mapping read with the keys 'atnd', 'exh', 'ppr', 'duration',
            'academic', 'country', 'event', 'imag', 'autom', 'wireless' and
            'embedded_system'.
        ppp_rpp: Value stored unchanged in the 'ppp_rpp' column.

    Returns:
        A pandas DataFrame with exactly one row. Columns appear in the order
        in which they are assigned below.
    """
    row = {}

    # Attendance and paper counts are divided by the module-level
    # adjustment rates (atnd_adj, ppr_adj) before being stored.
    row['act_atnd_tot_atnd_num'] = inputs['atnd'] / atnd_adj
    row['exh_num'] = inputs['exh']
    row['act_paprs_num'] = inputs['ppr'] / ppr_adj
    row['longevity'] = inputs['duration'] - 1
    row['ppp_rpp'] = ppp_rpp
    row['relative_inflation'] = current_to_2026_relative_inflation

    # Flags are stored as floats (0.0 / 1.0 for boolean inputs).
    row['academic_venue'] = float(inputs['academic'])
    row['us_flag'] = float(inputs['country'] == 'US')

    # One indicator column per recognised event type; any other event value
    # leaves all three columns at 0.0.
    event_type = inputs['event']
    row['conf_evnt_typ_nm_Workshop'] = float(event_type == "Workshop")
    row['conf_evnt_typ_nm_Conference'] = float(event_type == "Conference")
    row['conf_evnt_typ_nm_Other'] = float(event_type == "Other")

    row['imag'] = float(inputs['imag'])
    row['autom'] = float(inputs['autom'])
    row['wireless'] = float(inputs['wireless'])
    # The column name contains a space; the input key uses an underscore.
    row['embedded system'] = float(inputs['embedded_system'])

    return pd.DataFrame([row])
def apply_yeojohnson(df, lambdas):
    """Yeo-Johnson-transform columns of ``df`` in place using fixed lambdas.

    Every key of ``lambdas`` except the first one names a column of ``df``;
    that column is replaced by ``yeojohnson(column, lambdas[key])``.

    NOTE(review): the first key of ``lambdas`` is deliberately skipped -
    presumably it does not correspond to a feature column of ``df``; confirm
    against the code that builds ``lambdas``.

    Args:
        df: DataFrame containing a column for each non-first key.
        lambdas: Mapping from column name to transform parameter.

    Returns:
        The same ``df`` object, modified in place.
    """
    remaining_keys = iter(lambdas.keys())
    next(remaining_keys, None)  # drop the first key; no-op when empty

    for column in remaining_keys:
        df[column] = yeojohnson(df[column], lambdas[column])
    return df