"""Helpers that turn raw event inputs into a single model feature row."""

import pandas as pd
from scipy.stats import yeojohnson

# Volunteers usually enter larger numbers than the real ones, so their
# attendance estimate is divided by this average estimate/actual ratio.
atnd_adj = 1.1

# Same correction for the paper-count estimate (average estimate/actual ratio).
ppr_adj = 1.2

# Must be updated every year with the January value.
current_to_2026_relative_inflation = 1.


def compute_ppp_rpp(country_code, state_code, ppp_df, rpp_df):
    """Return the price-parity value for a location, shifted down by one.

    For ``country_code == 'US'`` the row labelled ``state_code`` is read from
    ``rpp_df``; for any other country the row labelled ``country_code`` is
    read from ``ppp_df``. In both cases the first value of that row is used
    and ``1`` is subtracted from it.

    Args:
        country_code: Country label; the literal ``'US'`` selects ``rpp_df``.
        state_code: Row label into ``rpp_df``; only used for ``'US'``.
        ppp_df: Table indexed by country label (assumed to be a DataFrame
            whose first column holds the parity value -- confirm with caller).
        rpp_df: Table indexed by state label (same assumption as ``ppp_df``).

    Returns:
        The first value of the selected row minus one.

    Raises:
        KeyError: If the label is not present in the selected table's index.
    """
    if country_code == 'US':
        table, label = rpp_df, state_code
    else:
        table, label = ppp_df, country_code
    return table.loc[label].values[0] - 1


def build_feature_row(inputs, ppp_rpp):
    """Build a one-row DataFrame of model features from raw user inputs.

    Args:
        inputs: Mapping that provides the keys ``'atnd'``, ``'exh'``,
            ``'ppr'``, ``'duration'``, ``'academic'``, ``'country'``,
            ``'event'``, ``'imag'``, ``'autom'``, ``'wireless'`` and
            ``'embedded_system'``.
        ppp_rpp: Value stored unchanged in the ``'ppp_rpp'`` column.

    Returns:
        A ``pandas.DataFrame`` with exactly one row and fifteen columns.

    Raises:
        KeyError: If a required key is missing from ``inputs``.
    """
    event_type = inputs['event']
    features = {
        # Volunteer-provided estimates are scaled down by the adjustment rates.
        'act_atnd_tot_atnd_num': inputs['atnd'] / atnd_adj,
        'exh_num': inputs['exh'],
        'act_paprs_num': inputs['ppr'] / ppr_adj,
        'longevity': inputs['duration'] - 1,
        'ppp_rpp': ppp_rpp,
        'relative_inflation': current_to_2026_relative_inflation,
        'academic_venue': float(inputs['academic']),
        'us_flag': float(inputs['country'] == 'US'),
        # One-hot encoding of the event type.
        'conf_evnt_typ_nm_Workshop': float(event_type == "Workshop"),
        'conf_evnt_typ_nm_Conference': float(event_type == "Conference"),
        'conf_evnt_typ_nm_Other': float(event_type == "Other"),
        # Flags converted to 0.0 / 1.0.
        'imag': float(inputs['imag']),
        'autom': float(inputs['autom']),
        'wireless': float(inputs['wireless']),
        'embedded system': float(inputs['embedded_system']),
    }
    return pd.DataFrame([features])


def apply_yeojohnson(df, lambdas):
    """Apply the Yeo-Johnson transform to columns of ``df`` in place.

    Every key of ``lambdas`` except the first (in the mapping's iteration
    order) names a column of ``df``; that column is replaced by
    ``yeojohnson(column, lambdas[key])``.

    Args:
        df: DataFrame to transform. It is modified in place.
        lambdas: Mapping from column name to Yeo-Johnson lambda.

    Returns:
        The same ``df`` object, after modification.
    """
    # NOTE(review): the first key is deliberately skipped, as in the original
    # code -- presumably it names a column that must stay untransformed
    # (e.g. the target); confirm against the code that builds ``lambdas``.
    for column in list(lambdas)[1:]:
        df[column] = yeojohnson(df[column], lambdas[column])
    return df