""" Admission reduction utilities """ import pandas as pd from datetime import date def fill_missing_years(df): """ Add admission data from years where patient is missing from the dataset -------- :param df: dataframe to be updated :return: dataframe with missing years added """ df = df.sort_values('ADMDATE') year_col = df.eoy.dt.year.tolist() end_month = df.eoy.dt.month.iloc[0] end_day = df.eoy.dt.day.iloc[0] # We only want missing years year_range = range(year_col[0] + 1, year_col[-1]) years = [y for y in year_range if not (y in year_col)] # If any years missing add rows if len(years) > 0: sh_id = df.SafeHavenID.iloc[0] eth_grp = df.eth_grp.iloc[0] adm_dates = pd.to_datetime([date(y, end_month, end_day) for y in years]) data = {'SafeHavenID': sh_id, 'eth_grp': eth_grp, 'ADMDATE': adm_dates, 'STAY': 0, 'copd_event': 0, 'resp_event': 0, 'eoy': adm_dates, 'adm': 0, 'anxiety_depression_event': 0} missed_years = pd.DataFrame(data) df = pd.concat([df, missed_years]).sort_values('ADMDATE') return df def calc_adm_per_year(df): """ Reduce data to 1 row per year -------- :param df: dataframe to reduced :return: reduced dataframe """ # Last EOY columns eoy_cols = ['eth_grp', 'days_since_copd', 'days_since_resp', 'days_since_adm', 'adm_to_date', 'copd_to_date', 'resp_to_date', 'anxiety_depression_to_date', 'copd_date', 'resp_date', 'adm_date'] last = df.groupby(['SafeHavenID', 'eoy'])[eoy_cols].last() # Average column los = df.groupby(['SafeHavenID', 'eoy'])[['STAY']].mean() los.columns = ['mean_los'] # Total columns sum_cols = ['adm', 'copd_event', 'resp_event', 'anxiety_depression_event', 'STAY'] total_cols = ['adm_per_year', 'copd_per_year', 'resp_per_year', 'anxiety_depression_per_year', 'total_hosp_days'] total = df.groupby(['SafeHavenID', 'eoy'])[sum_cols].sum() total.columns = total_cols # Join together results = last.join(los).join(total) return results