# File size: 2,168 Bytes
#
# 53a6def

"""
Admission reduction utilities
"""
import pandas as pd
from datetime import date


def fill_missing_years(df):
    """
    Add zero-activity rows for years in which a patient has no admission data
    --------
    :param df: dataframe to be updated; the columns ADMDATE, eoy (datetime),
        SafeHavenID and eth_grp are read, with SafeHavenID and eth_grp taken
        from the first row (presumably one patient per call - verify against
        the caller)
    :return: dataframe sorted by ADMDATE with one extra row for every year
        strictly between the first and last eoy year that has no row; an
        empty input is returned as an empty copy
    """
    # No rows means no first/last year to anchor on, so there is nothing to
    # fill (indexing position 0 below would otherwise raise IndexError)
    if df.empty:
        return df.copy()

    df = df.sort_values('ADMDATE')
    year_col = df.eoy.dt.year.tolist()
    # The month/day of the first eoy is reused for every generated row
    end_month = df.eoy.dt.month.iloc[0]
    end_day = df.eoy.dt.day.iloc[0]

    # We only want missing years; a set keeps each membership test O(1)
    seen_years = set(year_col)
    year_range = range(year_col[0] + 1, year_col[-1])
    years = [y for y in year_range if y not in seen_years]

    # If any years missing add rows
    if years:
        sh_id = df.SafeHavenID.iloc[0]
        eth_grp = df.eth_grp.iloc[0]
        adm_dates = pd.to_datetime([date(y, end_month, end_day) for y in years])
        # Scalars are broadcast across the generated dates; all event and
        # stay counters are zero for a filled-in year
        data = {'SafeHavenID': sh_id, 'eth_grp': eth_grp, 'ADMDATE': adm_dates,
                'STAY': 0, 'copd_event': 0, 'resp_event': 0, 'eoy': adm_dates,
                'adm': 0, 'anxiety_depression_event': 0}
        missed_years = pd.DataFrame(data)
        df = pd.concat([df, missed_years]).sort_values('ADMDATE')

    return df


def calc_adm_per_year(df):
    """
    Collapse admission rows to a single row per SafeHavenID and eoy
    --------
    :param df: dataframe to be reduced
    :return: dataframe indexed by (SafeHavenID, eoy) holding the last value
        of each carried column, the mean STAY (mean_los) and the per-group
        sums of the event and stay columns
    """
    grouped = df.groupby(['SafeHavenID', 'eoy'])

    # Columns carried forward as the last value seen within each group
    carried = [
        'eth_grp', 'days_since_copd', 'days_since_resp', 'days_since_adm',
        'adm_to_date', 'copd_to_date', 'resp_to_date',
        'anxiety_depression_to_date', 'copd_date', 'resp_date', 'adm_date',
    ]
    snapshot = grouped[carried].last()

    # Mean length of stay within each group
    mean_stay = grouped[['STAY']].mean().rename(columns={'STAY': 'mean_los'})

    # Source column -> name of its per-group total
    summed = {
        'adm': 'adm_per_year',
        'copd_event': 'copd_per_year',
        'resp_event': 'resp_per_year',
        'anxiety_depression_event': 'anxiety_depression_per_year',
        'STAY': 'total_hosp_days',
    }
    totals = grouped[list(summed)].sum().rename(columns=summed)

    # All three pieces share the same (SafeHavenID, eoy) index
    return snapshot.join(mean_stay).join(totals)