"""
Admission reduction utilities
"""
import pandas as pd
from datetime import date
def fill_missing_years(df):
    """
    Add zero-activity rows for years that are absent from the admission data
    --------
    Rows are ordered by 'ADMDATE'; the 'eoy' years of the first and last rows
    bound the search, and every year strictly between them that has no 'eoy'
    entry gets one filler row. A filler row copies 'SafeHavenID' and 'eth_grp'
    from the first row, sets both 'ADMDATE' and 'eoy' to the first row's 'eoy'
    month/day within the missing year, and sets the count columns to 0.

    :param df: dataframe to be updated; 'eoy' must be a datetime column.
        NOTE(review): presumably holds the rows of a single patient, since
        the identifier is taken from the first row only - confirm with caller.
    :return: new dataframe sorted by 'ADMDATE' with missing years added
        (an empty copy when df has no rows)
    :raises ValueError: if the first row's 'eoy' falls on 29 February and a
        missing year is not a leap year
    """
    # The first-row lookups below need at least one row to anchor on
    if df.empty:
        return df.copy()

    df = df.sort_values('ADMDATE')
    year_col = df.eoy.dt.year.tolist()
    end_month = df.eoy.dt.month.iloc[0]
    end_day = df.eoy.dt.day.iloc[0]

    # We only want missing years, i.e. gaps strictly inside the observed span
    seen_years = set(year_col)
    years = [y for y in range(year_col[0] + 1, year_col[-1])
             if y not in seen_years]

    # No gaps: the sorted frame is already complete
    if not years:
        return df

    # One filler row per missing year, dated at that year's end-of-year anchor
    adm_dates = pd.to_datetime([date(y, end_month, end_day) for y in years])
    data = {'SafeHavenID': df.SafeHavenID.iloc[0],
            'eth_grp': df.eth_grp.iloc[0],
            'ADMDATE': adm_dates,
            'STAY': 0, 'copd_event': 0, 'resp_event': 0, 'eoy': adm_dates,
            'adm': 0, 'anxiety_depression_event': 0}
    missed_years = pd.DataFrame(data)

    return pd.concat([df, missed_years]).sort_values('ADMDATE')
def calc_adm_per_year(df):
    """
    Collapse admission rows to a single row per patient per year
    --------
    Rows are grouped on ('SafeHavenID', 'eoy'). Each group contributes the
    last value of the running/"to date" columns, the mean of 'STAY' as
    'mean_los', and the sums of the event columns and 'STAY' under their
    per-year names.

    :param df: dataframe to be reduced
    :return: reduced dataframe indexed by ('SafeHavenID', 'eoy')
    """
    per_year = df.groupby(['SafeHavenID', 'eoy'])

    # Columns where the final entry of the year is carried forward
    carry_forward = [
        'eth_grp', 'days_since_copd', 'days_since_resp', 'days_since_adm',
        'adm_to_date', 'copd_to_date', 'resp_to_date',
        'anxiety_depression_to_date', 'copd_date', 'resp_date', 'adm_date',
    ]
    latest = per_year[carry_forward].last()

    # Mean length of stay within the year
    mean_stay = per_year[['STAY']].mean().rename(columns={'STAY': 'mean_los'})

    # Yearly totals, renamed from source column to per-year column
    yearly_names = {
        'adm': 'adm_per_year',
        'copd_event': 'copd_per_year',
        'resp_event': 'resp_per_year',
        'anxiety_depression_event': 'anxiety_depression_per_year',
        'STAY': 'total_hosp_days',
    }
    yearly_totals = per_year[list(yearly_names)].sum().rename(columns=yearly_names)

    # Stitch the three summaries together on the shared group index
    with_mean = latest.join(mean_stay)
    return with_mean.join(yearly_totals)