"""
Admission processing utilities
"""
import json

import numpy as np

from utils.common import track_event


def update_null_stay(df):
    """
    Calculate length of stay based on ADM/DISDATE for null STAY values
    --------
    :param df: pandas dataframe to be updated (modified in place); must
        contain STAY, ADMDATE and DISDATE columns
    :return: updated dataframe
    """
    missing = df['STAY'].isnull()

    # A boolean mask handles any number of null rows and any index labels
    # in a single assignment.
    if missing.any():
        elapsed = df.loc[missing, 'DISDATE'] - df.loc[missing, 'ADMDATE']
        # Map element-wise so both datetime64 columns and object columns
        # holding date objects yield a whole-day count.
        df.loc[missing, 'STAY'] = elapsed.map(
            lambda delta: float(delta.days))

    return df


def calculate_total_stay(df):
    """
    Convert admissions with same ADMDATE as previous DISDATE to single
    admission where patient has been transferred between departments
    --------
    :param df: pandas dataframe to be updated (modified in place, index is
        reset); rows are compared with the row directly above them, so they
        are presumably expected to be in chronological order per patient
    :return: updated dataframe
    """
    df.reset_index(inplace=True, drop=True)

    # If ADMDATE matches previous DISDATE, mark as transfer and combine
    df['transfer'] = df['ADMDATE'].eq(df['DISDATE'].shift())
    is_transfer = df['transfer'].values

    # The first row has no predecessor, so it is never treated as a transfer.
    # Labels are visited in ascending order so chained transfers accumulate
    # into the last row of the chain.
    absorbed = []
    for label in df.index[1:][is_transfer[1:]]:
        previous = label - 1
        df.at[label, 'ADMDATE'] = df.at[previous, 'ADMDATE']
        df.at[label, 'STAY'] = df.at[label, 'STAY'] + df.at[previous, 'STAY']
        absorbed.append(previous)

    # Drop original individual rows in transfer
    df.drop(absorbed, inplace=True)

    # Drop tracking column
    df.drop('transfer', axis=1, inplace=True)
    return df


def convert_ethgrp_desc(eth):
    """
    Find ethnic group based on given ETHGRP string
    --------
    :param eth: str ethnic group description in the style of SMR01 data
    :return: string ethnicity, one of "White", "Mixed", "Asian", "Black",
        "Other", "Refused" or "Unknown"
    """
    def mentions(*terms):
        # Case-sensitive substring match against any of the given terms
        return any(term in eth for term in terms)

    # Checks run in priority order: the first matching group wins
    if mentions("White", "Irish", "Welsh", "English"):
        return "White"
    if eth.startswith("British"):
        return "White"
    if mentions("mixed"):
        return "Mixed"
    if mentions("Asian", "Pakistani", "Indian", "Bangladeshi", "Chinese"):
        return "Asian"
    if mentions("Black", "Caribbean", "African"):
        return "Black"
    if mentions("Arab", "other ethnic"):
        return "Other"
    if mentions("Refused"):
        return "Refused"
    return "Unknown"


def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occurring ethnicity for each patient in groupby
    --------
    :param v: pandas patient dataframe to be updated
    :param eth_col: str ethnicity column
    :return: updated subset of data with common ethnicity per ID
    """
    eth = v[eth_col]
    n = eth.nunique()

    # na=False stops missing values from being counted as matches
    is_unknown = eth.str.contains('Unknown', na=False)
    is_refused = eth.str.contains('Refused', na=False)
    any_unknown = is_unknown.any()
    any_refused = is_refused.any()

    # Select ethnicities excluding 'Unknown' or 'Refused' where possible,
    # i.e. only when at least one other distinct value would remain
    if any_unknown and any_refused and n > 2:
        eth = eth[~is_unknown & ~is_refused]
    elif any_unknown and n > 1:
        eth = eth[~is_unknown]
    elif any_refused and n > 1:
        eth = eth[~is_refused]

    # Select the most commonly appearing ethnicity (the first mode returned
    # is used when there is a tie)
    v[eth_col] = eth.mode().values[0]
    return v


def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp
    events
    --------
    :param df: dataframe to search; must contain DIAG1Desc..DIAG6Desc
    :param typ: 'copd', 'resp' or 'anxiety_depression'
    :return: dataframe with column '<typ>_event' (0/1) added tracking
        specific type of admission
    """
    # Columns to search
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']

    # Load mappings; the context manager closes the file handle promptly
    with open('mappings/diag_copd_resp_desc.json') as mapping_file:
        copd_resp_desc = json.load(mapping_file)

    # Select mappings relevant to desired type of admission
    desc = copd_resp_desc[typ]

    # copd descriptions will only require searching a single specific phrase
    single = typ == 'copd'

    # Search each diagnosis column, then flag rows where any column matched
    matches = df[diag_cols].apply(lambda x: track_event(x, desc, single))
    df[typ + '_event'] = matches.any(axis=1).astype(int)
    return df