| | """ |
| | Admission processing utilities |
| | """ |
| | import json |
| | import numpy as np |
| | from utils.common import track_event |
| |
|
| |
|
def update_null_stay(df):
    """
    Calculate length of stay based on ADM/DISDATE for null STAY values
    --------
    :param df: pandas dataframe to be updated; must contain ADMDATE, DISDATE
        and STAY columns (the date columns are assumed to be datetime64 so
        that their difference is a timedelta - TODO confirm against caller)
    :return: the same dataframe, modified in place, where every null STAY has
        been replaced by the whole number of days between ADMDATE and DISDATE
        (stored as float)
    """
    is_null = df.STAY.isnull()

    if is_null.any():
        # Select with the boolean mask instead of positional indices: this
        # fills any number of null rows in one go and stays correct when the
        # index labels are not 0..n-1.
        stay = df.loc[is_null, 'DISDATE'] - df.loc[is_null, 'ADMDATE']
        df.loc[is_null, 'STAY'] = stay.dt.days.astype(float)

    return df
| |
|
| |
|
def calculate_total_stay(df):
    """
    Convert admissions with same ADMDATE as previous DISDATE to single
    admission where patient has been transferred between departments
    --------
    A row counts as a transfer when its ADMDATE equals the DISDATE of the row
    directly above it. The transfer row inherits the earlier ADMDATE, its STAY
    becomes the sum of both stays, and the earlier row is removed. Chains of
    consecutive transfers therefore collapse into their last row.
    NOTE(review): rows are compared purely by position, so this presumably
    expects one patient's admissions in chronological order - confirm with
    the caller.

    :param df: pandas dataframe to be updated; needs ADMDATE, DISDATE and
        STAY columns. Its index is reset in place before processing.
    :return: the same dataframe, modified in place, with merged rows dropped
        (the index is not renumbered after the drop)
    """
    df.reset_index(inplace=True, drop=True)

    # Boolean mask of rows that continue the previous admission. Row 0 has no
    # predecessor, so it is skipped explicitly.
    is_transfer = df.ADMDATE.eq(df.DISDATE.shift()).values
    transfer_rows = [pos for pos in df.index[is_transfer] if pos > 0]

    absorbed = []
    for pos in transfer_rows:
        prev = pos - 1
        # Read the predecessor from the live frame so that an earlier merge
        # in the same chain is carried forward.
        df.at[pos, 'ADMDATE'] = df.at[prev, 'ADMDATE']
        df.at[pos, 'STAY'] = df.at[pos, 'STAY'] + df.at[prev, 'STAY']
        absorbed.append(prev)

    df.drop(absorbed, inplace=True)

    return df
| |
|
| |
|
def convert_ethgrp_desc(eth):
    """
    Map an ETHGRP description onto a broad ethnic group label
    --------
    Matching is by case-sensitive substring and the first rule that matches
    wins, in the order: White, Mixed, Asian, Black, Other, Refused.

    :param eth: str ethnic group description in the style of SMR01 data
    :return: str, one of "White", "Mixed", "Asian", "Black", "Other",
        "Refused", or "Unknown" when no rule matches
    """
    def mentions(keywords):
        # True when at least one keyword occurs anywhere in the description.
        return any(keyword in eth for keyword in keywords)

    # "British" only counts as White when it leads the description.
    if mentions(("White", "Irish", "Welsh", "English")):
        return "White"
    if eth.startswith("British"):
        return "White"

    # Remaining rules, kept in priority order.
    ordered_rules = (
        ("Mixed", ("mixed",)),
        ("Asian", ("Asian", "Pakistani", "Indian", "Bangladeshi", "Chinese")),
        ("Black", ("Black", "Caribbean", "African")),
        ("Other", ("Arab", "other ethnic")),
        ("Refused", ("Refused",)),
    )
    for label, keywords in ordered_rules:
        if mentions(keywords):
            return label

    return "Unknown"
| |
|
| |
|
def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occurring ethnicity for each patient in groupby
    --------
    "Unknown" and "Refused" entries are ignored when the patient also has
    some other recorded value, so a real ethnicity is preferred over a
    placeholder. Missing values never count as "Unknown" or "Refused".
    When several values tie, the first one returned by ``Series.mode`` is
    used.

    :param v: pandas patient dataframe to be updated
    :param eth_col: str ethnicity column
    :return: the same dataframe, modified in place, with eth_col set to the
        chosen ethnicity on every row
    :raises IndexError: if no non-null ethnicity is left to take a mode from
    """
    eth = v[eth_col]
    n_unique = eth.nunique()

    # na=False keeps missing values out of both masks. Without it they come
    # back as NaN, which is truthy and would be counted as a match.
    is_unknown = eth.str.contains('Unknown', na=False)
    is_refused = eth.str.contains('Refused', na=False)
    any_unknown = is_unknown.any()
    any_refused = is_refused.any()

    # Only drop placeholders when something else would remain afterwards.
    if any_unknown and any_refused and n_unique > 2:
        eth = eth[~is_unknown & ~is_refused]
    elif any_unknown and n_unique > 1:
        eth = eth[~is_unknown]
    elif any_refused and n_unique > 1:
        eth = eth[~is_refused]

    main_eth = eth.mode().values[0]
    v[eth_col] = main_eth

    return v
| |
|
| |
|
def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp events
    --------
    The descriptions to look for are read from
    'mappings/diag_copd_resp_desc.json' (path relative to the working
    directory) on every call, using the entry stored under ``typ``.

    :param df: dataframe to search; must contain DIAG1Desc..DIAG6Desc
    :param typ: 'copd', 'resp' or 'anxiety_depression'
    :return: the same dataframe with an integer (0/1) column named
        ``typ + '_event'`` added, set to 1 where any diagnosis column was
        flagged by track_event
    :raises KeyError: if the mapping has no entry for typ (assuming the
        file's top level is a JSON object)
    """
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']

    # Use a context manager so the mapping file is closed straight away
    # rather than whenever the handle is garbage collected.
    with open('mappings/diag_copd_resp_desc.json') as mapping_file:
        copd_resp_desc = json.load(mapping_file)

    desc = copd_resp_desc[typ]

    # Forwarded to track_event as its third argument; True only for 'copd'.
    # NOTE(review): the exact meaning is defined in utils.common.
    single = typ == 'copd'

    # track_event is applied to each diagnosis column in turn. A row is
    # flagged when any of its columns comes back truthy.
    df[typ + '_event'] = df[diag_cols].apply(
        lambda x: track_event(x, desc, single)).any(axis=1).astype(int)

    return df
| |
|