File size: 4,253 Bytes
53a6def | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | """
Admission processing utilities
"""
import json
import numpy as np
from utils.common import track_event
def update_null_stay(df):
    """
    Calculate length of stay based on ADM/DISDATE for null STAY values
    --------
    :param df: pandas dataframe to be updated; must contain STAY, ADMDATE
        and DISDATE columns
    :return: updated dataframe (the input is modified in place)
    """
    # Boolean mask of the rows with no recorded length of stay
    missing = df['STAY'].isnull()
    if missing.any():
        # Select by mask rather than by position so that any number of null
        # rows, and any index labelling, are handled correctly
        duration = df.loc[missing, 'DISDATE'] - df.loc[missing, 'ADMDATE']
        # Length of stay is the whole number of days between the two dates
        df.loc[missing, 'STAY'] = [float(delta.days) for delta in duration]
    return df
def calculate_total_stay(df):
    """
    Merge back-to-back admissions (ADMDATE equal to the previous row's
    DISDATE) into one admission, treating them as transfers between
    departments
    --------
    :param df: pandas dataframe to be updated (modified in place; its index
        is reset to 0..n-1 before processing)
    :return: updated dataframe with earlier rows of each transfer removed
    """
    df.reset_index(inplace=True, drop=True)
    # Temporary marker: True where a row starts on the previous row's DISDATE
    df['transfer'] = df.ADMDATE.eq(df.DISDATE.shift())
    markers = df['transfer'].tolist()
    absorbed = []
    # The first row has no predecessor, so start from the second row
    for current, marker in enumerate(markers[1:], start=1):
        # Identity check so only a genuine boolean True counts as a transfer
        if marker is not True:
            continue
        previous = current - 1
        # Carry the (possibly already merged) start date and stay forward so
        # that chains of transfers accumulate into the final row
        df.loc[current, 'ADMDATE'] = df.loc[previous, 'ADMDATE']
        df.loc[current, 'STAY'] = (
            df.loc[current, 'STAY'] + df.loc[previous, 'STAY']
        )
        absorbed.append(previous)
    # Remove the rows that were folded into a later row
    df.drop(absorbed, inplace=True)
    # Remove the temporary marker column
    df.drop('transfer', axis=1, inplace=True)
    return df
def convert_ethgrp_desc(eth):
    """
    Find ethnic group based on given ETHGRP string
    --------
    :param eth: str ethnic group description in the style of SMR01 data.
        Non-string values (e.g. None or NaN) are treated as unknown
    :return: string ethnicity, one of "White", "Mixed", "Asian", "Black",
        "Other", "Refused" or "Unknown"
    """
    # Missing values cannot be searched for keywords
    if not isinstance(eth, str):
        return "Unknown"

    def mentions(*keywords):
        # True when any keyword occurs (case-sensitively) in the description
        return any(keyword in eth for keyword in keywords)

    # Order matters: the first matching group wins
    if mentions("White", "Irish", "Welsh", "English"):
        return "White"
    if eth.startswith("British"):
        return "White"
    if mentions("mixed"):
        return "Mixed"
    if mentions("Asian", "Pakistani", "Indian", "Bangladeshi", "Chinese"):
        return "Asian"
    if mentions("Black", "Caribbean", "African"):
        return "Black"
    if mentions("Arab", "other ethnic"):
        return "Other"
    if mentions("Refused"):
        return "Refused"
    return "Unknown"
def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occurring ethnicity for each patient in groupby
    --------
    :param v: pandas patient dataframe to be updated (modified in place)
    :param eth_col: str ethnicity column
    :return: updated subset of data with common ethnicity per ID. If the
        column holds no non-null values the data is returned unchanged
    """
    eth = v[eth_col]
    # Number of distinct non-null ethnicities recorded for the patient
    n = eth.nunique()
    # na=False: a missing value is neither 'Unknown' nor 'Refused'. Without
    # it the NaN results are truthy and get counted as matches
    is_unk = eth.str.contains('Unknown', na=False)
    is_ref = eth.str.contains('Refused', na=False)
    any_unk = is_unk.any()
    any_ref = is_ref.any()
    # Select ethnicities excluding 'Unknown' or 'Refused' where possible,
    # i.e. only when something else would remain afterwards
    if any_unk and any_ref and n > 2:
        eth = eth[~is_unk & ~is_ref]
    elif any_unk and n > 1:
        eth = eth[~is_unk]
    elif any_ref and n > 1:
        eth = eth[~is_ref]
    # Select the most commonly appearing ethnicity (ties resolve to the
    # first value returned by mode())
    modes = eth.mode()
    if modes.empty:
        # Nothing to choose from (empty or all-null group)
        return v
    v[eth_col] = modes.values[0]
    return v
def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp events
    --------
    :param df: dataframe to search; must contain DIAG1Desc..DIAG6Desc columns
    :param typ: 'copd', 'resp' or 'anxiety_depression'
    :return: dataframe with column added tracking specific type of admission
        (named '<typ>_event', 1 if any diagnosis column matched, else 0)
    """
    # Columns to search
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']

    # Load mappings; the context manager closes the file handle promptly
    with open('mappings/diag_copd_resp_desc.json') as mapping_file:
        copd_resp_desc = json.load(mapping_file)

    # Select mappings relevant to desired type of admission
    desc = copd_resp_desc[typ]

    # copd descriptions will only require searching a single specific phrase
    single = typ == 'copd'

    # Search each diagnosis column, then flag rows where any column matched
    matches = df[diag_cols].apply(lambda x: track_event(x, desc, single))
    df[typ + '_event'] = matches.any(axis=1).astype(int)
    return df
|