IamGrooooot's picture
Model E: Unsupervised PCA + clustering risk stratification
53a6def
"""
Admission processing utilities
"""
import json
import numpy as np
from utils.common import track_event
def update_null_stay(df):
    """
    Fill null STAY values with the number of days between ADMDATE and DISDATE
    --------
    :param df: pandas dataframe to be updated (modified in place); must
        contain STAY, ADMDATE and DISDATE columns
    :return: the same dataframe with null STAY values filled
    """
    # Select rows with a boolean mask: .loc is label based, so positions from
    # np.where are only valid on a default RangeIndex
    is_null = df['STAY'].isnull()
    if not is_null.any():
        return df
    length = df.loc[is_null, 'DISDATE'] - df.loc[is_null, 'ADMDATE']
    # Element-wise .days copes with both timedelta64 and object-dtype results
    days = length.map(lambda delta: float(delta.days))
    # Assign positionally so duplicate index labels cannot break alignment
    df.loc[is_null, 'STAY'] = days.to_numpy()
    return df
def calculate_total_stay(df):
    """
    Merge consecutive admissions into one where an admission starts on the
    date the previous row was discharged (treated as a transfer between
    departments)
    --------
    :param df: pandas dataframe to be updated (index is reset and rows are
        merged in place); needs ADMDATE, DISDATE and STAY columns
    :return: updated dataframe with one row per combined admission
    """
    df.reset_index(drop=True, inplace=True)
    # Flag rows whose ADMDATE equals the DISDATE of the row above
    df['transfer'] = df.ADMDATE.eq(df.DISDATE.shift())
    merged_rows = []
    for pos, record in df.iloc[1:].iterrows():
        if record.transfer is not True:
            continue
        # Read the previous row from df (not the iteration snapshot) so that
        # chains of transfers accumulate
        previous = df.iloc[pos - 1]
        df.loc[pos, 'ADMDATE'] = previous.ADMDATE
        df.loc[pos, 'STAY'] = record.STAY + previous.STAY
        merged_rows.append(pos - 1)
    # Remove the rows that were folded into their successor
    df.drop(merged_rows, inplace=True)
    # Remove the helper column
    df.drop('transfer', axis=1, inplace=True)
    return df
def convert_ethgrp_desc(eth):
    """
    Find ethnic group based on given ETHGRP string

    Matching is case sensitive and the groups are tested in a fixed order, so
    the first matching group wins. Missing or non-string descriptions are
    reported as "Unknown".
    --------
    :param eth: str ethnic group description in the style of SMR01 data
    :return: string ethnicity, one of "White", "Mixed", "Asian", "Black",
        "Other", "Refused" or "Unknown"
    """
    # Missing descriptions (None/NaN) cannot be searched; treat as unknown
    if not isinstance(eth, str):
        return "Unknown"

    def mentions(*keywords):
        return any(keyword in eth for keyword in keywords)

    if (mentions("White", "Irish", "Welsh", "English")
            or eth.startswith("British")):
        return "White"
    if mentions("mixed"):
        return "Mixed"
    if mentions("Asian", "Pakistani", "Indian", "Bangladeshi", "Chinese"):
        return "Asian"
    if mentions("Black", "Caribbean", "African"):
        return "Black"
    if mentions("Arab", "other ethnic"):
        return "Other"
    if mentions("Refused"):
        return "Refused"
    return "Unknown"
def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occurring ethnicity for each patient in groupby

    'Unknown' and 'Refused' entries are ignored when the patient also has
    other recorded values. If the patient has no recorded ethnicity at all
    the data is returned unchanged.
    --------
    :param v: pandas patient dataframe to be updated (eth_col is overwritten
        in place)
    :param eth_col: str ethnicity column
    :return: updated subset of data with common ethnicity per ID
    """
    # nunique() and mode() skip missing values anyway; dropping them first
    # keeps the string matching below free of NaN results
    eth = v[eth_col].dropna()
    if eth.empty:
        return v
    n = eth.nunique()
    has_unk = eth.str.contains('Unknown', na=False)
    has_ref = eth.str.contains('Refused', na=False)
    any_unk = has_unk.any()
    any_ref = has_ref.any()
    # Select ethnicities excluding 'Unknown' or 'Refused' where possible
    if any_unk and any_ref and n > 2:
        preferred = eth[~(has_unk | has_ref)]
    elif any_unk and n > 1:
        preferred = eth[~has_unk]
    elif any_ref and n > 1:
        preferred = eth[~has_ref]
    else:
        preferred = eth
    # Every distinct value matched the excluded terms: fall back to all values
    if preferred.empty:
        preferred = eth
    # Select the most commonly appearing ethnicity (first one on a tie)
    v[eth_col] = preferred.mode().iloc[0]
    return v
def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp events

    Adds an integer column named '<typ>_event' that is 1 when any of the six
    DIAGnDesc columns is flagged by track_event for that row and 0 otherwise.
    The descriptions to search for are read from
    'mappings/diag_copd_resp_desc.json', resolved relative to the current
    working directory.
    --------
    :param df: dataframe to search (modified in place); must contain columns
        DIAG1Desc to DIAG6Desc
    :param typ: 'copd', 'resp' or 'anxiety_depression'; used as the lookup
        key into the mapping file and as the prefix of the new column
    :return: dataframe with column added tracking specific type of admission
    """
    # Columns to search
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']
    # Load mappings; the context manager ensures the file handle is closed
    with open('mappings/diag_copd_resp_desc.json') as mapping_file:
        copd_resp_desc = json.load(mapping_file)
    # Select mappings relevant to desired type of admission
    desc = copd_resp_desc[typ]
    # copd descriptions will only require searching a single specific phrase
    single = typ == 'copd'
    # Search each column and flag rows where any column matched
    # NOTE(review): track_event presumably returns one boolean per row for
    # the column it is given - confirm against utils.common
    df[typ + '_event'] = df[diag_cols].apply(
        lambda x: track_event(x, desc, single)).any(axis=1).astype(int)
    return df