File size: 4,253 Bytes
53a6def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
Admission processing utilities
"""
import json
import numpy as np
from utils.common import track_event


def update_null_stay(df):
    """
    Calculate length of stay based on ADMDATE/DISDATE for null STAY values
    --------
    :param df: pandas dataframe to be updated; must carry 'STAY' plus
        datetime-like 'ADMDATE' and 'DISDATE' columns
    :return: updated dataframe (modified in place and returned)
    """
    # Boolean mask of rows whose STAY is missing
    is_null = df.STAY.isnull()

    # Fill each null STAY with the whole-day span DISDATE - ADMDATE.
    # Iterate label indices directly: np.where() returns a tuple whose
    # sole element is the whole index array, so looping over it (as the
    # previous version did) passed an array to .loc and broke whenever
    # more than one row was null.
    if is_null.any():
        for idx in df.index[is_null]:
            stay = df.loc[idx, 'DISDATE'] - df.loc[idx, 'ADMDATE']
            df.loc[idx, 'STAY'] = float(stay.days)

    return df


def calculate_total_stay(df):
    """
    Convert admissions with same ADMDATE as previous DISDATE to single
    admission where patient has been transferred between departments
    --------
    :param df: pandas dataframe to be updated; must carry 'ADMDATE',
        'DISDATE' and 'STAY' columns, ordered by admission date
    :return: updated dataframe with transfer rows merged
    """
    df.reset_index(inplace=True, drop=True)
    rows_to_drop = []

    # If ADMDATE matches previous DISDATE, mark as transfer and combine
    df['transfer'] = df.ADMDATE.eq(df.DISDATE.shift())
    for index, row in df.iloc[1:].iterrows():
        # Truthiness test rather than `is True`: the flag may surface as
        # numpy.bool_, which is never identical to the Python True
        # singleton, so the identity check could silently skip transfers.
        if row.transfer:
            # Pull ADMDATE/STAY from the (possibly already merged)
            # previous row so chained transfers accumulate correctly.
            df.loc[index, 'ADMDATE'] = df.iloc[index - 1].ADMDATE
            df.loc[index, 'STAY'] = row.STAY + df.iloc[index - 1].STAY
            rows_to_drop.append(index - 1)

    # Drop original individual rows in transfer
    df.drop(rows_to_drop, inplace=True)

    # Drop tracking column
    df.drop('transfer', axis=1, inplace=True)

    return df


def convert_ethgrp_desc(eth):
    """
    Find ethnic group based on given ETHGRP string
    --------
    :param eth: str ethnic group description in the style of SMR01 data
    :return: string ethnicity
    """
    if any(term in eth for term in ("White", "Irish", "Welsh", "English")):
        return "White"

    if eth.startswith("British"):
        return "White"

    if "mixed" in eth:
        return "Mixed"

    if any(term in eth
           for term in ("Asian", "Pakistani", "Indian", "Bangladeshi",
                        "Chinese")):
        return "Asian"

    if any(term in eth for term in ("Black", "Caribbean", "African")):
        return "Black"

    if "Arab" in eth or "other ethnic" in eth:
        return "Other"

    if "Refused" in eth:
        return "Refused"

    return "Unknown"


def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occuring ethnicity for each patient in groupby
    --------
    :param v: pandas patient dataframe to be updated
    :param eth_col: str ethnicity column
    :return: updated subset of data with common ethnicity per ID
    """
    eth = v[eth_col]
    n = eth.nunique()
    has_unk = eth.str.contains('Unknown')
    has_ref = eth.str.contains('Refused')
    # Negate the boolean masks with ~. The previous
    # `apply(lambda x: x is False)` was an identity check against the
    # Python False singleton and yields all-False for numpy.bool_
    # elements, which emptied the selection and crashed mode().
    wout_unk = ~has_unk
    wout_ref = ~has_ref

    # Select ethnicities excluding 'Unknown' or 'Refused' where possible
    # (i.e. only when at least one informative value remains).
    if has_unk.any() and has_ref.any() and (n > 2):
        eth = eth[wout_unk & wout_ref]
    elif has_unk.any() and (n > 1):
        eth = eth[wout_unk]
    elif has_ref.any() and (n > 1):
        eth = eth[wout_ref]

    # Select the most commonly appearing ethnicity (mode() sorts ties,
    # so the first value is deterministic)
    main_eth = eth.mode().values[0]
    v[eth_col] = main_eth

    return v


def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp events
    --------
    :param df: dataframe to search; must carry DIAG1Desc..DIAG6Desc columns
    :param typ: 'copd', 'resp' or 'anxiety_depression'
    :return: dataframe with column added tracking specific type of admission
    """
    # Columns to search
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']

    # Load mappings; use a context manager so the file handle is closed
    # deterministically (json.load(open(...)) leaked it).
    with open('mappings/diag_copd_resp_desc.json') as f:
        copd_resp_desc = json.load(f)

    # Select mappings relevant to desired type of admission
    desc = copd_resp_desc[typ]

    # copd descriptions will only require searching a single specific phrase
    single = typ == 'copd'

    # Search columns and track: flag the row 1/0 if any diagnosis column
    # matches the mapped descriptions
    df[typ + '_event'] = df[diag_cols].apply(
        lambda x: track_event(x, desc, single)).any(axis=1).astype(int)

    return df