File size: 4,253 Bytes
53a6def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
Admission processing utilities
"""
import json
import numpy as np
from utils.common import track_event


def update_null_stay(df):
    """
    Calculate length of stay based on ADMDATE/DISDATE for null STAY values
    --------
    :param df: pandas dataframe to be updated; must carry 'STAY' plus
        datetime-like 'ADMDATE' and 'DISDATE' columns
    :return: updated dataframe (modified in place and returned)
    """
    # Boolean mask of rows whose STAY is missing
    is_null = df.STAY.isnull()

    # Fill each null STAY with the whole-day span DISDATE - ADMDATE.
    # Iterate label indices directly: np.where() returns a tuple whose
    # sole element is the whole index array, so looping over it (as the
    # previous version did) passed an array to .loc and broke whenever
    # more than one row was null.
    if is_null.any():
        for idx in df.index[is_null]:
            stay = df.loc[idx, 'DISDATE'] - df.loc[idx, 'ADMDATE']
            df.loc[idx, 'STAY'] = float(stay.days)

    return df


def calculate_total_stay(df):
    """
    Convert admissions with same ADMDATE as previous DISDATE to single
    admission where patient has been transferred between departments
    --------
    :param df: pandas dataframe to be updated; must carry 'ADMDATE',
        'DISDATE' and 'STAY' columns, ordered by admission date
    :return: updated dataframe with transfer rows merged
    """
    df.reset_index(inplace=True, drop=True)
    rows_to_drop = []

    # If ADMDATE matches previous DISDATE, mark as transfer and combine
    df['transfer'] = df.ADMDATE.eq(df.DISDATE.shift())
    for index, row in df.iloc[1:].iterrows():
        # Truthiness test rather than `is True`: the flag may surface as
        # numpy.bool_, which is never identical to the Python True
        # singleton, so the identity check could silently skip transfers.
        if row.transfer:
            # Pull ADMDATE/STAY from the (possibly already merged)
            # previous row so chained transfers accumulate correctly.
            df.loc[index, 'ADMDATE'] = df.iloc[index - 1].ADMDATE
            df.loc[index, 'STAY'] = row.STAY + df.iloc[index - 1].STAY
            rows_to_drop.append(index - 1)

    # Drop original individual rows in transfer
    df.drop(rows_to_drop, inplace=True)

    # Drop tracking column
    df.drop('transfer', axis=1, inplace=True)

    return df


def convert_ethgrp_desc(eth):
    """
    Find ethnic group based on given ETHGRP string
    --------
    :param eth: str ethnic group description in the style of SMR01 data
    :return: string ethnicity
    """
    if any(term in eth for term in ("White", "Irish", "Welsh", "English")):
        return "White"

    if eth.startswith("British"):
        return "White"

    if "mixed" in eth:
        return "Mixed"

    if any(term in eth
           for term in ("Asian", "Pakistani", "Indian", "Bangladeshi",
                        "Chinese")):
        return "Asian"

    if any(term in eth for term in ("Black", "Caribbean", "African")):
        return "Black"

    if "Arab" in eth or "other ethnic" in eth:
        return "Other"

    if "Refused" in eth:
        return "Refused"

    return "Unknown"


def mode_ethnicity(v, eth_col):
    """
    Select the most commonly occuring ethnicity for each patient in groupby
    --------
    :param v: pandas patient dataframe to be updated
    :param eth_col: str ethnicity column
    :return: updated subset of data with common ethnicity per ID
    """
    eth = v[eth_col]
    n = eth.nunique()
    has_unk = eth.str.contains('Unknown')
    has_ref = eth.str.contains('Refused')
    # Negate the boolean masks with ~. The previous
    # `apply(lambda x: x is False)` was an identity check against the
    # Python False singleton and yields all-False for numpy.bool_
    # elements, which emptied the selection and crashed mode().
    wout_unk = ~has_unk
    wout_ref = ~has_ref

    # Select ethnicities excluding 'Unknown' or 'Refused' where possible
    # (i.e. only when at least one informative value remains).
    if has_unk.any() and has_ref.any() and (n > 2):
        eth = eth[wout_unk & wout_ref]
    elif has_unk.any() and (n > 1):
        eth = eth[wout_unk]
    elif has_ref.any() and (n > 1):
        eth = eth[wout_ref]

    # Select the most commonly appearing ethnicity (mode() sorts ties,
    # so the first value is deterministic)
    main_eth = eth.mode().values[0]
    v[eth_col] = main_eth

    return v


def search_diag(df, typ):
    """
    Search diagnosis columns for descriptions indicative of copd or resp events
    --------
    :param df: dataframe to search; must carry DIAG1Desc..DIAG6Desc columns
    :param typ: 'copd', 'resp' or 'anxiety_depression'
    :return: dataframe with column added tracking specific type of admission
    """
    # Columns to search
    diag_cols = ['DIAG1Desc', 'DIAG2Desc', 'DIAG3Desc', 'DIAG4Desc',
                 'DIAG5Desc', 'DIAG6Desc']

    # Load mappings; use a context manager so the file handle is closed
    # deterministically (json.load(open(...)) leaked it).
    with open('mappings/diag_copd_resp_desc.json') as f:
        copd_resp_desc = json.load(f)

    # Select mappings relevant to desired type of admission
    desc = copd_resp_desc[typ]

    # copd descriptions will only require searching a single specific phrase
    single = typ == 'copd'

    # Search columns and track: flag the row 1/0 if any diagnosis column
    # matches the mapped descriptions
    df[typ + '_event'] = df[diag_cols].apply(
        lambda x: track_event(x, desc, single)).any(axis=1).astype(int)

    return df