File size: 2,291 Bytes
53a6def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
Utility functions common across admission processing
(admissions/comorbidities/gples)
"""
import pandas as pd
from utils.common import read_data
from utils.adm_processing import (update_null_stay, calculate_total_stay,
                                  search_diag)


def initialize_adm_data(adm_file):
    """
    Read the admission dataset and prepare it for downstream processing
    --------
    :param adm_file: admission data file name
    :return: de-duplicated admission dataframe with ADMDATE and DISDATE
        converted to datetimes
    """
    print('Loading admission data')

    # Six diagnosis description columns: DIAG1Desc ... DIAG6Desc
    diag_cols = [f'DIAG{i}Desc' for i in range(1, 7)]

    # Column names and the matching type labels handed to read_data
    adm_cols = ['SafeHavenID', 'ETHGRP', 'ADMDATE', 'DISDATE', 'STAY']
    adm_cols += diag_cols
    adm_types = ['int', 'object', 'object', 'object', 'int']
    adm_types += ['str'] * len(diag_cols)

    # Only fully identical rows are removed, so rows that contain nulls in
    # the DIAGDesc columns are retained
    data = read_data(adm_file, adm_cols, adm_types).drop_duplicates()

    # Dates are read in as objects; parse them into datetimes
    for date_col in ('ADMDATE', 'DISDATE'):
        data[date_col] = pd.to_datetime(data[date_col])

    return data


def correct_stays(df):
    """
    Fill missing STAY values, then recompute stays per patient so that
    transfer admissions are consolidated into single admission occurrences
    --------
    :param df: admission dataframe to be corrected
    :return: corrected admission dataframe with a fresh 0..n-1 index
    """
    print('Correcting stays')

    # Null STAY values are filled by update_null_stay (from ADM/DIS dates)
    filled = update_null_stay(df)

    # Order each patient's admissions chronologically before they are
    # handed, one patient at a time, to calculate_total_stay
    ordered = filled.sort_values(['SafeHavenID', 'ADMDATE', 'DISDATE'])
    per_patient = ordered.groupby('SafeHavenID')
    combined = per_patient.apply(calculate_total_stay)

    # Discard the index produced by the groupby/apply step
    return combined.reset_index(drop=True)


def track_copd_resp(df):
    """
    Flag COPD and respiratory admissions in the admission data
    --------
    :param df: admission dataframe to be updated
    :return: dataframe after whitespace stripping and after search_diag has
        been applied for 'copd' and then 'resp'
    """
    print('Tracking events')

    def _strip_if_object(column):
        # Only object-dtype columns are stripped; others pass through as-is
        if column.dtype == 'object':
            return column.str.strip()
        return column

    # Applies to every object-dtype column, which includes the DIAGDesc ones
    tracked = df.apply(_strip_if_object)

    # Track COPD admissions first, then respiratory admissions
    for diag_type in ('copd', 'resp'):
        tracked = search_diag(tracked, diag_type)

    return tracked