"""
Utility functions common across admission processing
(admissions/comorbidities/gples)
"""
import pandas as pd
from utils.common import read_data
from utils.adm_processing import (update_null_stay, calculate_total_stay,
search_diag)
def initialize_adm_data(adm_file):
    """
    Read the admission dataset and prepare it for downstream processing
    --------
    :param adm_file: admission data file name
    :return: de-duplicated admission dataframe with ADMDATE and DISDATE
        converted to datetimes
    """
    print('Loading admission data')

    # Each column name is paired with the type requested from read_data
    schema = [
        ('SafeHavenID', 'int'),
        ('ETHGRP', 'object'),
        ('ADMDATE', 'object'),
        ('DISDATE', 'object'),
        ('STAY', 'int'),
        ('DIAG1Desc', 'str'),
        ('DIAG2Desc', 'str'),
        ('DIAG3Desc', 'str'),
        ('DIAG4Desc', 'str'),
        ('DIAG5Desc', 'str'),
        ('DIAG6Desc', 'str'),
    ]
    col_names = [name for name, _ in schema]
    col_types = [kind for _, kind in schema]

    # Only exact duplicate rows are removed - nulls in the DIAGDesc columns
    # are needed later and are left in place
    admissions = read_data(adm_file, col_names, col_types).drop_duplicates()

    # The date columns are requested as plain objects above and parsed here
    for date_col in ('ADMDATE', 'DISDATE'):
        admissions[date_col] = pd.to_datetime(admissions[date_col])

    return admissions
def correct_stays(df):
    """
    Fill missing STAY values, then apply calculate_total_stay to each
    patient's admissions so transfers are consolidated into single
    admission occurrences
    --------
    :param df: admission dataframe to be corrected
    :return: admission dataframe with null stays filled and transfers
        combined, on a fresh default index
    """
    print('Correcting stays')

    # Null STAY values are filled in from the ADM and DIS dates
    filled = update_null_stay(df)

    # Rows are ordered per patient by admission then discharge date before
    # the per-patient correction runs; the index produced by the groupby is
    # discarded afterwards.
    # NOTE(review): newer pandas releases change whether the grouping column
    # is passed to the applied function - confirm SafeHavenID is still a
    # column of the result on the pandas version in use.
    return (
        filled
        .sort_values(['SafeHavenID', 'ADMDATE', 'DISDATE'])
        .groupby('SafeHavenID')
        .apply(calculate_total_stay)
        .reset_index(drop=True)
    )
def track_copd_resp(df):
    """
    Run the COPD and respiratory diagnosis searches over the admission data
    --------
    :param df: admission dataframe to be updated
    :return: updated dataframe with events tracked
    """
    print('Tracking events')

    def _strip_text(column):
        # Only object-dtype columns are stripped; any other column is
        # returned unchanged
        if column.dtype == 'object':
            return column.str.strip()
        return column

    tracked = df.apply(_strip_text)

    # COPD is searched first, then respiratory, each on the previous result
    for event in ('copd', 'resp'):
        tracked = search_diag(tracked, event)

    return tracked