"""
Utility functions common across admission processing
(admissions/comorbidities/gples)
"""
import pandas as pd
from utils.common import read_data
from utils.adm_processing import (update_null_stay, calculate_total_stay,
                                  search_diag)


def initialize_adm_data(adm_file):
    """
    Read the admission dataset and prepare it for downstream processing
    --------
    :param adm_file: admission data file name, passed straight to read_data
    :return: de-duplicated admission dataframe whose ADMDATE and DISDATE
        columns have been converted with pd.to_datetime
    """
    print('Loading admission data')

    # Column name / requested type pairs, in the order handed to read_data
    schema = (
        ('SafeHavenID', 'int'),
        ('ETHGRP', 'object'),
        ('ADMDATE', 'object'),
        ('DISDATE', 'object'),
        ('STAY', 'int'),
        ('DIAG1Desc', 'str'),
        ('DIAG2Desc', 'str'),
        ('DIAG3Desc', 'str'),
        ('DIAG4Desc', 'str'),
        ('DIAG5Desc', 'str'),
        ('DIAG6Desc', 'str'),
    )
    adm_cols = [name for name, _ in schema]
    adm_types = [dtype for _, dtype in schema]
    admissions = read_data(adm_file, adm_cols, adm_types)

    # Only exact duplicate rows are removed - nulls in the DIAGDesc columns
    # are needed later, so no null filtering happens here
    admissions = admissions.drop_duplicates()

    # Dates are read as plain objects; turn them into datetimes
    for date_col in ('ADMDATE', 'DISDATE'):
        admissions[date_col] = pd.to_datetime(admissions[date_col])

    return admissions


def correct_stays(df):
    """
    Fill missing STAY values and recompute stays patient by patient
    --------
    :param df: admission dataframe to be corrected
    :return: admission dataframe after update_null_stay has been applied and
        calculate_total_stay has been run on each SafeHavenID group (used to
        combine transfer admissions), with a fresh 0..n-1 index
    """
    print('Correcting stays')

    # Null STAY values are filled from the ADM and DIS dates by the helper
    filled = update_null_stay(df)

    # Order each patient's admissions chronologically before the per-patient
    # pass that corrects stays for patients passed across departments
    ordered = filled.sort_values(['SafeHavenID', 'ADMDATE', 'DISDATE'])
    per_patient = ordered.groupby('SafeHavenID').apply(calculate_total_stay)

    # Discard the index produced by groupby/apply
    return per_patient.reset_index(drop=True)


def track_copd_resp(df):
    """
    Flag COPD and respiratory admissions via search_diag
    --------
    :param df: admission dataframe to be updated
    :return: dataframe with object columns whitespace-stripped and the
        results of the 'copd' and 'resp' diagnosis searches applied
    """
    print('Tracking events')

    def _strip_if_text(column):
        # Only object-typed columns (e.g. the DIAGDesc text) are stripped;
        # every other column is passed through untouched
        if column.dtype == 'object':
            return column.str.strip()
        return column

    df = df.apply(_strip_if_text)

    # COPD is searched first, then respiratory
    for event in ('copd', 'resp'):
        df = search_diag(df, event)

    return df