File size: 4,419 Bytes
53a6def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Script for preprocessing pharmacy data
--------
Process pharmacy data and track inhaler prescriptions and rescue meds
"""
import json
import pandas as pd
from datetime import date
from dateutil.relativedelta import relativedelta
from utils.common import (add_hist_adm_presc, correct_column_names,
                          first_patient_appearance)
from utils.presc_common import initialize_presc_data, track_medication


def add_inhaler_mappings(df):
    """
    Load inhaler prescription mappings and track where they appear in the data
    --------
    :param df: dataframe with a PI_Approved_Name column; the new columns are
        also written onto this input frame before the drop below
    :return: dataframe with a 0/1 '<type>_inhaler' column added for each
        inhaler type in the mapping, minus the two types dropped below
    """
    print('Mapping inhaler prescriptions')

    # Load in inhaler mapping ({inhaler type: list of name patterns})
    with open('mappings/inhaler_mapping.json') as json_file:
        inhaler_mapping = json.load(json_file)

    for inhaler_type, patterns in inhaler_mapping.items():
        # Patterns are OR-ed into a single regex. na=False counts rows with a
        # missing approved name as "no match"; without it those rows come back
        # as NaN and the integer cast below fails.
        df[inhaler_type + '_inhaler'] = df.PI_Approved_Name.str.contains(
            '|'.join(patterns), na=False).astype(int)

    # Remove for now as empty
    df = df.drop(['LABA-LAMA-ICS_inhaler', 'Ignore_inhaler'], axis=1)

    return df


def add_eoy_column(df, dt_col, eoy_date):
    """
    Add EOY relative to user-specified end date
    --------
    :param df: dataframe
    :param dt_col: date column in dataframe (datetime dtype)
    :param eoy_date: EOY date from config; only its month and day are used
    :return: copy of df with a fresh 0..n-1 index and a datetime 'eoy' column
        holding the EOY date in the year of dt_col, moved on one year for
        entries where dt_col is later than it
    """
    # Needed to stop error with creating a new column
    df = df.reset_index(drop=True)

    # Only the month and day of the user-specified end of year date are used
    end_date = pd.to_datetime(eoy_date)
    end_month = end_date.month
    end_day = end_date.day

    # EOY in the same calendar year as each entry. Converted to datetime
    # straight away: comparing a datetime column against datetime.date
    # objects raises TypeError in pandas >= 2.0.
    df['eoy'] = pd.to_datetime(
        [date(y, end_month, end_day) for y in df[dt_col].dt.year])

    # Entries after that year's EOY belong to the period ending next year
    after_eoy = df[dt_col] > df['eoy']
    next_eoy = df['eoy'] + pd.DateOffset(years=1)
    df['eoy'] = df['eoy'].where(~after_eoy, next_eoy)

    return df


def calc_presc_per_year(df):
    """
    Collapse per-event rows to one row per patient and EOY
    --------
    :param df: per-event dataframe containing SafeHavenID, eoy and the
        columns listed below
    :return: dataframe indexed by (SafeHavenID, eoy) holding the last value
        of each running column plus a '<col>_per_year' total for each
        count column
    """
    print('Reducing to 1 row per year')

    # Running columns: keep the last value seen within each year
    running_cols = ['presc_to_date', 'days_since_rescue', 'rescue_to_date',
                    'anxiety_depression_presc_to_date', 'rescue_date']

    # Count columns: summed within each year
    count_cols = ['SALBUTAMOL', 'SABA_inhaler', 'LABA_inhaler',
                  'LAMA_inhaler', 'SAMA_inhaler', 'ICS_inhaler',
                  'LABA-ICS_inhaler', 'LAMA +LABA-ICS_inhaler',
                  'SABA + SAMA_inhaler', 'MCS_inhaler', 'rescue_meds',
                  'presc', 'anxiety_depression_presc']

    by_patient_year = df.groupby(['SafeHavenID', 'eoy'])
    latest = by_patient_year[running_cols].last()
    yearly_totals = by_patient_year[count_cols].sum().add_suffix('_per_year')

    # Both frames share the (SafeHavenID, eoy) index
    return latest.join(yearly_totals)


def main():
    """
    Run the pharmacy preprocessing pipeline
    --------
    Reads the config, loads the pharmacy extract, adds the inhaler,
    medication tracking and EOY columns, then pickles both the per-event
    and the per-year datasets into the model data path
    """
    # Read configuration
    with open('../../../config.json') as config_fh:
        config = json.load(config_fh)

    # Load the pharmacy extract
    presc = initialize_presc_data(
        config['extract_data_path'] + 'Pharmacy_Cohort3R.csv')

    # Save first date in dataset
    data_path = config['model_data_path']
    first_patient_appearance(presc, 'PRESC_DATE', 'presc', data_path)

    # Inhaler mapping first, then salbutamol and rescue med tracking
    presc = track_medication(add_inhaler_mappings(presc))

    # Columns no longer needed once the mappings have been applied
    presc = presc.drop(
        columns=['PI_Approved_Name', 'PI_BNF_Item_Code', 'code'])

    # EOY column relative to the date given in the config
    presc = add_eoy_column(presc, 'PRESC_DATE', config['date'])

    # Flag every row as a prescription event
    presc['presc'] = 1

    # Add any historical count columns, patient by patient
    presc = (presc.groupby('SafeHavenID')
             .apply(add_hist_adm_presc, 'presc', 'PRESC_DATE')
             .reset_index(drop=True))

    # Per-event dataset
    presc.to_pickle(data_path + 'validation_presc_proc.pkl')

    # Per-year dataset with corrected column names
    presc_yearly = calc_presc_per_year(presc)
    presc_yearly.columns = correct_column_names(presc_yearly.columns, 'presc')
    presc_yearly.to_pickle(data_path + 'presc_proc.pkl')


# Only run the pipeline when executed as a script, not when imported
if __name__ == '__main__':
    main()