| | """ |
| | Script for preprocessing pharmacy data |
| | -------- |
| | Process pharmacy data and track inhaler prescriptions and rescue meds |
| | """ |
| | import json |
| | import pandas as pd |
| | from datetime import date |
| | from dateutil.relativedelta import relativedelta |
| | from utils.common import (add_hist_adm_presc, correct_column_names, |
| | first_patient_appearance) |
| | from utils.presc_common import initialize_presc_data, track_medication |
| |
|
| |
|
def add_inhaler_mappings(df):
    """
    Load inhaler prescription mappings and track where they appear in the data
    --------
    :param df: dataframe with a PI_Approved_Name column; the indicator columns
        are written onto this frame before the unused ones are dropped
    :return: dataframe with a 0/1 '<type>_inhaler' column added for each
        inhaler type in the mapping file, minus the 'LABA-LAMA-ICS' and
        'Ignore' groups
    """
    print('Mapping inhaler prescriptions')

    # Mapping file: inhaler type -> list of approved-name patterns
    with open('mappings/inhaler_mapping.json') as json_file:
        inhaler_mapping = json.load(json_file)

    for inhaler_type, patterns in inhaler_mapping.items():
        # The patterns are OR-ed into one regular expression. na=False scores
        # rows with a missing approved name as "no match"; without it the
        # match result contains NaN and the cast to int raises.
        df[inhaler_type + '_inhaler'] = df.PI_Approved_Name.str.contains(
            '|'.join(patterns), na=False).astype(int)

    # These two mapping groups are not carried forward as columns
    df = df.drop(['LABA-LAMA-ICS_inhaler', 'Ignore_inhaler'], axis=1)

    return df
| |
|
| |
|
def add_eoy_column(df, dt_col, eoy_date):
    """
    Add EOY relative to user-specified end date
    --------
    :param df: dataframe
    :param dt_col: date column in dataframe (must support the .dt accessor)
    :param eoy_date: EOY date from config; only its month and day are used
    :return: re-indexed (0..n-1) copy of df with a datetime 'eoy' column. Each
        row gets the EOY date of its own calendar year, or of the following
        year when the row's date is later than that
    """
    # reset_index returns a new frame, so the caller's frame is not modified
    df = df.reset_index(drop=True)

    end_date = pd.to_datetime(eoy_date)

    # EOY date in the same calendar year as each row, built in one vectorized
    # step as datetime64 rather than one Python date object per row
    eoy = pd.to_datetime(pd.DataFrame({
        'year': df[dt_col].dt.year,
        'month': end_date.month,
        'day': end_date.day,
    }))

    # Rows dated after their own year's EOY roll over to the next year's EOY
    past_eoy = df[dt_col] > eoy
    df['eoy'] = eoy.mask(past_eoy, eoy + pd.DateOffset(years=1))

    return df
| |
|
| |
|
def calc_presc_per_year(df):
    """
    Collapse the prescription records to one row per patient per EOY period
    --------
    :param df: dataframe to be reduced; needs SafeHavenID, eoy and every
        column named below
    :return: dataframe indexed by (SafeHavenID, eoy) holding the last value of
        each "to date" column followed by a '<col>_per_year' total for each
        summed column
    """
    print('Reducing to 1 row per year')

    per_patient_year = df.groupby(['SafeHavenID', 'eoy'])

    # Running fields: keep the last value seen within each group
    # (row order inside the group decides which one that is)
    carried_cols = ['presc_to_date', 'days_since_rescue', 'rescue_to_date',
                    'anxiety_depression_presc_to_date', 'rescue_date']
    carried = per_patient_year[carried_cols].last()

    # Per-row indicator/count fields: add them up within each group
    counted_cols = ['SALBUTAMOL', 'SABA_inhaler', 'LABA_inhaler',
                    'LAMA_inhaler', 'SAMA_inhaler', 'ICS_inhaler',
                    'LABA-ICS_inhaler', 'LAMA +LABA-ICS_inhaler',
                    'SABA + SAMA_inhaler', 'MCS_inhaler', 'rescue_meds',
                    'presc', 'anxiety_depression_presc']
    yearly_totals = per_patient_year[counted_cols].sum().add_suffix('_per_year')

    # Both frames share the (SafeHavenID, eoy) index, so join lines them up
    return carried.join(yearly_totals)
| |
|
| |
|
def main():
    """
    Run the pharmacy preprocessing pipeline end to end
    --------
    Reads the paths and EOY date from ../../../config.json, loads the pharmacy
    extract, adds inhaler and medication tracking columns, then writes two
    pickles to the model data path: the row-level frame
    ('validation_presc_proc.pkl') and the per-year reduction
    ('presc_proc.pkl').
    """
    # Load user configuration
    with open('../../../config.json') as config_handle:
        settings = json.load(config_handle)

    # Load the raw pharmacy extract
    extract_path = settings['extract_data_path'] + 'Pharmacy_Cohort3R.csv'
    records = initialize_presc_data(extract_path)

    # Pass the raw records to the first-appearance helper (return value unused)
    output_dir = settings['model_data_path']
    first_patient_appearance(records, 'PRESC_DATE', 'presc', output_dir)

    # Flag inhaler types, then let the shared helper track medication
    records = track_medication(add_inhaler_mappings(records))

    # Name/code columns are not needed past this point
    records = records.drop(
        columns=['PI_Approved_Name', 'PI_BNF_Item_Code', 'code'])

    # Attach the EOY date each prescription falls under
    records = add_eoy_column(records, 'PRESC_DATE', settings['date'])

    # One prescription per row, so a constant 1 makes them countable
    records['presc'] = 1

    # Apply the history helper per patient, then flatten the index again
    records = (records.groupby('SafeHavenID')
               .apply(add_hist_adm_presc, 'presc', 'PRESC_DATE')
               .reset_index(drop=True))

    # Row-level output
    records.to_pickle(output_dir + 'validation_presc_proc.pkl')

    # Yearly output: reduce, rename columns via the shared helper, save
    yearly = calc_presc_per_year(records)
    yearly.columns = correct_column_names(yearly.columns, 'presc')
    yearly.to_pickle(output_dir + 'presc_proc.pkl')


# Runs on execution and on import alike (no __main__ guard)
main()
| |
|