# copd-model-e/training/src/reduction/post_proc_reduction.py
# IamGrooooot's picture
# Model E: Unsupervised PCA + clustering risk stratification
# 53a6def
import json
import os

import pandas as pd
# Source columns that main() sums row-wise into the new 'single_inhaler' column.
single_inhaler = [
    'saba_inhaler_per_year',
    'laba_inhaler_per_year',
    'lama_inhaler_per_year',
    'sama_inhaler_per_year',
    'ics_inhaler_per_year',
    'mcs_inhaler_per_year',
]

# Source columns that main() sums row-wise into the new 'double_inhaler' column.
double_inhaler = [
    'laba_ics_inhaler_per_year',
    'saba_sama_inhaler_per_year',
]

# Single source column that main() copies into the new 'triple_inhaler' column.
# Kept as a plain string (not a list) because it is used as a column selector.
triple_inhaler = 'lama_laba_ics_inhaler_per_year'

# Source columns that main() sums row-wise into 'copd_resp_per_year'.
adm_cols = [
    'copd_per_year',
    'resp_per_year',
]
def main():
    """Build the reduced dataset ``merged.pkl`` from ``merged_full.pkl``.

    Steps:
      1. Read ``model_data_path`` from ``../../../config.json``.
      2. Load ``merged_full.pkl`` from that directory.
      3. Add four summary columns:
         * ``single_inhaler``     - row-wise sum of the ``single_inhaler`` columns
         * ``double_inhaler``     - row-wise sum of the ``double_inhaler`` columns
         * ``triple_inhaler``     - copy of the ``triple_inhaler`` column
         * ``copd_resp_per_year`` - row-wise sum of the ``adm_cols`` columns
      4. Drop every source column that fed those summaries.
      5. Write the result to ``merged.pkl`` in the same directory.

    Raises:
        FileNotFoundError: if the config file or the input pickle is missing.
        KeyError: if ``model_data_path`` is absent from the config, or any of
            the expected source columns is absent from the data.
    """
    # Load in config items.
    # NOTE: this path is relative to the current working directory, not to
    # this file, so the script has to be launched from a directory three
    # levels below the one containing config.json.
    with open('../../../config.json', encoding='utf-8') as json_config_file:
        config = json.load(json_config_file)
    data_path = config['model_data_path']

    # Read in original data before scaling.
    # os.path.join yields the same path as plain concatenation when
    # data_path already ends with a separator, and also works when it
    # does not (concatenation would silently build a wrong file name).
    df = pd.read_pickle(os.path.join(data_path, 'merged_full.pkl'))

    # Create new reduced columns (DataFrame.sum skips NaN by default).
    df['single_inhaler'] = df[single_inhaler].sum(axis=1)
    df['double_inhaler'] = df[double_inhaler].sum(axis=1)
    df['triple_inhaler'] = df[triple_inhaler]
    df['copd_resp_per_year'] = df[adm_cols].sum(axis=1)

    # Drop original columns now that they are represented by the summaries.
    cols2drop = single_inhaler + double_inhaler + [triple_inhaler] + adm_cols
    df = df.drop(columns=cols2drop)

    # Save data
    df.to_pickle(os.path.join(data_path, 'merged.pkl'))
# Run the pipeline only when executed as a script, so that importing this
# module (e.g. to reuse the column lists) does not read or write any files.
if __name__ == "__main__":
    main()