"""
Derive features from demographics for 2 models:
    Parallel model 1: uses both hospital and community exacerbation events
    Parallel model 2: uses only hospital exacerbation events
"""
| |
|
| | import numpy as np |
| | import pandas as pd |
| | import sys |
| | import os |
| | import model_h |
| | import yaml |
| |
|
# Load the run configuration. The file handle gets its own name so the parsed
# settings are not bound to the same identifier as the open stream.
with open("./training/config.yaml", "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# Model variant being processed; it is part of the log, input and output
# file names used by this script.
model_type = config["model_settings"]["model_type"]

# Redirect everything printed from here on to a per-model log file.
# The logging directory is created first (as is done for the output directory
# before saving) so that open() does not fail when it is missing.
log_path = "./training/logging/process_demographics_" + model_type + ".log"
os.makedirs(os.path.dirname(log_path), exist_ok=True)
# The handle stays open for the rest of the run because stdout keeps pointing
# at it; nothing in the visible script closes it explicitly.
log = open(log_path, "w", encoding="utf-8")
sys.stdout = log

# Dataset selector: "forward_val" switches the inputs/outputs to the
# forward-validation files; any other value uses the default files.
data_to_process = config["model_settings"]["data_to_process"]
| |
|
| | |
# Load the patient labels and the patient details for the requested dataset.
if data_to_process == "forward_val":
    # Forward validation reads the hosp_comm label file whatever model_type
    # is configured.
    data = pd.read_pickle("./data/patient_labels_forward_val_hosp_comm.pkl")
    patient_details = pd.read_pickle("./data/patient_details_forward_val.pkl")
else:
    data = pd.read_pickle("./data/patient_labels_" + model_type + ".pkl")
    patient_details = pd.read_pickle("./data/patient_details.pkl")

# The feature steps later in this script read data["DateOfBirth"] and
# data["Sex"]. Selecting only "StudyId" from patient_details adds no columns,
# so also bring across whichever of those two columns the labels do not
# already carry. When both are already present this is exactly the original
# StudyId-only left merge.
# NOTE(review): assumes the merge applies to both branches (both load
# patient_details) and that patient_details holds DateOfBirth/Sex - confirm.
demographic_cols = [
    col for col in ("DateOfBirth", "Sex") if col not in data.columns
]
data = data.merge(
    patient_details[["StudyId"] + demographic_cols],
    on="StudyId",
    how="left",
)
| |
|
| | |
# Age at the index date, in whole years.
date_of_birth = pd.to_datetime(data["DateOfBirth"], utc=True)
days_since_birth = (data["IndexDate"] - date_of_birth).dt.days
# A year is taken as 365 days; leap days are not accounted for.
data["Age"] = np.floor(days_since_birth / 365)
# The raw date of birth is not carried forward.
data = data.drop(columns="DateOfBirth")
| |
|
| | |
# Age bands as (lower edge, label); the last band is open-ended.
age_bands = [
    (0, "<50"),
    (50, "50-59"),
    (60, "60-69"),
    (70, "70-79"),
    (80, "80+"),
]
age_bins = [lower_edge for lower_edge, _ in age_bands] + [np.inf]
age_labels = [band_label for _, band_label in age_bands]

# NOTE(review): which side of each bin edge is inclusive is decided inside
# model_h.bin_numeric_column - confirm it matches the labels above.
data["AgeBinned"] = model_h.bin_numeric_column(
    col=data["Age"],
    bins=age_bins,
    labels=age_labels,
)
| |
|
| | |
| |
|
| | |
# Encode sex as a binary indicator column: 1 for "F", 0 for "M".
# Values that are not keys of the mapping come out of Series.map as NaN.
sex_mapping = {"M": 0, "F": 1}
female_flag = data["Sex"].map(sex_mapping)
data["Sex_F"] = female_flag
# Only the encoded column is kept.
data = data.drop(columns=["Sex"])
| |
|
| | |
# Write the processed demographics to the configured output directory,
# creating the directory if needed. The forward-validation run gets its own
# file name so it does not overwrite the default output.
output_dir = config["outputs"]["processed_data_dir"]
os.makedirs(output_dir, exist_ok=True)

output_prefix = (
    "demographics_forward_val_"
    if data_to_process == "forward_val"
    else "demographics_"
)
output_name = output_prefix + model_type + ".pkl"
data.to_pickle(os.path.join(output_dir, output_name))
| |
|