File size: 3,821 Bytes
12ac942 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# required libraries
import yaml
import pickle
import numpy as np
import pandas as pd
from lifelines import LogLogisticAFTFitter
from KaplanMeierEstimator import KaplanMeierEstimator
def load_config(config_path):
    '''Read the YAML config-file at `config_path` and return its parsed content.'''
    with open(config_path, 'r') as stream:
        # safe_load only builds plain Python objects, never arbitrary ones
        return yaml.safe_load(stream)
def save_object_by_pickle(path, saved_obj):
    '''Serialize `saved_obj` with pickle and write it to the file at `path`.

    path: destination file; it is created or overwritten in binary mode.
    saved_obj: any picklable object.

    Returns None. Errors from pickling or from the file system propagate
    to the caller.
    '''
    # Serialize before opening the file, so that a pickling failure
    # does not truncate a file that already exists at `path`.
    payload = pickle.dumps(saved_obj)
    # The context manager closes the descriptor even if the write fails.
    with open(path, 'wb') as fd:
        fd.write(payload)
def _fit_kaplan_meier(frame):
    '''Build a Kaplan-Meier estimator from the `age` and `has_cancer` columns of `frame`.'''
    durations = frame['age']
    # The estimator takes a censoring flag: 0 - Failure, 1 - Right-Censored Observation
    censored = 1 - frame['has_cancer']
    return KaplanMeierEstimator(T=durations, C=censored)


def example_how_to_train_survival_models(config):
    '''Example how to train survival models (You have to train YOUR models and REMOVE this function).

    Builds a small simulated patient dataset, fits three Kaplan-Meier
    estimators (males, females, everyone) and one log-logistic AFT model,
    and pickles each of them.

    config: mapping with the output paths under the keys
        'path_kaplan_meier_males', 'path_kaplan_meier_females',
        'path_kaplan_meier_both' and 'path_aft'.

    Returns None.
    '''
    # We describe a simulated dataset of patients that have some random covariates (=features)
    dataset = pd.DataFrame([
        {'id': 0, 'sex': 'M', 'age': 48, 'has_cancer': 0, 'visit_num': 12, 'has_D00_D48': 0, 'diagnosis_prop': 0.72},
        {'id': 1, 'sex': 'M', 'age': 59, 'has_cancer': 1, 'visit_num': 31, 'has_D00_D48': 0, 'diagnosis_prop': 0.46},
        {'id': 2, 'sex': 'M', 'age': 64, 'has_cancer': 0, 'visit_num': 22, 'has_D00_D48': 1, 'diagnosis_prop': 0.53},
        {'id': 3, 'sex': 'M', 'age': 67, 'has_cancer': 1, 'visit_num': 25, 'has_D00_D48': 1, 'diagnosis_prop': 0.58},
        {'id': 4, 'sex': 'M', 'age': 72, 'has_cancer': 0, 'visit_num': 18, 'has_D00_D48': 0, 'diagnosis_prop': 0.63},
        {'id': 5, 'sex': 'F', 'age': 52, 'has_cancer': 0, 'visit_num': 27, 'has_D00_D48': 0, 'diagnosis_prop': 0.68},
        {'id': 6, 'sex': 'F', 'age': 61, 'has_cancer': 0, 'visit_num': 32, 'has_D00_D48': 1, 'diagnosis_prop': 0.62},
        {'id': 7, 'sex': 'F', 'age': 66, 'has_cancer': 1, 'visit_num': 38, 'has_D00_D48': 0, 'diagnosis_prop': 0.44},
        {'id': 8, 'sex': 'F', 'age': 69, 'has_cancer': 1, 'visit_num': 35, 'has_D00_D48': 1, 'diagnosis_prop': 0.38},
        {'id': 9, 'sex': 'F', 'age': 75, 'has_cancer': 0, 'visit_num': 33, 'has_D00_D48': 1, 'diagnosis_prop': 0.63},
    ]).set_index('id')

    # Build the Kaplan-Meier estimators for MALES, FEMALES and MALES & FEMALES
    km_males = _fit_kaplan_meier(dataset[dataset['sex'] == 'M'])
    km_females = _fit_kaplan_meier(dataset[dataset['sex'] == 'F'])
    km_both = _fit_kaplan_meier(dataset)

    # Train the AFT model
    train = dataset.copy()
    # lifelines reads `event_col` as 1 - event observed, 0 - censored, so
    # `has_cancer` is passed as is. Inverting it (as the Kaplan-Meier
    # censoring flag requires) would swap events and censored observations.
    train['sex'] = (train['sex'] == 'M').astype(int)  # 1 - male, 0 - otherwise
    aft = LogLogisticAFTFitter(
        alpha=0.05,
        fit_intercept=True
    ).fit(train, duration_col='age', event_col='has_cancer')
    aft_obj = {
        'model': aft,
        'covariates': ['sex', 'visit_num', 'has_D00_D48', 'diagnosis_prop'],
    }

    # Save these models
    outputs = (
        ('path_kaplan_meier_males', km_males),
        ('path_kaplan_meier_females', km_females),
        ('path_kaplan_meier_both', km_both),
        ('path_aft', aft_obj),
    )
    for config_key, model in outputs:
        save_object_by_pickle(config[config_key], model)
# entry point
if __name__ == '__main__':
    # Read the config-file, then run the example training of survival models with it
    example_how_to_train_survival_models(load_config('./CONFIG_CanSave.yaml'))
|