|
|
|
|
|
import yaml |
|
|
import pickle |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
|
|
|
from lifelines import LogLogisticAFTFitter |
|
|
from KaplanMeierEstimator import KaplanMeierEstimator |
|
|
|
|
|
def load_config(config_path):
    '''Load the YAML configuration file and return its parsed content.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    '''
    # Decode explicitly as UTF-8 (the default YAML encoding) so that parsing
    # does not depend on the locale encoding of the machine running the script.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
|
|
|
|
|
def save_object_by_pickle(path, saved_obj):
    '''Serialize an object with pickle and write it to a file.

    Args:
        path: Destination file path; the file is opened in ``'wb'`` mode, so
            existing content is overwritten.
        saved_obj: The object to serialize; it must be picklable.

    Raises:
        OSError: If the destination file cannot be opened or written.
    '''
    # Serialize before opening the file so that a pickling failure does not
    # leave an empty or truncated file behind.
    payload = pickle.dumps(saved_obj)

    # The context manager closes the handle even if the write fails.
    with open(path, 'wb') as fd:
        fd.write(payload)
|
|
|
|
|
def example_how_to_train_survival_models(config):
    '''Show how to train and store the survival models, using toy data.

    This is only a template: train YOUR OWN models and REMOVE this function.

    Three Kaplan-Meier estimators (males, females, both sexes) and one
    log-logistic AFT model are fitted on a small hard-coded dataset. Each
    result is then pickled to the path stored in ``config`` under the keys
    ``path_kaplan_meier_males``, ``path_kaplan_meier_females``,
    ``path_kaplan_meier_both`` and ``path_aft``.
    '''
    # Toy dataset: one tuple per patient, values ordered as in `columns`.
    columns = [
        'id', 'sex', 'age', 'has_cancer',
        'visit_num', 'has_D00_D48', 'diagnosis_prop',
    ]
    patients = [
        (0, 'M', 48, 0, 12, 0, 0.72),
        (1, 'M', 59, 1, 31, 0, 0.46),
        (2, 'M', 64, 0, 22, 1, 0.53),
        (3, 'M', 67, 1, 25, 1, 0.58),
        (4, 'M', 72, 0, 18, 0, 0.63),
        (5, 'F', 52, 0, 27, 0, 0.68),
        (6, 'F', 61, 0, 32, 1, 0.62),
        (7, 'F', 66, 1, 38, 0, 0.44),
        (8, 'F', 69, 1, 35, 1, 0.38),
        (9, 'F', 75, 0, 33, 1, 0.63),
    ]
    dataset = pd.DataFrame(patients, columns=columns).set_index('id')

    def fit_kaplan_meier(frame):
        # Age is passed as T and the complement of the cancer flag as C.
        return KaplanMeierEstimator(T=frame['age'], C=1 - frame['has_cancer'])

    # Kaplan-Meier estimators: males only, females only, everyone.
    km_males = fit_kaplan_meier(dataset[dataset['sex'] == 'M'])
    km_females = fit_kaplan_meier(dataset[dataset['sex'] == 'F'])
    km_both = fit_kaplan_meier(dataset)

    # Training table for the AFT model; work on a copy to keep `dataset` intact.
    train = dataset.copy()
    # NOTE(review): the cancer flag is inverted before being used as
    # `event_col` below - confirm this matches the event/censoring convention
    # expected by lifelines.
    train['has_cancer'] = 1 - train['has_cancer']
    # Encode sex numerically: 1 for 'M', 0 for anything else.
    train['sex'] = train['sex'].map(lambda value: int(value == 'M'))

    fitter = LogLogisticAFTFitter(alpha=0.05, fit_intercept=True)
    aft = fitter.fit(train, duration_col='age', event_col='has_cancer')

    # The fitted model is stored together with its covariate names.
    aft_obj = dict(
        model=aft,
        covariates=['sex', 'visit_num', 'has_D00_D48', 'diagnosis_prop'],
    )

    # Persist every trained object to the path configured for it.
    outputs = (
        ('path_kaplan_meier_males', km_males),
        ('path_kaplan_meier_females', km_females),
        ('path_kaplan_meier_both', km_both),
        ('path_aft', aft_obj),
    )
    for config_key, trained in outputs:
        save_object_by_pickle(config[config_key], trained)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: read the configuration from the working directory,
    # then run the example training routine with it.
    config = load_config('./CONFIG_CanSave.yaml')
    example_how_to_train_survival_models(config)
|
|
|