import os
import sys
import shutil
import pickle

import numpy as np
import pandas as pd
import yaml

# Plotting
import matplotlib.pyplot as plt

# Model training and evaluation
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.metrics import precision_recall_curve, auc
from sklearn.calibration import CalibratedClassifierCV
from imblearn.ensemble import BalancedRandomForestClassifier
import xgboost as xgb
import ml_insights as mli
import mlflow

# Explainability
from sklearn.inspection import permutation_importance

import model_h

with open("./training/config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

model_type = config['model_settings']['model_type']

##############################################################
# Load data
##############################################################

# Set up the log file and redirect stdout to it
log = open(
    os.path.join(config['outputs']['logging_dir'],
                 "modelling_" + model_type + ".log"), "w")
sys.stdout = log

# Load CV folds
fold_patients = np.load(
    os.path.join(config['outputs']['cohort_info_dir'],
                 'fold_patients_' + model_type + '.npy'),
    allow_pickle=True)

# Load imputed cross-validation data
train_data_imp = model_h.load_data_for_modelling(os.path.join(
    config["outputs"]["model_input_data_dir"],
    "train_imputed_cv_{}.pkl".format(model_type),
))

# Load non-imputed cross-validation data
train_data_no_imp = model_h.load_data_for_modelling(os.path.join(
    config["outputs"]["model_input_data_dir"],
    "train_not_imputed_cv_{}.pkl".format(model_type),
))

# Load imputed test data
test_data_imp = model_h.load_data_for_modelling(os.path.join(
    config["outputs"]["model_input_data_dir"],
    "test_imputed_{}.pkl".format(model_type),
))

# Load non-imputed test data
test_data_no_imp = model_h.load_data_for_modelling(os.path.join(
    config["outputs"]["model_input_data_dir"],
    "test_not_imputed_{}.pkl".format(model_type),
))

# Load exacerbation data
#train_exac_data = pd.read_pickle('./data/train_exac_data_' + model_type + '.pkl')
#test_exac_data = pd.read_pickle('./data/test_exac_data_' + model_type + '.pkl')

# Print date ranges for the train and test sets
print('Train date range', train_data_imp['IndexDate'].min(),
      train_data_imp['IndexDate'].max())
print('Test date range', test_data_imp['IndexDate'].min(),
      test_data_imp['IndexDate'].max())

# Set tags
tags = {
    "prediction_window": config['model_settings']['prediction_window'],
    "lookback_period": config['model_settings']['lookback_period'],
    "min_index_date": train_data_imp['IndexDate'].min(),
    "max_index_date": train_data_imp['IndexDate'].max(),
    "1_row_per_length_in_service_days":
        config['model_settings']['one_row_per_days_in_service'],
}
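# Added sanity check (not in the original pipeline): a quick look at cohort
# sizes and outcome prevalence in the train and test sets, written to the log.
print('Train rows:', len(train_data_imp), 'Test rows:', len(test_data_imp))
print('Train prevalence:',
      round(float(train_data_imp['ExacWithin3Months'].mean()), 3))
print('Test prevalence:',
      round(float(test_data_imp['ExacWithin3Months'].mean()), 3))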
# Create a tuple of training and validation indices for each fold. This can be
# done with either the imputed or the non-imputed data, as both contain the
# same patients.
cross_val_fold_indices = []
for fold in fold_patients:
    fold_val_ids = train_data_no_imp[train_data_no_imp.StudyId.isin(fold)]
    fold_train_ids = train_data_no_imp[
        ~train_data_no_imp.StudyId.isin(fold_val_ids.StudyId)]

    # Get the index of the rows in the validation and training folds
    fold_val_index = fold_val_ids.index
    fold_train_index = fold_train_ids.index

    # Append a tuple of training and validation indices
    cross_val_fold_indices.append((fold_train_index, fold_val_index))

# Create the list of model features
cols_to_drop = ['StudyId', 'ExacWithin3Months', 'IndexDate',
                'HospExacWithin3Months', 'CommExacWithin3Months']
features_list = [col for col in train_data_no_imp.columns
                 if col not in cols_to_drop]

### Train data ###
# Separate features from target for the non-imputed data
train_features_no_imp = train_data_no_imp[features_list].astype('float')
train_target_no_imp = train_data_no_imp.ExacWithin3Months.astype('float')

# Separate features from target for the imputed data
train_features_imp = train_data_imp[features_list].astype('float')
train_target_imp = train_data_imp.ExacWithin3Months.astype('float')

### Test data ###
# Separate features from target for the non-imputed data
test_features_no_imp = test_data_no_imp[features_list].astype('float')
test_target_no_imp = test_data_no_imp.ExacWithin3Months.astype('float')

# Separate features from target for the imputed data
test_features_imp = test_data_imp[features_list].astype('float')
test_target_imp = test_data_imp.ExacWithin3Months.astype('float')

# Check that the targets in the imputed and non-imputed datasets are the same.
# If not, raise an error.
if not train_target_no_imp.equals(train_target_imp):
    raise ValueError(
        'Target variable is not the same in the imputed and non-imputed '
        'datasets in the train set.')
if not test_target_no_imp.equals(test_target_imp):
    raise ValueError(
        'Target variable is not the same in the imputed and non-imputed '
        'datasets in the test set.')

train_target = train_target_no_imp
test_target = test_target_no_imp

# Make sure all features are numeric
for features in [train_features_no_imp, train_features_imp,
                 test_features_no_imp, test_features_imp]:
    for col in features:
        features[col] = pd.to_numeric(features[col], errors='coerce')

##############################################################
# Specify which models to evaluate
##############################################################

# Set up MLflow
mlflow.set_tracking_uri("sqlite:///mlruns.db")
mlflow.set_experiment('model_h_drop_1_' + model_type)

# Set CV scoring strategies and any model parameters
scoring = ['f1', 'balanced_accuracy', 'accuracy', 'precision', 'recall',
           'roc_auc', 'average_precision', 'neg_brier_score']

# Set up the models. Each tuple contains four elements: the model, the model
# name, the imputation status, and the type of model.
models = []

# These models are run for both the hospital-exacerbation model and the
# hospital-and-community-exacerbation model
models.append((BalancedRandomForestClassifier(random_state=0),
               'balanced_random_forest', 'imputed', 'tree'))
models.append((xgb.XGBClassifier(random_state=0, use_label_encoder=False,
                                 eval_metric='logloss'),
               'xgb', 'not_imputed', 'tree'))
models.append((RandomForestClassifier(), 'random_forest', 'imputed', 'tree'))

# Get the parent run where hyperparameter tuning was done
if model_type == 'only_hosp':
    parent_run_id = 'ba2d7244654c4b84a815932a3167648f'
if model_type == 'hosp_comm':
    parent_run_id = 'f71edd4c72f14c0692431dca297ec131'
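# Added sanity check (not in the original pipeline): the custom CV folds are
# built by patient, so a patient's rows should never appear in both the
# training and the validation indices of the same fold.
for fold_train_index, fold_val_index in cross_val_fold_indices:
    assert set(fold_train_index).isdisjoint(fold_val_index), \
        'Train and validation indices overlap within a CV fold.'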
##############################################################
# Run models
##############################################################

# Within an MLflow run, perform K-fold cross-validation and capture the mean
# score across folds.
with mlflow.start_run(run_name='hyperparameter_optimised_models_12'):
    for model in models:
        # Get the parameters of the best-scoring models
        best_params = model_h.get_mlflow_run_params(
            model[1], parent_run_id, 'sqlite:///mlruns.db', model_type)

        # Each model has multiple sets of best parameters, one for each
        # scoring metric used during tuning.
        for n, scorer in enumerate(best_params):
            params = best_params[scorer]
            model[0].set_params(**params)

            with mlflow.start_run(
                    run_name=model[1] + '_tuning_scorer_' + scorer,
                    nested=True):
                print(model[1], scorer)

                # Remove any existing directory contents so files from
                # different runs are not mixed. Note that rmtree removes the
                # directory itself, so it must be recreated afterwards.
                shutil.rmtree(config['outputs']['artifact_dir'],
                              ignore_errors=True)
                os.makedirs(config['outputs']['artifact_dir'], exist_ok=True)

                # Select the correct data depending on whether the model uses
                # the imputed or the non-imputed dataset
                if model[2] == 'imputed':
                    train_features = train_features_imp
                    test_features = test_features_imp
                    train_data = train_data_imp
                    test_data = test_data_imp
                else:
                    train_features = train_features_no_imp
                    test_features = test_features_no_imp
                    train_data = train_data_no_imp
                    test_data = test_data_no_imp

                mlflow.set_tags(tags=tags)

                # Perform K-fold cross-validation with custom folds
                crossval = cross_validate(model[0], train_features,
                                          train_target,
                                          cv=cross_val_fold_indices,
                                          return_estimator=True,
                                          scoring=scoring,
                                          return_indices=True)

                # Get the out-of-fold predicted probabilities from each model
                probabilities_cv = cross_val_predict(
                    model[0], train_features, train_target,
                    cv=cross_val_fold_indices,
                    method='predict_proba')[:, 1]

                # Evaluate the uncalibrated fold models on the test set
                for iter_num, estimator in enumerate(crossval['estimator']):
                    probs_test = estimator.predict_proba(test_features)[:, 1]
                    preds_test = estimator.predict(test_features)
                    uncalib_metrics_test = model_h.calc_eval_metrics_for_model(
                        test_target, preds_test, probs_test, 'uncalib_test')
                    if iter_num == 0:
                        uncalib_metrics_test_df = pd.DataFrame(
                            uncalib_metrics_test, index=[iter_num])
                    else:
                        uncalib_metrics_test_df_iter = pd.DataFrame(
                            uncalib_metrics_test, index=[iter_num])
                        uncalib_metrics_test_df = pd.concat(
                            [uncalib_metrics_test_df,
                             uncalib_metrics_test_df_iter])
                uncalib_metrics_test_mean = uncalib_metrics_test_df.mean()
                uncalib_metrics_test_mean = uncalib_metrics_test_mean.to_dict()

                # Get the threshold that gives the best F1 score for the
                # uncalibrated model
                best_thres_uncal, f1_bt, prec_bt, rec_bt = \
                    model_h.get_threshold_with_best_f1_score(
                        train_target, probabilities_cv)

                # Log the F1 score, precision, and recall for the best
                # threshold
                mlflow.log_metric('best_thres_uncal', best_thres_uncal)
                mlflow.log_metric('f1_best_thres', f1_bt)
                mlflow.log_metric('precision_best_thres', prec_bt)
                mlflow.log_metric('recall_best_thres', rec_bt)

                #### Plot confusion matrices at different thresholds ####
                model_h.plot_confusion_matrix(
                    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, best_thres_uncal],
                    probabilities_cv, train_target, model[1], model_type,
                    'uncalib')

                #### Calculate the AUC-PR score ####
                precision, recall, thresholds = precision_recall_curve(
                    train_target, probabilities_cv)
                auc_pr = auc(recall, precision)
                mlflow.log_metric('auc_pr', auc_pr)
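                # Added illustration (not logged by the original pipeline):
                # the Brier score of the out-of-fold probabilities summarises
                # discrimination and calibration in a single number and gives
                # a baseline to compare the calibrated models against.
                from sklearn.metrics import brier_score_loss
                mlflow.log_metric(
                    'brier_cv_uncalib',
                    brier_score_loss(train_target, probabilities_cv))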
                #### Generate calibration curves ####
                if model[1] != 'dummy_classifier':

                    ### Sigmoid calibration ###
                    # Perform calibration
                    model_sig = CalibratedClassifierCV(
                        model[0], method='sigmoid',
                        cv=cross_val_fold_indices)
                    model_sig.fit(train_features, train_target)
                    probs_sig = model_sig.predict_proba(test_features)[:, 1]
                    preds_sig = model_sig.predict(test_features)

                    # Generate metrics for the calibrated model
                    calib_metrics_sig = model_h.calc_eval_metrics_for_model(
                        test_target, preds_sig, probs_sig, 'sig')

                    # Get the threshold with the best F1 score for the
                    # calibrated model
                    best_thres_sig, _, _, _ = \
                        model_h.get_threshold_with_best_f1_score(
                            test_target, probs_sig)
                    mlflow.log_metric('best_thres_sig', best_thres_sig)

                    # Plot confusion matrices for the calibrated model
                    model_h.plot_confusion_matrix(
                        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, best_thres_sig],
                        probs_sig, test_target, model[1], model_type, "sig")

                    # Plot the score distribution for the calibrated model
                    model_h.plot_score_distribution(
                        test_target, probs_sig,
                        config['outputs']['artifact_dir'], model[1],
                        model_type, 'sig')

                    # Calculate the std of AUC-PR between CV folds
                    model_h.calc_std_for_calibrated_classifiers(
                        model_sig, 'sig', test_features, test_target)

                    ### Isotonic calibration ###
                    # Perform calibration
                    model_iso = CalibratedClassifierCV(
                        model[0], method='isotonic',
                        cv=cross_val_fold_indices)
                    model_iso.fit(train_features, train_target)
                    probs_iso = model_iso.predict_proba(test_features)[:, 1]
                    preds_iso = model_iso.predict(test_features)

                    # Generate metrics for the calibrated model
                    calib_metrics_iso = model_h.calc_eval_metrics_for_model(
                        test_target, preds_iso, probs_iso, 'iso')

                    # Get the threshold with the best F1 score for the
                    # calibrated model
                    best_thres_iso, _, _, _ = \
                        model_h.get_threshold_with_best_f1_score(
                            test_target, probs_iso)
                    mlflow.log_metric('best_thres_iso', best_thres_iso)

                    # Plot confusion matrices for the calibrated model
                    model_h.plot_confusion_matrix(
                        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, best_thres_iso],
                        probs_iso, test_target, model[1], model_type, "iso")

                    # Plot the score distribution for the calibrated model
                    model_h.plot_score_distribution(
                        test_target, probs_iso,
                        config['outputs']['artifact_dir'], model[1],
                        model_type, 'iso')

                    # Calculate the std of AUC-PR between CV folds
                    model_h.calc_std_for_calibrated_classifiers(
                        model_iso, 'iso', test_features, test_target)

                    ### Spline calibration ###
                    # Perform calibration: fit the spline on the out-of-fold
                    # probabilities, then refit the model on the full training
                    # set to score the test set
                    spline_calib = mli.SplineCalib()
                    spline_calib.fit(probabilities_cv, train_target)
                    model[0].fit(train_features, train_target)
                    preds_test_uncalib = model[0].predict_proba(
                        test_features)[:, 1]
                    probs_spline = spline_calib.calibrate(preds_test_uncalib)
                    preds_spline = (probs_spline > 0.5).astype(int)

                    # Generate metrics for the calibrated model
                    calib_metrics_spline = model_h.calc_eval_metrics_for_model(
                        test_target, preds_spline, probs_spline, 'spline')

                    # Get the threshold with the best F1 score for the
                    # calibrated model
                    best_thres_spline, _, _, _ = \
                        model_h.get_threshold_with_best_f1_score(
                            test_target, probs_spline)
                    mlflow.log_metric('best_thres_spline', best_thres_spline)

                    # Plot confusion matrices for the calibrated model
                    model_h.plot_confusion_matrix(
                        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, best_thres_spline],
                        probs_spline, test_target, model[1], model_type,
                        "spline")

                    # Plot the score distribution for the calibrated model
                    model_h.plot_score_distribution(
                        test_target, probs_spline,
                        config['outputs']['artifact_dir'], model[1],
                        model_type, 'spline')
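                    # Added note on the three calibrators used above: sigmoid
                    # (Platt) calibration fits a two-parameter logistic map
                    # p = 1 / (1 + exp(A * s + B)) to the scores, isotonic
                    # regression fits a nonparametric monotone step function,
                    # and ml_insights' SplineCalib fits a smooth spline on the
                    # logit scale. Sigmoid is the most robust on small
                    # datasets; isotonic and spline are more flexible but need
                    # more observations to avoid overfitting.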
                    ### Plot calibration curves ###
                    # Plot calibration curves with equal-width bins (each bin
                    # has the same width) and equal-frequency bins (each bin
                    # has the same number of observations)
                    for strategy in ['uniform', 'quantile']:
                        for bins in [5, 6, 10]:
                            plt.figure(figsize=(8, 8))
                            plt.plot([0, 1], [0, 1], linestyle='--')
                            model_h.plot_calibration_curve(
                                train_target, probabilities_cv, bins,
                                strategy, 'Uncalibrated')
                            model_h.plot_calibration_curve(
                                test_target, probs_sig, bins, strategy,
                                'Sigmoid')
                            model_h.plot_calibration_curve(
                                test_target, probs_iso, bins, strategy,
                                'Isotonic')
                            model_h.plot_calibration_curve(
                                test_target, probs_spline, bins, strategy,
                                'Spline')
                            plt.legend(bbox_to_anchor=(1.05, 1.0),
                                       loc='upper left')
                            plt.title(model[1])
                            plt.tight_layout()
                            plt.savefig(
                                os.path.join(
                                    config['outputs']['artifact_dir'],
                                    model[1] + '_' + strategy + '_bins'
                                    + str(bins) + '_' + model_type + '.png'))
                            plt.close()

                    # Plot the uncalibrated model's calibration curve for
                    # different bin counts and binning strategies
                    fig, (ax1, ax2) = plt.subplots(ncols=2, sharex=True,
                                                   figsize=(15, 10))
                    for ax in [ax1, ax2]:
                        ax.plot([0, 1], [0, 1], linestyle='--')
                    for bins in [5, 6, 7, 8, 9]:
                        model_h.plot_calibration_curve(
                            train_target, probabilities_cv, bins, 'quantile',
                            'Bins=' + str(bins), ax1)
                    for bins in [5, 6, 7, 8, 9]:
                        model_h.plot_calibration_curve(
                            train_target, probabilities_cv, bins, 'uniform',
                            'Bins=' + str(bins), ax2)
                    ax1.title.set_text(
                        model[1] + ' uncalibrated model quantile bins')
                    ax2.title.set_text(
                        model[1] + ' uncalibrated model uniform bins')
                    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
                    plt.tight_layout()
                    plt.savefig(
                        os.path.join(config['outputs']['artifact_dir'],
                                     model[1] + '_uncal_' + model_type
                                     + '.png'))
                    plt.close()

                    # Plot calibration curves with error bars
                    model_h.plot_calibration_plot_with_error_bars(
                        probabilities_cv, probs_sig, probs_iso, probs_spline,
                        train_target, test_target, model[1])
                    plt.close()

                #### Get total gain and total cover for boosting models ####
                if model[1].startswith("xgb"):
                    feat_importance_tot_gain_df = \
                        model_h.plot_feat_importance_model(
                            model[0], model[1], model_type)
                    # Save feature importance by total gain
                    feat_importance_tot_gain_df.to_csv(
                        './data/feature_importance_tot_gain_' + model_type
                        + '.csv', index=False)
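                # Added sketch (assumption: model[0] was fitted above during
                # spline calibration): XGBoost also exposes importance types
                # directly on the booster; 'total_gain' sums the loss
                # reduction over every split made on a feature.
                if model[1].startswith("xgb"):
                    booster = model[0].get_booster()
                    total_gain = booster.get_score(
                        importance_type='total_gain')
                    print('Top features by total_gain:',
                          sorted(total_gain.items(), key=lambda kv: kv[1],
                                 reverse=True)[:10])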
                #### Calculate model performance by event type ####
                if model[1] not in ['dummy_classifier']:
                    # Create dataframes containing the prediction data and
                    # the event type data
                    preds_event_df_uncalib = \
                        model_h.create_df_probabilities_and_predictions(
                            probabilities_cv, best_thres_uncal,
                            train_data['StudyId'].tolist(), train_target,
                            train_data[['ExacWithin3Months',
                                        'HospExacWithin3Months',
                                        'CommExacWithin3Months']],
                            model[1], model_type,
                            output_dir='./data/prediction_and_events/')
                    preds_events_df_sig = \
                        model_h.create_df_probabilities_and_predictions(
                            probs_sig, best_thres_sig,
                            test_data['StudyId'].tolist(), test_target,
                            test_data[['ExacWithin3Months',
                                       'HospExacWithin3Months',
                                       'CommExacWithin3Months']],
                            model[1], model_type,
                            output_dir='./data/prediction_and_events/',
                            calib_type='sig')
                    preds_events_df_iso = \
                        model_h.create_df_probabilities_and_predictions(
                            probs_iso, best_thres_iso,
                            test_data['StudyId'].tolist(), test_target,
                            test_data[['ExacWithin3Months',
                                       'HospExacWithin3Months',
                                       'CommExacWithin3Months']],
                            model[1], model_type,
                            output_dir='./data/prediction_and_events/',
                            calib_type='iso')
                    preds_events_df_spline = \
                        model_h.create_df_probabilities_and_predictions(
                            probs_spline, best_thres_spline,
                            test_data['StudyId'].tolist(), test_target,
                            test_data[['ExacWithin3Months',
                                       'HospExacWithin3Months',
                                       'CommExacWithin3Months']],
                            model[1], model_type,
                            output_dir='./data/prediction_and_events/',
                            calib_type='spline')

                    # Subset to each event type and calculate metrics
                    metrics_by_event_type_uncalib = \
                        model_h.calc_metrics_by_event_type(
                            preds_event_df_uncalib, calib_type="uncalib")
                    metrics_by_event_type_sig = \
                        model_h.calc_metrics_by_event_type(
                            preds_events_df_sig, calib_type='sig')
                    metrics_by_event_type_iso = \
                        model_h.calc_metrics_by_event_type(
                            preds_events_df_iso, calib_type='iso')
                    metrics_by_event_type_spline = \
                        model_h.calc_metrics_by_event_type(
                            preds_events_df_spline, calib_type='spline')

                    # Subset to each event type and plot ROC curves
                    model_h.plot_roc_curve_by_event_type(
                        preds_event_df_uncalib, model[1], 'uncalib')
                    model_h.plot_roc_curve_by_event_type(
                        preds_events_df_sig, model[1], 'sig')
                    model_h.plot_roc_curve_by_event_type(
                        preds_events_df_iso, model[1], 'iso')
                    model_h.plot_roc_curve_by_event_type(
                        preds_events_df_spline, model[1], 'spline')

                    # Subset to each event type and plot PR curves
                    model_h.plot_prec_recall_by_event_type(
                        preds_event_df_uncalib, model[1], 'uncalib')
                    model_h.plot_prec_recall_by_event_type(
                        preds_events_df_sig, model[1], 'sig')
                    model_h.plot_prec_recall_by_event_type(
                        preds_events_df_iso, model[1], 'iso')
                    model_h.plot_prec_recall_by_event_type(
                        preds_events_df_spline, model[1], 'spline')

                #### SHAP ####
                if model[1] not in ['dummy_classifier']:

                    ### Uncalibrated model ###
                    # Get the SHAP values averaged across CV folds for the
                    # uncalibrated model
                    shap_values_v_uncal, shap_values_t_uncal = \
                        model_h.get_uncalibrated_shap(
                            crossval['estimator'], test_features,
                            train_features,
                            train_data[features_list].columns,
                            model[1], model_type)

                    ## Plot SHAP summary plots ##
                    model_h.plot_averaged_summary_plot(
                        shap_values_t_uncal, train_data[features_list],
                        model[1], 'uncalib', model_type)

                    ## Plot SHAP interaction heatmap ##
                    model_h.plot_shap_interaction_value_heatmap(
                        crossval['estimator'], train_features,
                        train_data[features_list].columns, model[1],
                        model_type)

                    ### Calibrated models ###
                    calib_models = {'sig': model_sig, 'iso': model_iso}
                    for calib_model_name in calib_models:
                        # Get the SHAP values averaged across CV folds for
                        # the calibrated model (the dropped columns are
                        # exactly the complement of features_list)
                        shap_values_v, shap_values_t = \
                            model_h.get_calibrated_shap_by_classifier(
                                calib_models[calib_model_name],
                                test_features, train_features,
                                train_data[features_list].columns,
                                calib_model_name, model[1], model_type)

                        ## Plot SHAP summary plots ##
                        model_h.plot_averaged_summary_plot(
                            shap_values_t, train_data[features_list],
                            model[1], calib_model_name, model_type)

                        ## Get feature importance from local SHAP values ##
                        feature_imp_df = model_h.get_local_shap_values(
                            model[1], model_type, shap_values_v,
                            test_features, calib_model_name,
                            shap_ids_dir='./data/prediction_and_events/')
                        feature_imp_df.to_csv(
                            './data/prediction_and_events/local_feature_imp_df'
                            + model[1] + '_' + calib_model_name + '.csv')

                        ## Plot local SHAP plots ##
                        test_feat_enc_conv = model_h.plot_local_shap(
                            model[1], model_type, shap_values_v,
                            test_features, train_features, calib_model_name,
                            row_ids_to_plot=['missed', 'incorrect',
                                             'correct'],
                            artifact_dir=config['outputs']['artifact_dir'],
                            shap_ids_dir='./data/prediction_and_events/',
                            reverse_scaling_flag=False,
                            convert_target_encodings=True,
                            imputation=model[2],
                            target_enc_path="./data/artifacts/"
                                            "target_encodings_"
                                            + model_type + ".json",
                            return_enc_converted_df=False)
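                        # Added note (assumption about model_h internals):
                        # CalibratedClassifierCV stores one fitted
                        # (base estimator, calibrator) pair per CV fold in
                        # calibrated_classifiers_, so per-fold SHAP values
                        # are typically computed on each pair's base
                        # estimator (cc.estimator in recent scikit-learn
                        # versions) and then averaged, which is presumably
                        # what get_calibrated_shap_by_classifier does.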
                        """
                        ### Plot SHAP dependency plots ###
                        # NB: file_suffix is not defined anywhere in this
                        # script and shap is not imported; both would need to
                        # be addressed before re-enabling this block.
                        os.makedirs("./tmp/dependence_plots", exist_ok=True)
                        categorical_cols = [
                            "DaysSinceLastExac_te",
                            "FEV1PercentPredicted_te"]
                        for categorical_col in categorical_cols:
                            shap.dependence_plot(
                                categorical_col, shap_values_v,
                                test_feat_enc_conv, interaction_index=None,
                                show=False)
                            plt.tight_layout()
                            plt.savefig(
                                "./tmp/dependence_plots/dependence_plot_"
                                + categorical_col + "_" + model[1] + "_"
                                + calib_model_name + file_suffix + ".png")
                            plt.close()
                        """

                ### Plot the distribution of model scores for the
                ### uncalibrated model ###
                model_h.plot_score_distribution(
                    train_target, probabilities_cv,
                    config['outputs']['artifact_dir'], model[1], model_type)

                """
                ### Permutation feature importance ###
                def calc_permutation_importance(model, features, target,
                                                scoring, n_repeats):
                    permutation_imp = permutation_importance(
                        model, features, target, random_state=0,
                        scoring=scoring, n_repeats=n_repeats)
                    for n, score in enumerate(permutation_imp):
                        if n == 0:
                            df = pd.DataFrame(
                                data=permutation_imp[score]['importances_mean'],
                                index=features.columns)
                            df = df.rename(columns={0: score})
                        else:
                            df[score] = \
                                permutation_imp[score]['importances_mean']
                    return df, permutation_imp

                def plot_permutation_feature_importance(
                        permutation_imp_full, metric, col_names, n_repeats,
                        train_or_test):
                    os.makedirs("./tmp/permutation_feat_imp", exist_ok=True)
                    sorted_importances_idx = \
                        permutation_imp_full[metric].importances_mean.argsort()
                    importances = pd.DataFrame(
                        permutation_imp_full[metric]
                        .importances[sorted_importances_idx].T,
                        columns=col_names[sorted_importances_idx],
                    )
                    ax = importances.plot.box(vert=False, whis=10)
                    ax.set_title(
                        "Permutation Importances (" + train_or_test + ")")
                    ax.axvline(x=0, color="k", linestyle="--")
                    ax.set_xlabel("Decrease in " + metric + " score")
                    ax.figure.tight_layout()
                    plt.savefig('./tmp/permutation_feat_imp/' + train_or_test
                                + '_' + metric + '_repeats' + str(n_repeats)
                                + '.png')

                from scipy.cluster import hierarchy
                from scipy.spatial.distance import squareform
                from scipy.stats import spearmanr

                full_dataset_feat = pd.concat(
                    [train_features, test_features], axis=0)
                print(train_features)
                print(full_dataset_feat)

                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
                corr = spearmanr(full_dataset_feat).correlation

                # Ensure the correlation matrix is symmetric
                corr = (corr + corr.T) / 2
                np.fill_diagonal(corr, 1)

                # Convert the correlation matrix to a distance matrix before
                # performing hierarchical clustering using Ward's linkage.
                distance_matrix = 1 - np.abs(corr)
                dist_linkage = hierarchy.ward(squareform(distance_matrix))
                dendro = hierarchy.dendrogram(
                    dist_linkage,
                    labels=full_dataset_feat.columns.to_list(),
                    ax=ax1, leaf_rotation=90)
                dendro_idx = np.arange(0, len(dendro["ivl"]))

                ax2.imshow(corr[dendro["leaves"], :][:, dendro["leaves"]])
                ax2.set_xticks(dendro_idx)
                ax2.set_yticks(dendro_idx)
                ax2.set_xticklabels(dendro["ivl"], rotation="vertical")
                ax2.set_yticklabels(dendro["ivl"])
                _ = fig.tight_layout()
                plt.show()
                plt.close()

                #features_to_drop = ["TotalEngagementMRC",
                #                    "NumCommExacPrior6mo", "WeekAvgCATQ2",
                #                    "WeekAvgCATQ4"]
                #X_train_sel = train_features.drop(columns=features_to_drop)
                #X_test_sel = test_features.drop(columns=features_to_drop)

                from collections import defaultdict

                # Keep one feature per cluster of correlated features
                cluster_ids = hierarchy.fcluster(dist_linkage, 0.5,
                                                 criterion="distance")
                cluster_id_to_feature_ids = defaultdict(list)
                for idx, cluster_id in enumerate(cluster_ids):
                    cluster_id_to_feature_ids[cluster_id].append(idx)
                selected_features = [v[0] for v in
                                     cluster_id_to_feature_ids.values()]
                selected_features_names = \
                    full_dataset_feat.columns[selected_features]

                X_train_sel = train_features[selected_features_names]
                X_test_sel = test_features[selected_features_names]
                print(selected_features_names)

                # Retrain: perform calibration on the selected features
                model_sig_perm = CalibratedClassifierCV(
                    model[0], method='sigmoid', cv=cross_val_fold_indices)
                model_sig_perm.fit(X_train_sel, train_target)
                probs_sig = model_sig_perm.predict_proba(X_test_sel)[:, 1]
                preds_sig = model_sig_perm.predict(X_test_sel)

                print('before')
                print(calib_metrics_sig)

                # Generate metrics for the calibrated model
                calib_metrics_sig = model_h.calc_eval_metrics_for_model(
                    test_target, preds_sig, probs_sig, 'sig')
                print(calib_metrics_sig)

                def plot_permutation_importance(clf, X, y, ax):
                    result = permutation_importance(
                        clf, X, y, n_repeats=10, random_state=42, n_jobs=2,
                        scoring='average_precision')
                    perm_sorted_idx = result.importances_mean.argsort()
                    ax.boxplot(
                        result.importances[perm_sorted_idx].T,
                        vert=False,
                        labels=X.columns[perm_sorted_idx],
                    )
                    ax.axvline(x=0, color="k", linestyle="--")
                    return ax

                fig, ax = plt.subplots(figsize=(7, 6))
                plot_permutation_importance(model_sig_perm, X_test_sel,
                                            test_target, ax)
                ax.set_title("Permutation Importances on selected subset of "
                             "features\n(test set)")
                ax.set_xlabel("Decrease in average precision score")
                ax.figure.tight_layout()
                plt.savefig('./tmp/permutation_feat_imp.png')

                #for metric in ['f1', 'average_precision', 'roc_auc']:
                #    for n_repeats in [5, 10, 50]:
                #        permutation_imp_train_df, permutation_imp_train_dict = calc_permutation_importance(model_sig, train_features, train_target, scoring=scoring, n_repeats=n_repeats)
                #        plot_permutation_feature_importance(permutation_imp_train_dict, metric, train_features.columns, n_repeats, 'train')
                #    for n_repeats in [5, 10, 50]:
                #        permutation_imp_test_df, permutation_imp_test_dict = calc_permutation_importance(model_sig, test_features, test_target, scoring=scoring, n_repeats=n_repeats)
                #        plot_permutation_feature_importance(permutation_imp_test_dict, metric, test_features.columns, n_repeats, 'test')
                """
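                # Added note on the disabled block above: it follows the
                # scikit-learn example of clustering Spearman-correlated
                # features with Ward linkage and keeping one feature per
                # cluster before computing permutation importance, since
                # permutation importance is misleading when correlated
                # features can stand in for the permuted one.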
                ### Log metrics, parameters, and artifacts ###
                # Log metrics averaged across folds
                for score in scoring:
                    mlflow.log_metric(score, crossval['test_' + score].mean())
                    mlflow.log_metric(score + '_std',
                                      crossval['test_' + score].std())

                # Log metrics for the calibrated models
                if model[1] != 'dummy_classifier':
                    mlflow.log_metrics(uncalib_metrics_test_mean)
                    mlflow.log_metrics(calib_metrics_sig)
                    mlflow.log_metrics(calib_metrics_iso)
                    mlflow.log_metrics(calib_metrics_spline)
                    mlflow.log_metrics(metrics_by_event_type_uncalib)
                    mlflow.log_metrics(metrics_by_event_type_sig)
                    mlflow.log_metrics(metrics_by_event_type_iso)
                    mlflow.log_metrics(metrics_by_event_type_spline)

                # Log model parameters
                params = model[0].get_params()
                for param in params:
                    mlflow.log_param(param, params[param])

                # Log artifacts
                mlflow.log_artifacts(config['outputs']['artifact_dir'])

                # Save the calibrated models
                with open('./data/model/trained_sig_' + model[1] + '.pkl',
                          'wb') as f:
                    pickle.dump(model_sig, f)
                with open('./data/model/trained_iso_' + model[1] + '.pkl',
                          'wb') as f:
                    pickle.dump(model_iso, f)
                with open('./data/model/trained_spline_' + model[1] + '.pkl',
                          'wb') as f:
                    pickle.dump(spline_calib, f)

mlflow.end_run()
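# Added usage note (hypothetical consumer, not part of this script): the
# pickled calibrated models can be reloaded for scoring elsewhere, e.g.
#   with open('./data/model/trained_sig_xgb.pkl', 'rb') as f:
#       clf = pickle.load(f)
#   risk_scores = clf.predict_proba(new_features[features_list])[:, 1]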