import autogluon from tkinter import Tk,filedialog import pandas as pd from sklearn.model_selection import train_test_split from autogluon.tabular import TabularDataset,TabularPredictor from sklearn.metrics import roc_auc_score,f1_score,roc_curve,confusion_matrix import gradio as gr import matplotlib.pyplot as plt from sklearn.calibration import calibration_curve import seaborn as sns # Tkinter部分,用于上传csv文件 # 可以使用gradio默认的部分 def upload_file(): root=Tk() root.withdraw() file_path = filedialog.askopenfilename(title="Select CSV File", filetypes=[("Training files", "*.xlsx *.csv")]) return file_path # 模型训练内部评估 plt.rc('font',family='Times New Roman') def train_and_evaluate(file): # 读取csv件 df = pd.read_csv(file.name) label='hospital_expire_flag' # 分割数据集 train_df, test_df = train_test_split(df, test_size=0.2, random_state=42) # 训练模型 predictor=TabularPredictor(label=label,problem_type='binary',eval_metric='f1',path='./autogluon/').fit(train_df) # 载入最佳模型 best_model=predictor._trainer.load_model(predictor.get_model_names()[-1]) # 预测概率 y_prob = best_model.predict_proba(test_df.drop(label,axis=1)) # 计算AUC auc = roc_auc_score(test_df[label], y_prob) # 绘制ROC曲线 fpr, tpr, _ = roc_curve(test_df[label], y_prob) plt.figure(figsize=(5, 4)) plt.plot(fpr, tpr, label=f'AUC = {auc:.2f}') plt.plot([0, 1], [0, 1], linestyle='--') plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC Curve') sns.despine() plt.legend(loc='best') plt.savefig('./roc_curve.png',dpi=200,bbox_inches='tight') # 绘制校准曲线 prob_true, prob_pred = calibration_curve(test_df[label], y_prob, n_bins=10) plt.figure(figsize=(5, 4)) plt.plot(prob_true, prob_pred, marker='o', label='Autogluon') plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly Calibrated') plt.ylabel('Predicted Probability',fontdict=dict(family='Times New Roman',size=15)) plt.xlabel('True Probability',fontdict=dict(family='Times New Roman',size=15)) plt.title('Calibration Curve',fontdict=dict(family='Times New Roman',size=15)) sns.despine() plt.legend() plt.savefig('./Calibration_curve.png',dpi=200,bbox_inches='tight') # 绘制决策曲线 y_pred=y_prob y_test=test_df[label] thresh_group=np.arange(0, 1, 0.01) net_benefit_model = np.array([]) for thresh in thresh_group: y_pred_label = y_pred > thresh tn, fp, fn, tp = confusion_matrix(y_test, y_pred_label).ravel() n = len(y_test) net_benefit = (tp / n) - (fp / n) * (thresh / (1 - thresh)) net_benefit_model = np.append(net_benefit_model, net_benefit) net_benefit_all = np.array([]) tn, fp, fn, tp = confusion_matrix(y_test, y_test).ravel() total = tp + tn for thresh in thresh_group: net_benefit_ = (tp / total) - (tn / total) * (thresh / (1 - thresh)) net_benefit_all = np.append(net_benefit_all, net_benefit_) plt.figure(figsize=(5, 4)) ax=plt.gca() ax.plot(thresh_group, net_benefit_model) ax.plot(thresh_group, net_benefit_all, linestyle='--', label='Treat all') ax.plot((0, 1), (0, 0), color='black', linestyle='--', label='Treat none') ax.fill_between(thresh_group, net_benefit_model, 0, alpha=0.2) ax.set_xlim(0, 1) ax.set_ylim(net_benefit_model.min() - 0.15, net_benefit_model.max() + 0.15) ax.set_xlabel('Threshold Probability', fontdict={'family': 'Times New Roman', 'fontsize': 15}) ax.set_ylabel('Net Benefit', fontdict={'family': 'Times New Roman', 'fontsize': 15}) ax.grid(which='minor') ax.spines['right'].set_color((0.8, 0.8, 0.8)) ax.spines['top'].set_color((0.8, 0.8, 0.8)) ax.legend(loc='upper right') sns.despine() plt.title('Decision Curve',fontdict=dict(family='Times New Roman',size=15)) plt.savefig('./Decision_curve.png',dpi=200,bbox_inches='tight') FI=predictor.feature_importance(test_df) norm = plt.Normalize(min(FI.importance[:6]), max(FI.importance[:6])) colors = plt.cm.viridis(norm(FI.importance[:6].values)) # 绘制棒图 plt.figure(figsize=(5,4)) plt.bar(FI.index[:6], FI.importance[:6],color=colors) ax=plt.gca() # 添加标题和标签 plt.title('Feature Importance',fontdict=dict(family='Times New Roman',size=15),pad=0.2) plt.xlabel('Features') plt.ylabel('Permutation Shuffling Values') sns.despine() plt.xticks(rotation=45) plt.savefig('./feature_importance.png',dpi=200,bbox_inches='tight') return './roc_curve.png','./Calibration_curve.png','./Decision_curve.png' ,'./feature_importance.png' # 外部验证 def external_evaluate(file): # 读取csv件 df = pd.read_csv(file.name) label='hospital_expire_flag' # 训练模型 predictor=TabularPredictor.load('./autogluon/') # 载入最佳模型 best_model=predictor._trainer.load_model(predictor.get_model_names()[-1]) # 预测概率 y_prob = best_model.predict_proba(df.drop(label,axis=1)) # 计算AUC auc = roc_auc_score(df[label], y_prob) # 绘制ROC曲线 fpr, tpr, _ = roc_curve(df[label], y_prob) plt.figure(figsize=(5, 4)) plt.plot(fpr, tpr, label=f'AUC = {auc:.2f}') plt.plot([0, 1], [0, 1], linestyle='--') plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('ROC Curve') sns.despine() plt.legend(loc='best') plt.savefig('./roc_curve_external.png',dpi=200,bbox_inches='tight') # 绘制校准曲线 prob_true, prob_pred = calibration_curve(df[label], y_prob, n_bins=10) plt.figure(figsize=(5, 4)) plt.plot(prob_true, prob_pred, marker='o', label='Autogluon') plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly Calibrated') plt.ylabel('Predicted Probability',fontdict=dict(family='Times New Roman',size=15)) plt.xlabel('True Probability',fontdict=dict(family='Times New Roman',size=15)) plt.title('Calibration Curve',fontdict=dict(family='Times New Roman',size=15)) sns.despine() plt.legend() plt.savefig('./Calibration_curve_external.png',dpi=200,bbox_inches='tight') # 绘制决策曲线 y_pred=y_prob y_test=df[label] thresh_group=np.arange(0, 1, 0.01) net_benefit_model = np.array([]) for thresh in thresh_group: y_pred_label = y_pred > thresh tn, fp, fn, tp = confusion_matrix(y_test, y_pred_label).ravel() n = len(y_test) net_benefit = (tp / n) - (fp / n) * (thresh / (1 - thresh)) net_benefit_model = np.append(net_benefit_model, net_benefit) net_benefit_all = np.array([]) tn, fp, fn, tp = confusion_matrix(y_test, y_test).ravel() total = tp + tn for thresh in thresh_group: net_benefit_ = (tp / total) - (tn / total) * (thresh / (1 - thresh)) net_benefit_all = np.append(net_benefit_all, net_benefit_) plt.figure(figsize=(5, 4)) ax=plt.gca() ax.plot(thresh_group, net_benefit_model) ax.plot(thresh_group, net_benefit_all, linestyle='--', label='Treat all') ax.plot((0, 1), (0, 0), color='black', linestyle='--', label='Treat none') ax.fill_between(thresh_group, net_benefit_model, 0, alpha=0.2) ax.set_xlim(0, 1) ax.set_ylim(net_benefit_model.min() - 0.15, net_benefit_model.max() + 0.15) ax.set_xlabel('Threshold Probability', fontdict={'family': 'Times New Roman', 'fontsize': 15}) ax.set_ylabel('Net Benefit', fontdict={'family': 'Times New Roman', 'fontsize': 15}) ax.grid(which='minor') ax.spines['right'].set_color((0.8, 0.8, 0.8)) ax.spines['top'].set_color((0.8, 0.8, 0.8)) ax.legend(loc='upper right') sns.despine() plt.title('Decision Curve',fontdict=dict(family='Times New Roman',size=15),pad=0.01) plt.savefig('./Decision_curve_external.png',dpi=200,bbox_inches='tight') # 绘制棒图 FI=predictor.feature_importance(df) norm = plt.Normalize(min(FI.importance[:6]), max(FI.importance[:6])) colors = plt.cm.viridis(norm(FI.importance[:6].values)) plt.figure(figsize=(5,4)) plt.bar(FI.index[:6], FI.importance[:6],color=colors) ax=plt.gca() # 添加标题和标签 plt.title('Feature Importance',fontdict=dict(family='Times New Roman',size=15),pad=0.2) plt.xlabel('Features') plt.ylabel('Permutation Shuffling Values') sns.despine() plt.xticks(rotation=45) plt.savefig('./feature_importance_external.png',dpi=200,bbox_inches='tight') return './roc_curve_external.png','./Calibration_curve_external.png','./Decision_curve_external.png' ,'./feature_importance_external.png' def preview_excel(file): df = pd.read_csv(file.name) return df.head(3) import gradio as gr import base64 # CSS styles for the interface css = """ body { background-color: #f8f9fa; font-family: 'Arial', sans-serif; } #file_input, #external_file_input, #dataframe { border: 2px dashed #007bff; padding: 20px; border-radius: 10px; background-color: #fff; } #train_button, #evaluate_button, #dataframe_button { background-color: #007bff; color: gray; /* Changed to white for better contrast */ font-size: 18px; border-radius: 5px; margin-top: 10px; transition: background-color 0.3s; } #train_button:hover, #evaluate_button:hover, #dataframe_button:hover { background-color: #0056b3; } #roc_image, #calibration_image, #decision_image, #external_eval_image1, #external_eval_image2, #external_eval_image3 { border: 1px solid #ddd; border-radius: 10px; padding: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); } h1 { color: blue; text-align: center; font-size: 28px; } h2 { color: #007bff; text-align: center; } p { color: #555; text-align: center; } .spinner { display: none; text-align: center; margin-top: 20px; } """ # Load and encode the background image with open("D:/Haoran/科研/毕设/分析/模型部署/automl6.png", "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode() # Create the HTML layout with a background image background_image = f"""

Clinical Prediction Model Training and Evaluation based on AutoML

Upload your CSV file with a 'hospital_expire_flag' column for binary classification. The tool will train a model, evaluate it, and display ROC, Calibration, Decision curves and Feature Importance plot.

""" # Create Gradio Blocks interface with gr.Blocks(css=css) as interface: gr.HTML(background_image) with gr.Row(): file_input = gr.File(label='Upload Model Training CSV File', elem_id="file_input") pre_button = gr.Button('Preview of the First 3 Rows', elem_id='dataframe_button') with gr.Row(): dataframe = gr.DataFrame(elem_id='dataframe') pre_button.click(fn=preview_excel, inputs=file_input, outputs=dataframe) train_button = gr.Button("Train and Internal Evaluate", elem_id="train_button") with gr.Row(): img1 = gr.Image(label="ROC Curve", type='filepath', elem_id="roc_image") img2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="calibration_image") img3 = gr.Image(label="Decision Curve", type='filepath', elem_id="decision_image") img4 = gr.Image(label="Feature Importance", type='filepath', elem_id="feature_importance_image") spinner = gr.Markdown("
Training model... Please wait...
") def handle_click(file): spinner.update(value="正在训练模型,请稍候...", visible=True) try: results = train_and_evaluate(file) return results except Exception as e: return f"训练失败: {str(e)}" finally: spinner.update(visible=False) train_button.click(fn=handle_click, inputs=file_input, outputs=[img1, img2, img3, img4]) # External evaluation section gr.Markdown("

External Evaluation

") external_file_input = gr.File(label='Upload External Evaluation CSV File', elem_id="external_file_input") evaluate_button = gr.Button("External Evaluate", elem_id="evaluate_button") with gr.Row(): external_eval_image1 = gr.Image(label="ROC Curve", type='filepath', elem_id="external_eval_image1") external_eval_image2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="external_eval_image2") external_eval_image3 = gr.Image(label="Decision Curve", type='filepath', elem_id="external_eval_image3") external_eval_image4 = gr.Image(label="Feature Importance", type='filepath', elem_id="external_eval_image4") def evaluate_click(file): spinner.update(value="正在进行外部评估,请稍候...", visible=True) try: results = external_evaluate(file) return results except Exception as e: return f"外部评估失败: {str(e)}" finally: spinner.update(visible=False) evaluate_button.click(fn=evaluate_click, inputs=external_file_input, outputs=[external_eval_image1, external_eval_image2, external_eval_image3, external_eval_image4]) # Launch the interface interface.launch()