Spaces:

haoranrr
/

AMBER

Runtime error

App Files Files Community

haoranrr commited on Mar 15, 2025

Commit

cc49bd9

verified ·

1 Parent(s): 1bd1629

AutoML

Browse files

Files changed (1) hide show

app.py +326 -0

app.py ADDED Viewed

	@@ -0,0 +1,326 @@

+import autogluon
+from tkinter import Tk,filedialog
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from autogluon.tabular import TabularDataset,TabularPredictor
+from sklearn.metrics import roc_auc_score,f1_score,roc_curve,confusion_matrix
+import gradio as gr
+import matplotlib.pyplot as plt
+from sklearn.calibration import calibration_curve
+import seaborn as sns
+# Tkinter部分，用于上传csv文件
+# 可以使用gradio默认的部分
+def upload_file():
+    root=Tk()
+    root.withdraw()
+    file_path = filedialog.askopenfilename(title="Select CSV File", filetypes=[("Training files", "*.xlsx *.csv")])
+    return file_path
+# 模型训练内部评估
+plt.rc('font',family='Times New Roman')
+def train_and_evaluate(file):
+  # 读取csv件
+    df = pd.read_csv(file.name)
+    label='hospital_expire_flag'
+    # 分割数据集
+    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
+    # 训练模型
+    predictor=TabularPredictor(label=label,problem_type='binary',eval_metric='f1',path='./autogluon/').fit(train_df)
+    # 载入最佳模型
+    best_model=predictor._trainer.load_model(predictor.get_model_names()[-1])
+    # 预测概率
+    y_prob = best_model.predict_proba(test_df.drop(label,axis=1))
+    # 计算AUC
+    auc = roc_auc_score(test_df[label], y_prob)
+    # 绘制ROC曲线
+    fpr, tpr, _ = roc_curve(test_df[label], y_prob)
+    plt.figure(figsize=(5, 4))
+    plt.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
+    plt.plot([0, 1], [0, 1], linestyle='--')
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title('ROC Curve')
+    sns.despine()
+    plt.legend(loc='best')
+    plt.savefig('./roc_curve.png',dpi=200,bbox_inches='tight')
+    # 绘制校准曲线
+    prob_true, prob_pred = calibration_curve(test_df[label], y_prob, n_bins=10)
+    plt.figure(figsize=(5, 4))
+    plt.plot(prob_true, prob_pred, marker='o', label='Autogluon')
+    plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly Calibrated')
+    plt.ylabel('Predicted Probability',fontdict=dict(family='Times New Roman',size=15))
+    plt.xlabel('True Probability',fontdict=dict(family='Times New Roman',size=15))
+    plt.title('Calibration Curve',fontdict=dict(family='Times New Roman',size=15))
+    sns.despine()
+    plt.legend()
+    plt.savefig('./Calibration_curve.png',dpi=200,bbox_inches='tight')
+    # 绘制决策曲线
+    y_pred=y_prob
+    y_test=test_df[label]
+    thresh_group=np.arange(0, 1, 0.01)
+    net_benefit_model = np.array([])
+    for thresh in thresh_group:
+        y_pred_label = y_pred > thresh
+        tn, fp, fn, tp = confusion_matrix(y_test, y_pred_label).ravel()
+        n = len(y_test)
+        net_benefit = (tp / n) - (fp / n) * (thresh / (1 - thresh))
+        net_benefit_model = np.append(net_benefit_model, net_benefit)
+    net_benefit_all = np.array([])
+    tn, fp, fn, tp = confusion_matrix(y_test, y_test).ravel()
+    total = tp + tn
+    for thresh in thresh_group:
+        net_benefit_ = (tp / total) - (tn / total) * (thresh / (1 - thresh))
+        net_benefit_all = np.append(net_benefit_all, net_benefit_)
+    plt.figure(figsize=(5, 4))
+    ax=plt.gca()
+    ax.plot(thresh_group, net_benefit_model)
+    ax.plot(thresh_group, net_benefit_all, linestyle='--', label='Treat all')
+    ax.plot((0, 1), (0, 0), color='black', linestyle='--', label='Treat none')
+    ax.fill_between(thresh_group, net_benefit_model, 0, alpha=0.2)
+    ax.set_xlim(0, 1)
+    ax.set_ylim(net_benefit_model.min() - 0.15, net_benefit_model.max() + 0.15)
+    ax.set_xlabel('Threshold Probability', fontdict={'family': 'Times New Roman', 'fontsize': 15})
+    ax.set_ylabel('Net Benefit', fontdict={'family': 'Times New Roman', 'fontsize': 15})
+    ax.grid(which='minor')
+    ax.spines['right'].set_color((0.8, 0.8, 0.8))
+    ax.spines['top'].set_color((0.8, 0.8, 0.8))
+    ax.legend(loc='upper right')
+    sns.despine()
+    plt.title('Decision Curve',fontdict=dict(family='Times New Roman',size=15))
+    plt.savefig('./Decision_curve.png',dpi=200,bbox_inches='tight')
+    FI=predictor.feature_importance(test_df)
+    norm = plt.Normalize(min(FI.importance[:6]), max(FI.importance[:6]))
+    colors = plt.cm.viridis(norm(FI.importance[:6].values))
+    # 绘制棒图
+    plt.figure(figsize=(5,4))
+    plt.bar(FI.index[:6], FI.importance[:6],color=colors)
+    ax=plt.gca()
+    # 添加标题和标签
+    plt.title('Feature Importance',fontdict=dict(family='Times New Roman',size=15),pad=0.2)
+    plt.xlabel('Features')
+    plt.ylabel('Permutation Shuffling Values')
+    sns.despine()
+    plt.xticks(rotation=45)
+    plt.savefig('./feature_importance.png',dpi=200,bbox_inches='tight')
+    return  './roc_curve.png','./Calibration_curve.png','./Decision_curve.png' ,'./feature_importance.png'
+  # 外部验证
+def external_evaluate(file):
+  # 读取csv件
+    df = pd.read_csv(file.name)
+    label='hospital_expire_flag'
+    # 训练模型
+    predictor=TabularPredictor.load('./autogluon/')
+    # 载入最佳模型
+    best_model=predictor._trainer.load_model(predictor.get_model_names()[-1])
+    # 预测概率
+    y_prob = best_model.predict_proba(df.drop(label,axis=1))
+    # 计算AUC
+    auc = roc_auc_score(df[label], y_prob)
+    # 绘制ROC曲线
+    fpr, tpr, _ = roc_curve(df[label], y_prob)
+    plt.figure(figsize=(5, 4))
+    plt.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
+    plt.plot([0, 1], [0, 1], linestyle='--')
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title('ROC Curve')
+    sns.despine()
+    plt.legend(loc='best')
+    plt.savefig('./roc_curve_external.png',dpi=200,bbox_inches='tight')
+    # 绘制校准曲线
+    prob_true, prob_pred = calibration_curve(df[label], y_prob, n_bins=10)
+    plt.figure(figsize=(5, 4))
+    plt.plot(prob_true, prob_pred, marker='o', label='Autogluon')
+    plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly Calibrated')
+    plt.ylabel('Predicted Probability',fontdict=dict(family='Times New Roman',size=15))
+    plt.xlabel('True Probability',fontdict=dict(family='Times New Roman',size=15))
+    plt.title('Calibration Curve',fontdict=dict(family='Times New Roman',size=15))
+    sns.despine()
+    plt.legend()
+    plt.savefig('./Calibration_curve_external.png',dpi=200,bbox_inches='tight')
+    # 绘制决策曲线
+    y_pred=y_prob
+    y_test=df[label]
+    thresh_group=np.arange(0, 1, 0.01)
+    net_benefit_model = np.array([])
+    for thresh in thresh_group:
+        y_pred_label = y_pred > thresh
+        tn, fp, fn, tp = confusion_matrix(y_test, y_pred_label).ravel()
+        n = len(y_test)
+        net_benefit = (tp / n) - (fp / n) * (thresh / (1 - thresh))
+        net_benefit_model = np.append(net_benefit_model, net_benefit)
+    net_benefit_all = np.array([])
+    tn, fp, fn, tp = confusion_matrix(y_test, y_test).ravel()
+    total = tp + tn
+    for thresh in thresh_group:
+        net_benefit_ = (tp / total) - (tn / total) * (thresh / (1 - thresh))
+        net_benefit_all = np.append(net_benefit_all, net_benefit_)
+    plt.figure(figsize=(5, 4))
+    ax=plt.gca()
+    ax.plot(thresh_group, net_benefit_model)
+    ax.plot(thresh_group, net_benefit_all, linestyle='--', label='Treat all')
+    ax.plot((0, 1), (0, 0), color='black', linestyle='--', label='Treat none')
+    ax.fill_between(thresh_group, net_benefit_model, 0, alpha=0.2)
+    ax.set_xlim(0, 1)
+    ax.set_ylim(net_benefit_model.min() - 0.15, net_benefit_model.max() + 0.15)
+    ax.set_xlabel('Threshold Probability', fontdict={'family': 'Times New Roman', 'fontsize': 15})
+    ax.set_ylabel('Net Benefit', fontdict={'family': 'Times New Roman', 'fontsize': 15})
+    ax.grid(which='minor')
+    ax.spines['right'].set_color((0.8, 0.8, 0.8))
+    ax.spines['top'].set_color((0.8, 0.8, 0.8))
+    ax.legend(loc='upper right')
+    sns.despine()
+    plt.title('Decision Curve',fontdict=dict(family='Times New Roman',size=15),pad=0.01)
+    plt.savefig('./Decision_curve_external.png',dpi=200,bbox_inches='tight')
+    # 绘制棒图
+    FI=predictor.feature_importance(df)
+    norm = plt.Normalize(min(FI.importance[:6]), max(FI.importance[:6]))
+    colors = plt.cm.viridis(norm(FI.importance[:6].values))
+    plt.figure(figsize=(5,4))
+    plt.bar(FI.index[:6], FI.importance[:6],color=colors)
+    ax=plt.gca()
+    # 添加标题和标签
+    plt.title('Feature Importance',fontdict=dict(family='Times New Roman',size=15),pad=0.2)
+    plt.xlabel('Features')
+    plt.ylabel('Permutation Shuffling Values')
+    sns.despine()
+    plt.xticks(rotation=45)
+    plt.savefig('./feature_importance_external.png',dpi=200,bbox_inches='tight')
+    return './roc_curve_external.png','./Calibration_curve_external.png','./Decision_curve_external.png' ,'./feature_importance_external.png'
+def preview_excel(file):
+    df = pd.read_csv(file.name)
+    return df.head(3)
+import gradio as gr
+import base64
+# CSS styles for the interface
+css = """
+body {
+    background-color: #f8f9fa;
+    font-family: 'Arial', sans-serif;
+}
+#file_input, #external_file_input, #dataframe {
+    border: 2px dashed #007bff;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: #fff;
+}
+#train_button, #evaluate_button, #dataframe_button {
+    background-color: #007bff;
+    color: gray;  /* Changed to white for better contrast */
+    font-size: 18px;
+    border-radius: 5px;
+    margin-top: 10px;
+    transition: background-color 0.3s;
+}
+#train_button:hover, #evaluate_button:hover, #dataframe_button:hover {
+    background-color: #0056b3;
+}
+#roc_image, #calibration_image, #decision_image, #external_eval_image1, #external_eval_image2, #external_eval_image3 {
+    border: 1px solid #ddd;
+    border-radius: 10px;
+    padding: 10px;
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+}
+h1 {
+    color: blue;
+    text-align: center;
+    font-size: 28px;
+}
+h2 {
+    color: #007bff;
+    text-align: center;
+}
+p {
+    color: #555;
+    text-align: center;
+}
+.spinner {
+    display: none;
+    text-align: center;
+    margin-top: 20px;
+}
+"""
+# Load and encode the background image
+with open("D:/Haoran/科研/毕设/分析/模型部署/automl6.png", "rb") as image_file:
+    encoded_string = base64.b64encode(image_file.read()).decode()
+# Create the HTML layout with a background image
+background_image = f"""
+<div style="position: relative; height: 30vh;">
+    <div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; background-image: url('data:image/jpeg;base64,{encoded_string}'); background-size: contain; background-repeat: no-repeat; background-position: center; opacity: 0.7;">
+    </div>
+    <div style="position: absolute; top: 85%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
+        <h1 style="color: blue; font-weight: bold; font-size: 45px; white-space: nowrap;">Clinical Prediction Model Training and Evaluation based on AutoML</h1>
+        <p>Upload your CSV file with a 'hospital_expire_flag' column for binary classification. The tool will train a model, evaluate it, and display ROC, Calibration, Decision curves and Feature Importance plot.</p>
+    </div>
+</div>
+"""
+# Create Gradio Blocks interface
+with gr.Blocks(css=css) as interface:
+    gr.HTML(background_image)
+    with gr.Row():
+        file_input = gr.File(label='Upload Model Training CSV File', elem_id="file_input")
+    pre_button = gr.Button('Preview of the First 3 Rows', elem_id='dataframe_button')
+    with gr.Row():
+        dataframe = gr.DataFrame(elem_id='dataframe')
+    pre_button.click(fn=preview_excel, inputs=file_input, outputs=dataframe)
+    train_button = gr.Button("Train and Internal Evaluate", elem_id="train_button")
+    with gr.Row():
+        img1 = gr.Image(label="ROC Curve", type='filepath', elem_id="roc_image")
+        img2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="calibration_image")
+        img3 = gr.Image(label="Decision Curve", type='filepath', elem_id="decision_image")
+        img4 = gr.Image(label="Feature Importance", type='filepath', elem_id="feature_importance_image")
+    spinner = gr.Markdown("<div class='spinner'>Training model... Please wait...</div>")
+    def handle_click(file):
+        spinner.update(value="正在训练模型，请稍候...", visible=True)
+        try:
+            results = train_and_evaluate(file)
+            return results
+        except Exception as e:
+            return f"训练失败: {str(e)}"
+        finally:
+            spinner.update(visible=False)
+    train_button.click(fn=handle_click, inputs=file_input, outputs=[img1, img2, img3, img4])
+    # External evaluation section
+    gr.Markdown("<h2 style='text-align: center;'>External Evaluation</h2>")
+    external_file_input = gr.File(label='Upload External Evaluation CSV File', elem_id="external_file_input")
+    evaluate_button = gr.Button("External Evaluate", elem_id="evaluate_button")
+    with gr.Row():
+        external_eval_image1 = gr.Image(label="ROC Curve", type='filepath', elem_id="external_eval_image1")
+        external_eval_image2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="external_eval_image2")
+        external_eval_image3 = gr.Image(label="Decision Curve", type='filepath', elem_id="external_eval_image3")
+        external_eval_image4 = gr.Image(label="Feature Importance", type='filepath', elem_id="external_eval_image4")
+    def evaluate_click(file):
+        spinner.update(value="正在进行外部评估，请稍候...", visible=True)
+        try:
+            results = external_evaluate(file)
+            return results
+        except Exception as e:
+            return f"外部评估失败: {str(e)}"
+        finally:
+            spinner.update(visible=False)
+    evaluate_button.click(fn=evaluate_click, inputs=external_file_input, outputs=[external_eval_image1, external_eval_image2, external_eval_image3, external_eval_image4])
+# Launch the interface
+interface.launch()