# AMBER / app.py
# haoranrr's picture
# AutoML
# cc49bd9 verified
from tkinter import Tk, filedialog

import autogluon
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_auc_score, f1_score, roc_curve, confusion_matrix
from sklearn.model_selection import train_test_split
# Tkinter-based picker for choosing a local training file.
# Gradio's built-in file component can be used instead.
def upload_file():
    """Open a native file dialog and return the path the user picked.

    A hidden Tk root window is created only to host the dialog and is
    destroyed afterwards, so repeated calls do not accumulate Tk roots.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns: the selected path,
        or an empty value when the dialog is cancelled.
    """
    root = Tk()
    root.withdraw()  # hide the empty root window; only the dialog should show
    try:
        file_path = filedialog.askopenfilename(
            title="Select CSV File",
            filetypes=[("Training files", "*.xlsx *.csv")],
        )
    finally:
        # Release the Tk root even if the dialog raises.
        root.destroy()
    return file_path
# Internal training/evaluation and external validation of the AutoGluon model.
plt.rc('font', family='Times New Roman')

# Binary outcome column every input CSV must contain.
_LABEL = 'hospital_expire_flag'
# Directory where fit() stores the predictor and external_evaluate() reloads it.
_MODEL_DIR = './autogluon/'
# Font used for the larger axis labels and titles.
_LABEL_FONT = dict(family='Times New Roman', size=15)
# Number of features shown in the importance bar chart.
_TOP_FEATURES = 6


def _csv_path(file):
    """Return the path of an uploaded-file object (``.name``) or of a plain path."""
    return getattr(file, 'name', file)


def _save_figure(fig, out_path):
    """Save *fig* to *out_path* and close it so figures do not pile up across requests."""
    fig.savefig(out_path, dpi=200, bbox_inches='tight')
    plt.close(fig)


def _plot_roc(y_true, y_prob, out_path):
    """Draw the ROC curve, with the AUC in the legend, and save it to *out_path*."""
    auc = roc_auc_score(y_true, y_prob)
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    fig, ax = plt.subplots(figsize=(5, 4))
    ax.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
    ax.plot([0, 1], [0, 1], linestyle='--')  # chance diagonal
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    sns.despine(ax=ax)
    ax.legend(loc='best')
    _save_figure(fig, out_path)


def _plot_calibration(y_true, y_prob, out_path):
    """Draw a 10-bin calibration curve and save it to *out_path*."""
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=10)
    fig, ax = plt.subplots(figsize=(5, 4))
    # Observed fraction on x, mean predicted probability on y (matches the axis labels).
    ax.plot(prob_true, prob_pred, marker='o', label='Autogluon')
    ax.plot([0, 1], [0, 1], linestyle='--', label='Perfectly Calibrated')
    ax.set_ylabel('Predicted Probability', fontdict=_LABEL_FONT)
    ax.set_xlabel('True Probability', fontdict=_LABEL_FONT)
    ax.set_title('Calibration Curve', fontdict=_LABEL_FONT)
    sns.despine(ax=ax)
    ax.legend()
    _save_figure(fig, out_path)


def _net_benefit_curves(y_true, y_prob, thresholds):
    """Return ``(model, treat_all)`` net-benefit arrays, one value per threshold.

    Net benefit is ``TP/n - FP/n * t/(1 - t)``. Labels are expected to be
    coded 0/1 with 1 as the positive class.
    """
    outcome = np.asarray(y_true) == 1
    scores = np.asarray(y_prob, dtype=float)
    n = outcome.size
    odds = thresholds / (1 - thresholds)
    # Row i holds the predicted-positive mask at thresholds[i].
    flagged = scores[np.newaxis, :] > thresholds[:, np.newaxis]
    true_pos = (flagged & outcome).sum(axis=1)
    false_pos = (flagged & ~outcome).sum(axis=1)
    model = true_pos / n - (false_pos / n) * odds
    # "Treat all": every positive is a TP and every negative is an FP.
    prevalence = outcome.sum() / n
    treat_all = prevalence - (1 - prevalence) * odds
    return model, treat_all


def _plot_decision_curve(y_true, y_prob, out_path, title_pad=None):
    """Draw the decision curve (model vs. treat-all vs. treat-none) and save it."""
    thresholds = np.arange(0, 1, 0.01)  # stops at 0.99, so 1 - t is never zero
    model, treat_all = _net_benefit_curves(y_true, y_prob, thresholds)
    fig, ax = plt.subplots(figsize=(5, 4))
    ax.plot(thresholds, model, label='Autogluon')
    ax.plot(thresholds, treat_all, linestyle='--', label='Treat all')
    ax.plot((0, 1), (0, 0), color='black', linestyle='--', label='Treat none')
    ax.fill_between(thresholds, model, 0, alpha=0.2)
    ax.set_xlim(0, 1)
    # Scale the y-axis to the model curve; the treat-all line dives far below zero.
    ax.set_ylim(model.min() - 0.15, model.max() + 0.15)
    ax.set_xlabel('Threshold Probability', fontdict=_LABEL_FONT)
    ax.set_ylabel('Net Benefit', fontdict=_LABEL_FONT)
    ax.grid(which='minor')
    ax.legend(loc='upper right')
    sns.despine(ax=ax)  # hides the top/right spines
    ax.set_title('Decision Curve', fontdict=_LABEL_FONT, pad=title_pad)
    _save_figure(fig, out_path)


def _plot_feature_importance(predictor, data, out_path):
    """Bar-plot the first ``_TOP_FEATURES`` rows of the predictor's feature importance."""
    top = predictor.feature_importance(data).head(_TOP_FEATURES)
    importance = top['importance']
    # Colour each bar by its importance relative to the plotted range.
    norm = plt.Normalize(importance.min(), importance.max())
    colors = plt.cm.viridis(norm(importance.values))
    fig, ax = plt.subplots(figsize=(5, 4))
    ax.bar(top.index, importance, color=colors)
    ax.set_title('Feature Importance', fontdict=_LABEL_FONT, pad=0.2)
    ax.set_xlabel('Features')
    ax.set_ylabel('Permutation Shuffling Values')
    sns.despine(ax=ax)
    ax.tick_params(axis='x', labelrotation=45)
    _save_figure(fig, out_path)


def _evaluate_and_plot(predictor, data, suffix='', decision_title_pad=None):
    """Score *data* with *predictor* and write the four evaluation plots.

    Args:
        predictor: a fitted ``TabularPredictor``.
        data: DataFrame containing the features and the ``_LABEL`` column.
        suffix: inserted before ``.png`` in every output file name.
        decision_title_pad: title padding for the decision-curve plot.

    Returns:
        Tuple of the four image paths: ROC, calibration, decision curve,
        feature importance.
    """
    y_true = data[_LABEL]
    # Use the public predictor API so the predictor's own feature pipeline and
    # default (best) model are used; as_multiclass=False yields the
    # positive-class probability only.
    y_prob = predictor.predict_proba(data.drop(columns=[_LABEL]), as_multiclass=False)

    roc_path = f'./roc_curve{suffix}.png'
    calibration_path = f'./Calibration_curve{suffix}.png'
    decision_path = f'./Decision_curve{suffix}.png'
    importance_path = f'./feature_importance{suffix}.png'

    _plot_roc(y_true, y_prob, roc_path)
    _plot_calibration(y_true, y_prob, calibration_path)
    _plot_decision_curve(y_true, y_prob, decision_path, title_pad=decision_title_pad)
    _plot_feature_importance(predictor, data, importance_path)
    return roc_path, calibration_path, decision_path, importance_path


def train_and_evaluate(file):
    """Train an AutoGluon binary classifier on a CSV and evaluate it on a hold-out split.

    The CSV is split 80/20 (fixed seed 42); the model is fitted on the 80%
    part, saved under ``./autogluon/``, and evaluated on the 20% part.

    Args:
        file: uploaded-file object exposing ``.name``, or a path to the CSV.
            The CSV must contain a ``hospital_expire_flag`` column.

    Returns:
        Paths of the ROC, calibration, decision-curve and feature-importance
        images, in that order.
    """
    df = pd.read_csv(_csv_path(file))
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    predictor = TabularPredictor(
        label=_LABEL,
        problem_type='binary',
        eval_metric='f1',
        path=_MODEL_DIR,
    ).fit(train_df)
    return _evaluate_and_plot(predictor, test_df)


# External validation
def external_evaluate(file):
    """Evaluate the previously trained model on an external CSV.

    Loads the predictor saved under ``./autogluon/`` by ``train_and_evaluate``
    and produces the same four plots, written with an ``_external`` suffix.

    Args:
        file: uploaded-file object exposing ``.name``, or a path to the CSV.
            The CSV must contain a ``hospital_expire_flag`` column.

    Returns:
        Paths of the ROC, calibration, decision-curve and feature-importance
        images, in that order.
    """
    df = pd.read_csv(_csv_path(file))
    predictor = TabularPredictor.load(_MODEL_DIR)
    return _evaluate_and_plot(predictor, df, suffix='_external', decision_title_pad=0.01)
def preview_excel(file):
    """Return the first three rows of the uploaded CSV for display.

    Only three rows are parsed, so previewing a large file stays cheap.
    Column dtypes are therefore inferred from those rows alone.

    Args:
        file: uploaded-file object exposing ``.name``, a plain path, or
            ``None`` when nothing has been uploaded yet.

    Returns:
        A DataFrame with at most three rows; empty when *file* is ``None``.
    """
    if file is None:
        return pd.DataFrame()
    csv_path = getattr(file, 'name', file)
    return pd.read_csv(csv_path, nrows=3)
import gradio as gr
import base64
# CSS styles for the interface
# Passed to gr.Blocks(css=...); the #ids below match the elem_id values given
# to the Gradio components.
# NOTE(review): the button rule sets `color: gray` while its inline comment
# says "Changed to white" — confirm which one is intended.
# NOTE(review): the image-frame selector list omits #feature_importance_image
# and #external_eval_image4, so those two images are left unframed — confirm
# whether that is deliberate.
css = """
body {
background-color: #f8f9fa;
font-family: 'Arial', sans-serif;
}
#file_input, #external_file_input, #dataframe {
border: 2px dashed #007bff;
padding: 20px;
border-radius: 10px;
background-color: #fff;
}
#train_button, #evaluate_button, #dataframe_button {
background-color: #007bff;
color: gray; /* Changed to white for better contrast */
font-size: 18px;
border-radius: 5px;
margin-top: 10px;
transition: background-color 0.3s;
}
#train_button:hover, #evaluate_button:hover, #dataframe_button:hover {
background-color: #0056b3;
}
#roc_image, #calibration_image, #decision_image, #external_eval_image1, #external_eval_image2, #external_eval_image3 {
border: 1px solid #ddd;
border-radius: 10px;
padding: 10px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
h1 {
color: blue;
text-align: center;
font-size: 28px;
}
h2 {
color: #007bff;
text-align: center;
}
p {
color: #555;
text-align: center;
}
.spinner {
display: none;
text-align: center;
margin-top: 20px;
}
"""
# Load the banner image and inline it as base64 so the page needs no separate
# static file. The path is specific to the author's machine; if it cannot be
# read, fall back to an empty image instead of crashing at import time.
try:
    with open("D:/Haoran/科研/毕设/分析/模型部署/automl6.png", "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()
except OSError:
    encoded_string = ""
# Create the HTML layout with a background image.
# The data URL is declared as image/png to match the .png source file.
background_image = f"""
<div style="position: relative; height: 30vh;">
<div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; background-image: url('data:image/png;base64,{encoded_string}'); background-size: contain; background-repeat: no-repeat; background-position: center; opacity: 0.7;">
</div>
<div style="position: absolute; top: 85%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
<h1 style="color: blue; font-weight: bold; font-size: 45px; white-space: nowrap;">Clinical Prediction Model Training and Evaluation based on AutoML</h1>
<p>Upload your CSV file with a 'hospital_expire_flag' column for binary classification. The tool will train a model, evaluate it, and display ROC, Calibration, Decision curves and Feature Importance plot.</p>
</div>
</div>
"""
# Create Gradio Blocks interface
# Layout: banner, training-file upload with preview, internal-evaluation plots,
# then an external-evaluation section with its own upload and plots.
with gr.Blocks(css=css) as interface:
    gr.HTML(background_image)
    with gr.Row():
        file_input = gr.File(label='Upload Model Training CSV File', elem_id="file_input")
        pre_button = gr.Button('Preview of the First 3 Rows', elem_id='dataframe_button')
    with gr.Row():
        dataframe = gr.DataFrame(elem_id='dataframe')
    pre_button.click(fn=preview_excel, inputs=file_input, outputs=dataframe)
    train_button = gr.Button("Train and Internal Evaluate", elem_id="train_button")
    with gr.Row():
        img1 = gr.Image(label="ROC Curve", type='filepath', elem_id="roc_image")
        img2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="calibration_image")
        img3 = gr.Image(label="Decision Curve", type='filepath', elem_id="decision_image")
        img4 = gr.Image(label="Feature Importance", type='filepath', elem_id="feature_importance_image")
    spinner = gr.Markdown("<div class='spinner'>Training model... Please wait...</div>")

    def handle_click(file):
        """Train on the uploaded CSV and return the four internal-evaluation image paths.

        Failures are raised as ``gr.Error`` so the message is shown in the UI.
        Returning a single string here would not match the four image outputs.
        Calling ``spinner.update(...)`` inside a handler never reached the
        browser, so those calls are gone.
        """
        try:
            return train_and_evaluate(file)
        except Exception as e:
            raise gr.Error(f"训练失败: {str(e)}") from e

    train_button.click(fn=handle_click, inputs=file_input, outputs=[img1, img2, img3, img4])
    # External evaluation section
    gr.Markdown("<h2 style='text-align: center;'>External Evaluation</h2>")
    external_file_input = gr.File(label='Upload External Evaluation CSV File', elem_id="external_file_input")
    evaluate_button = gr.Button("External Evaluate", elem_id="evaluate_button")
    with gr.Row():
        external_eval_image1 = gr.Image(label="ROC Curve", type='filepath', elem_id="external_eval_image1")
        external_eval_image2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="external_eval_image2")
        external_eval_image3 = gr.Image(label="Decision Curve", type='filepath', elem_id="external_eval_image3")
        external_eval_image4 = gr.Image(label="Feature Importance", type='filepath', elem_id="external_eval_image4")

    def evaluate_click(file):
        """Evaluate the saved model on the external CSV and return the four image paths.

        Failures are raised as ``gr.Error`` so the message is shown in the UI
        instead of a value that does not fit the four image outputs.
        """
        try:
            return external_evaluate(file)
        except Exception as e:
            raise gr.Error(f"外部评估失败: {str(e)}") from e

    evaluate_button.click(
        fn=evaluate_click,
        inputs=external_file_input,
        outputs=[external_eval_image1, external_eval_image2, external_eval_image3, external_eval_image4],
    )
# Launch the interface
interface.launch()