Spaces:

haoranrr
/

AMBER

Runtime error

App Files Files Community

AMBER / app.py

haoranrr

AutoML

cc49bd9 verified 11 months ago

raw

history blame contribute delete

13.9 kB

	from tkinter import Tk,filedialog

	import autogluon
	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns
	from autogluon.tabular import TabularDataset,TabularPredictor
	from sklearn.calibration import calibration_curve
	from sklearn.metrics import roc_auc_score,f1_score,roc_curve,confusion_matrix
	from sklearn.model_selection import train_test_split
	# Tkinter-based picker for choosing the training file from the local disk.
	# Gradio's built-in file component can be used instead.
	def upload_file():
	    """Show a native "open file" dialog and return the selected path.

	    A hidden Tk root window is created only to host the dialog and is
	    destroyed afterwards so repeated calls do not accumulate Tk instances.

	    Returns:
	        Whatever ``filedialog.askopenfilename`` returns: the chosen path
	        (presumably an empty value when the dialog is cancelled -- verify
	        against the tkinter documentation).
	    """
	    root = Tk()
	    # Hide the empty root window; only the dialog should be visible.
	    root.withdraw()
	    try:
	        return filedialog.askopenfilename(
	            title="Select CSV File",
	            filetypes=[("Training files", ".xlsx .csv")],
	        )
	    finally:
	        root.destroy()
	# Model training, internal validation and external validation.

	# Figures are only ever written to PNG files, so select the non-interactive
	# Agg backend; it needs no display and no GUI toolkit.
	plt.switch_backend('Agg')
	plt.rc('font', family='Times New Roman')

	# Binary outcome column that every uploaded CSV must contain.
	_LABEL = 'hospital_expire_flag'
	# Directory where the fitted predictor is saved and later reloaded from.
	_MODEL_DIR = './autogluon/'
	# Shared font settings for axis labels and titles.
	_AXIS_FONT = dict(family='Times New Roman', size=15)


	def _read_uploaded_csv(file):
	    """Load an uploaded CSV into a DataFrame and check the label column.

	    Args:
	        file: Either a path string or an upload object exposing the path
	            as ``.name``.

	    Raises:
	        ValueError: If nothing was uploaded or the label column is missing.
	    """
	    if file is None:
	        raise ValueError("No file was uploaded.")
	    path = file if isinstance(file, str) else file.name
	    df = pd.read_csv(path)
	    if _LABEL not in df.columns:
	        raise ValueError(f"Column '{_LABEL}' was not found in the uploaded file.")
	    return df


	def _net_benefit_curves(y_true, y_prob, thresholds):
	    """Compute decision-curve net benefit for the model and for "treat all".

	    Args:
	        y_true: 0/1 (or boolean) outcome per sample, 1 meaning positive.
	        y_prob: Predicted probability of the positive class per sample.
	        thresholds: Threshold probabilities, each strictly below 1.

	    Returns:
	        ``(model, treat_all)``: two float arrays, one value per threshold.
	    """
	    positives = np.asarray(y_true).astype(bool)
	    y_prob = np.asarray(y_prob, dtype=float)
	    thresholds = np.asarray(thresholds, dtype=float)
	    n = positives.size
	    n_pos = np.count_nonzero(positives)

	    # A sample is "treated" when its predicted risk exceeds the threshold.
	    flagged_counts = np.array([np.count_nonzero(y_prob > t) for t in thresholds])
	    tp = np.array([np.count_nonzero(positives & (y_prob > t)) for t in thresholds])
	    fp = flagged_counts - tp

	    # Net benefit weighs false positives by the odds of the threshold.
	    odds = thresholds / (1.0 - thresholds)
	    model = tp / n - fp / n * odds
	    # "Treat all": every positive is a TP and every negative is an FP.
	    treat_all = n_pos / n - (n - n_pos) / n * odds
	    return model, treat_all


	def _render(path, draw, *args):
	    """Draw one 5x4 inch figure with ``draw(ax, *args)`` and save it to ``path``.

	    The figure is always closed, even if drawing fails, so repeated requests
	    do not accumulate open figures.
	    """
	    fig, ax = plt.subplots(figsize=(5, 4))
	    try:
	        draw(ax, *args)
	        sns.despine(ax=ax)
	        fig.savefig(path, dpi=200, bbox_inches='tight')
	    finally:
	        plt.close(fig)
	    return path


	def _draw_roc(ax, y_true, y_prob):
	    """Plot the ROC curve with its AUC in the legend."""
	    auc = roc_auc_score(y_true, y_prob)
	    fpr, tpr, _ = roc_curve(y_true, y_prob)
	    ax.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
	    ax.plot([0, 1], [0, 1], linestyle='--')
	    ax.set_xlabel('False Positive Rate')
	    ax.set_ylabel('True Positive Rate')
	    ax.set_title('ROC Curve')
	    ax.legend(loc='best')


	def _draw_calibration(ax, y_true, y_prob):
	    """Plot the 10-bin calibration curve against the diagonal."""
	    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=10)
	    # x is the observed fraction, y the mean prediction (matches the labels).
	    ax.plot(prob_true, prob_pred, marker='o', label='Autogluon')
	    ax.plot([0, 1], [0, 1], linestyle='--', label='Perfectly Calibrated')
	    ax.set_ylabel('Predicted Probability', fontdict=_AXIS_FONT)
	    ax.set_xlabel('True Probability', fontdict=_AXIS_FONT)
	    ax.set_title('Calibration Curve', fontdict=_AXIS_FONT)
	    ax.legend()


	def _draw_decision_curve(ax, y_true, y_prob):
	    """Plot model net benefit versus the treat-all / treat-none references."""
	    thresholds = np.arange(0, 1, 0.01)
	    model, treat_all = _net_benefit_curves(y_true, y_prob, thresholds)
	    ax.plot(thresholds, model, label='Autogluon')
	    ax.plot(thresholds, treat_all, linestyle='--', label='Treat all')
	    ax.plot((0, 1), (0, 0), color='black', linestyle='--', label='Treat none')
	    ax.fill_between(thresholds, model, 0, alpha=0.2)
	    ax.set_xlim(0, 1)
	    # Scale the y-axis to the model curve; treat-all dives far below zero.
	    ax.set_ylim(model.min() - 0.15, model.max() + 0.15)
	    ax.set_xlabel('Threshold Probability', fontdict=_AXIS_FONT)
	    ax.set_ylabel('Net Benefit', fontdict=_AXIS_FONT)
	    ax.grid(which='minor')
	    ax.legend(loc='upper right')
	    ax.set_title('Decision Curve', fontdict=_AXIS_FONT)


	def _draw_feature_importance(ax, predictor, df):
	    """Plot the first six rows of the predictor's feature-importance table."""
	    top = predictor.feature_importance(df)['importance'].head(6)
	    # Colour each bar by its importance relative to the plotted range.
	    norm = plt.Normalize(top.min(), top.max())
	    ax.bar(top.index, top.values, color=plt.cm.viridis(norm(top.values)))
	    ax.set_title('Feature Importance', fontdict=_AXIS_FONT, pad=0.2)
	    ax.set_xlabel('Features')
	    ax.set_ylabel('Permutation Shuffling Values')
	    ax.tick_params(axis='x', labelrotation=45)


	def _evaluate_and_plot(predictor, df, suffix=''):
	    """Score ``df`` with ``predictor`` and write the four evaluation plots.

	    Args:
	        predictor: A fitted ``TabularPredictor``.
	        df: Evaluation data including the label column.
	        suffix: Text inserted before ``.png`` in each output file name.

	    Returns:
	        Tuple of four PNG paths: ROC, calibration, decision curve and
	        feature importance.
	    """
	    raw_labels = np.asarray(df[_LABEL])
	    # Treat the largest label value as the positive class.
	    y_true = (raw_labels == np.unique(raw_labels)[-1]).astype(int)
	    # Go through the predictor's public API rather than a bare model object,
	    # so the data passes through the predictor before reaching the model.
	    y_prob = np.asarray(
	        predictor.predict_proba(df.drop(columns=[_LABEL]), as_multiclass=False),
	        dtype=float,
	    )
	    return (
	        _render(f'./roc_curve{suffix}.png', _draw_roc, y_true, y_prob),
	        _render(f'./Calibration_curve{suffix}.png', _draw_calibration, y_true, y_prob),
	        _render(f'./Decision_curve{suffix}.png', _draw_decision_curve, y_true, y_prob),
	        _render(f'./feature_importance{suffix}.png', _draw_feature_importance, predictor, df),
	    )


	def train_and_evaluate(file):
	    """Train an AutoGluon binary classifier and evaluate it on a hold-out split.

	    The uploaded CSV is split 80/20 with a fixed seed; the model is fitted on
	    the 80% part, saved under ``./autogluon/`` and evaluated on the 20% part.

	    Args:
	        file: Uploaded CSV (path string or object with ``.name``) containing
	            a ``hospital_expire_flag`` column.

	    Returns:
	        Tuple of four PNG paths: ROC, calibration, decision curve and
	        feature importance.
	    """
	    df = _read_uploaded_csv(file)
	    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
	    predictor = TabularPredictor(
	        label=_LABEL,
	        problem_type='binary',
	        eval_metric='f1',
	        path=_MODEL_DIR,
	    ).fit(train_df)
	    return _evaluate_and_plot(predictor, test_df)


	# External validation
	def external_evaluate(file):
	    """Evaluate the previously trained model on an external CSV.

	    Reloads the predictor saved under ``./autogluon/`` by
	    ``train_and_evaluate`` and scores the whole uploaded dataset.

	    Args:
	        file: Uploaded CSV (path string or object with ``.name``) containing
	            a ``hospital_expire_flag`` column.

	    Returns:
	        Tuple of four ``*_external.png`` paths: ROC, calibration, decision
	        curve and feature importance.
	    """
	    df = _read_uploaded_csv(file)
	    predictor = TabularPredictor.load(_MODEL_DIR)
	    return _evaluate_and_plot(predictor, df, suffix='_external')
	def preview_excel(file):
	    """Return the first three rows of the uploaded CSV for the preview table.

	    Args:
	        file: A path string, an upload object exposing the path as
	            ``.name``, or ``None`` when nothing has been uploaded yet.

	    Returns:
	        A DataFrame with at most three rows; empty when ``file`` is ``None``.
	    """
	    if file is None:
	        # Nothing uploaded yet: show an empty table instead of failing.
	        return pd.DataFrame()
	    path = file if isinstance(file, str) else file.name
	    return pd.read_csv(path).head(3)
	import gradio as gr
	import base64

	# CSS styles for the interface.
	# The selectors target the elem_id values given to the Gradio components.
	# NOTE(review): the button rule's inline comment says "white" but the value
	# is gray; the value is left as-is -- confirm which one is intended.
	css = """
	body {
	    background-color: #f8f9fa;
	    font-family: 'Arial', sans-serif;
	}
	#file_input, #external_file_input, #dataframe {
	    border: 2px dashed #007bff;
	    padding: 20px;
	    border-radius: 10px;
	    background-color: #fff;
	}
	#train_button, #evaluate_button, #dataframe_button {
	    background-color: #007bff;
	    color: gray; /* Changed to white for better contrast */
	    font-size: 18px;
	    border-radius: 5px;
	    margin-top: 10px;
	    transition: background-color 0.3s;
	}
	#train_button:hover, #evaluate_button:hover, #dataframe_button:hover {
	    background-color: #0056b3;
	}
	#roc_image, #calibration_image, #decision_image, #feature_importance_image, #external_eval_image1, #external_eval_image2, #external_eval_image3, #external_eval_image4 {
	    border: 1px solid #ddd;
	    border-radius: 10px;
	    padding: 10px;
	    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
	}
	h1 {
	    color: blue;
	    text-align: center;
	    font-size: 28px;
	}
	h2 {
	    color: #007bff;
	    text-align: center;
	}
	p {
	    color: #555;
	    text-align: center;
	}
	.spinner {
	    display: none;
	    text-align: center;
	    margin-top: 20px;
	}
	"""

	# Load and base64-encode the banner image so it can be inlined as a data URI.
	# The banner is purely decorative: if the file cannot be read (the path
	# below only exists on the original development machine), fall back to an
	# empty payload instead of preventing the whole app from starting.
	try:
	    with open("D:/Haoran/科研/毕设/分析/模型部署/automl6.png", "rb") as image_file:
	        encoded_string = base64.b64encode(image_file.read()).decode()
	except OSError:
	    encoded_string = ""

	# Create the HTML layout with a background image.
	# The data URI is declared as image/png to match the .png source file.
	background_image = f"""
	<div style="position: relative; height: 30vh;">
	<div style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; background-image: url('data:image/png;base64,{encoded_string}'); background-size: contain; background-repeat: no-repeat; background-position: center; opacity: 0.7;">
	</div>
	<div style="position: absolute; top: 85%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
	<h1 style="color: blue; font-weight: bold; font-size: 45px; white-space: nowrap;">Clinical Prediction Model Training and Evaluation based on AutoML</h1>
	<p>Upload your CSV file with a 'hospital_expire_flag' column for binary classification. The tool will train a model, evaluate it, and display ROC, Calibration, Decision curves and Feature Importance plot.</p>
	</div>
	</div>
	"""

	# Create Gradio Blocks interface.
	# NOTE(review): the original indentation was lost; the nesting below is
	# reconstructed from the blank-line grouping -- confirm the intended layout.
	with gr.Blocks(css=css) as interface:
	    gr.HTML(background_image)

	    with gr.Row():
	        file_input = gr.File(label='Upload Model Training CSV File', elem_id="file_input")

	    pre_button = gr.Button('Preview of the First 3 Rows', elem_id='dataframe_button')

	    with gr.Row():
	        dataframe = gr.DataFrame(elem_id='dataframe')

	    pre_button.click(fn=preview_excel, inputs=file_input, outputs=dataframe)

	    train_button = gr.Button("Train and Internal Evaluate", elem_id="train_button")

	    with gr.Row():
	        img1 = gr.Image(label="ROC Curve", type='filepath', elem_id="roc_image")
	        img2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="calibration_image")
	        img3 = gr.Image(label="Decision Curve", type='filepath', elem_id="decision_image")
	        img4 = gr.Image(label="Feature Importance", type='filepath', elem_id="feature_importance_image")

	    # Kept for layout compatibility; it is hidden by the ".spinner" CSS rule.
	    spinner = gr.Markdown("<div class='spinner'>Training model... Please wait...</div>")

	    def handle_click(file):
	        """Train on the uploaded file and return the four internal plot paths.

	        Any failure is re-raised as ``gr.Error`` so the message is shown to
	        the user; returning a single string would not match the four image
	        outputs wired to this handler.
	        """
	        try:
	            return train_and_evaluate(file)
	        except Exception as e:
	            raise gr.Error(f"训练失败: {str(e)}") from e

	    train_button.click(fn=handle_click, inputs=file_input, outputs=[img1, img2, img3, img4])

	    # External evaluation section
	    gr.Markdown("<h2 style='text-align: center;'>External Evaluation</h2>")
	    external_file_input = gr.File(label='Upload External Evaluation CSV File', elem_id="external_file_input")
	    evaluate_button = gr.Button("External Evaluate", elem_id="evaluate_button")

	    with gr.Row():
	        external_eval_image1 = gr.Image(label="ROC Curve", type='filepath', elem_id="external_eval_image1")
	        external_eval_image2 = gr.Image(label="Calibration Curve", type='filepath', elem_id="external_eval_image2")
	        external_eval_image3 = gr.Image(label="Decision Curve", type='filepath', elem_id="external_eval_image3")
	        external_eval_image4 = gr.Image(label="Feature Importance", type='filepath', elem_id="external_eval_image4")

	    def evaluate_click(file):
	        """Evaluate the saved model on the external file and return four plot paths.

	        Failures are re-raised as ``gr.Error`` for the same reason as in
	        ``handle_click``.
	        """
	        try:
	            return external_evaluate(file)
	        except Exception as e:
	            raise gr.Error(f"外部评估失败: {str(e)}") from e

	    evaluate_button.click(
	        fn=evaluate_click,
	        inputs=external_file_input,
	        outputs=[external_eval_image1, external_eval_image2, external_eval_image3, external_eval_image4],
	    )

	# Launch the interface
	interface.launch()