Spaces:

wjc23
/

VPA_predict

No application file

App Files Files Community

VPA_predict / storage.py

wjc23

Upload 4 files

edff4f8 verified over 1 year ago

raw

history blame contribute delete

8.52 kB

	# -*- coding: utf-8 -*-
	"""
	Created on Tue Dec 26 21:49:46 2023

	@author: admin
	"""
	import pandas as pd
	import numpy as np
	from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
	from sklearn.preprocessing import MinMaxScaler
	import matplotlib.pyplot as plt


	def preprocess_data(filepath,form):
	    """Load the Excel sheet at *filepath* and select rows by ``form`` code.

	    Rows whose ``TAD`` value is below 4 are always discarded. Passing
	    ``form == 1`` keeps only rows whose ``form`` column equals 1, ``form == 0``
	    keeps every remaining row, and any other value keeps only rows whose
	    ``form`` column equals 2.
	    """
	    table = pd.read_excel(filepath)
	    table = table[table['TAD'] >= 4]
	    # Looked up before branching so a sheet without a 'form' column fails
	    # for every selector value, including 0.
	    form_column = table['form']

	    if form == 0:
	        return table
	    wanted = 1 if form == 1 else 2
	    return table[form_column == wanted]

	def process_train_data(df,form_type,output_type):
	    """Build the model input matrix and target from positional columns of *df*.

	    Columns are read by position: 1 -> ``t``, 2 -> ``AMT``, 3 -> ``y``,
	    4 -> ``form`` and 6 -> ``BSA``. No other column is touched, so *df* only
	    needs seven columns.

	    Parameters
	    ----------
	    df : pandas.DataFrame
	        Source table using the positional layout above.
	    form_type
	        Accepted for interface compatibility; not used.
	    output_type : int
	        1 -> target is ``-log(y)`` min-max scaled to [0, 1];
	        2 -> target is ``-log(y / AMT)`` min-max scaled to [0, 1];
	        anything else -> target is the raw ``y`` column.

	    Returns
	    -------
	    tuple
	        ``(inputs, target, max_k, min_k, AMT)``. ``inputs`` has the columns
	        ``(BSA**(1/3), BSA, BSA**3, AMT, t, form)`` for output types 1 and 2
	        and ``(BSA, AMT, t, form)`` otherwise. ``target`` has shape (n, 1).
	        ``max_k`` / ``min_k`` are the extremes of the log-transformed target
	        (of ``-log(y / AMT)`` when the raw ``y`` is returned). ``AMT`` is the
	        flat 1-D column.
	    """
	    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
	    AMT = np.reshape(df.iloc[:, 2].values, (-1))
	    y = df.iloc[:, 3].values
	    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
	    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
	    AMT1 = np.reshape(AMT, (-1, 1))

	    # Only output type 1 drops the division by AMT; every other value
	    # (including 2) uses -log(y / AMT).
	    if output_type == 1:
	        k_train = -(np.log(y))
	    else:
	        k_train = -(np.log(y / AMT))
	    max_k = np.max(k_train)
	    min_k = np.min(k_train)

	    if output_type not in (1, 2):
	        train_in = np.concatenate((BSA, AMT1, t, form), axis=1)
	        return train_in, np.reshape(y, (-1, 1)), max_k, min_k, AMT

	    train_in = np.concatenate(
	        (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form), axis=1)
	    # Min-max scale the transformed target into [0, 1].
	    train_out = np.reshape((k_train - min_k) / (max_k - min_k), (-1, 1))
	    return train_in, train_out, max_k, min_k, AMT

	def process_train_data_DNN(df,form_type,output_type):
	    """Build the DNN input matrix and target from positional columns of *df*.

	    Columns are read by position: 1 -> ``t``, 2 -> ``AMT``, 3 -> ``y``,
	    4 -> ``form`` and 6 -> ``BSA``. No other column is touched, so *df* only
	    needs seven columns.

	    Parameters
	    ----------
	    df : pandas.DataFrame
	        Source table using the positional layout above.
	    form_type
	        Accepted for interface compatibility; not used.
	    output_type : int
	        1 -> target is ``-log(y)`` min-max scaled to [0, 1];
	        2 -> target is ``-log(5 * y / AMT)`` min-max scaled to [0, 1];
	        anything else -> target is the raw ``y`` column.

	    Returns
	    -------
	    tuple
	        For output types 1 and 2:
	        ``(inputs, target, max_k, min_k, AMT)`` where ``inputs`` has the
	        columns ``(BSA**(1/3), BSA, BSA**3, AMT, t, form)``, ``target`` is
	        1-D and ``AMT`` is the flat 1-D column.
	        Otherwise a SIX-element tuple
	        ``(inputs, y, max_k, min_k, max_AMT, min_AMT)`` where ``inputs`` has
	        the columns ``(BSA, AMT, t, form)``, ``y`` has shape (n, 1) and
	        ``max_k`` / ``min_k`` are the extremes of ``-log(y / AMT)``.
	    """
	    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
	    AMT = np.reshape(df.iloc[:, 2].values, (-1))
	    y = df.iloc[:, 3].values
	    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
	    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
	    AMT1 = np.reshape(AMT, (-1, 1))

	    if output_type == 1:
	        k_train = -(np.log(y))*1.
	    elif output_type == 2:
	        # The factor 5 is undone by the division by 5 in turn_back_DNN.
	        k_train = -(np.log(y*5/AMT))
	    else:
	        k_train = -(np.log(y / AMT))
	    max_k = np.max(k_train)
	    min_k = np.min(k_train)

	    if output_type not in (1, 2):
	        train_in = np.concatenate((BSA, AMT1, t, form), axis=1)
	        return (train_in, np.reshape(y, (-1, 1)), max_k, min_k,
	                np.max(AMT), np.min(AMT))

	    train_in = np.concatenate(
	        (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form), axis=1)
	    # Min-max scale the transformed target into [0, 1]; it stays 1-D here.
	    train_out = (k_train - min_k) / (max_k - min_k)
	    return train_in, train_out, max_k, min_k, AMT


	def turn_back_DNN(data,max_k,min_k,train_data,output_type):
	    """Map DNN outputs back to the original target scale.

	    Parameters
	    ----------
	    data : array-like
	        Model outputs. For output types 1 and 2 they are flattened and
	        treated as min-max scaled values.
	    max_k, min_k : float
	        Extremes used for the min-max scaling being undone.
	    train_data : numpy.ndarray
	        Only read when ``output_type == 2``, where column 3 supplies AMT.
	    output_type : int
	        1 -> ``exp(-k)``; 2 -> ``AMT * exp(-k) / 5``; anything else ->
	        *data* converted to float via ``data / 1.``.

	    Returns
	    -------
	    numpy.ndarray
	        1-D array for output types 1 and 2, otherwise the shape of *data*.
	    """
	    if output_type not in (1, 2):
	        return data/1.

	    # Undo the min-max scaling to recover k.
	    k = np.reshape(data, -1) * (max_k - min_k) + min_k
	    if output_type == 1:
	        return np.exp(-k)

	    AMT = train_data[:, 3]
	    # Division by 5 mirrors the factor 5 inside the log taken by
	    # process_train_data_DNN for output type 2.
	    return AMT * np.exp(-k) / 5



	def turn_back(data,max_k,min_k,train_data,output_type):
	    """Map model outputs back to the original target scale.

	    Parameters
	    ----------
	    data : array-like
	        Model outputs. For output types 1 and 2 they are flattened and
	        treated as min-max scaled values.
	    max_k, min_k : float
	        Extremes used for the min-max scaling being undone.
	    train_data : numpy.ndarray
	        Only read when ``output_type == 2``, where column 2 is used as AMT.
	    output_type : int
	        1 -> ``exp(-k) / 1.25``; 2 -> ``AMT * exp(-k)``; anything else ->
	        *data* converted to float via ``data / 1.``.

	    Returns
	    -------
	    numpy.ndarray
	        1-D array for output types 1 and 2, otherwise the shape of *data*.
	    """
	    if output_type not in (1, 2):
	        return data/1.

	    # Undo the min-max scaling to recover k.
	    k = np.reshape(data, -1) * (max_k - min_k) + min_k
	    if output_type == 1:
	        # NOTE(review): 1.25 looks like an empirical correction factor; it
	        # has no counterpart in the forward transform -log(y) - confirm.
	        return np.exp(-k) / 1.25

	    # NOTE(review): the 6-column inputs built by process_train_data are
	    # (BSA**(1/3), BSA, BSA**3, AMT, t, form), which puts AMT at index 3,
	    # not 2 - confirm which feature layout callers pass in here.
	    AMT = train_data[:, 2]
	    return AMT * np.exp(-k)

	def result_output(train_y,y_train_pre):
	    """Print MSE, RMSE, R-squared and MAE of predictions against targets.

	    Parameters
	    ----------
	    train_y : array-like
	        Reference values.
	    y_train_pre : array-like
	        Predicted values, same shape as *train_y*.

	    Returns
	    -------
	    None
	        The four metrics are only printed.
	    """
	    mse = mean_squared_error(train_y, y_train_pre)
	    # The `squared=False` keyword was deprecated in scikit-learn 1.4 and
	    # removed in 1.6. Taking the root of each output's MSE and averaging
	    # gives the same value the old keyword returned.
	    per_output_mse = mean_squared_error(
	        train_y, y_train_pre, multioutput='raw_values')
	    rmse = np.mean(np.sqrt(per_output_mse))
	    r2 = r2_score(train_y, y_train_pre)
	    mae = mean_absolute_error(train_y, y_train_pre)

	    print('train_MSE:', mse)
	    print('train_RMSE:', rmse)
	    print('train_R-squared:', r2)
	    print('train_MAE:', mae)

	def one_hot_encode(values, num_classes=10):
	    """One-hot encode values by splitting [0, 1] into equal-width bins.

	    Parameters
	    ----------
	    values : array-like or scalar
	        Numbers to bin. Bin ``i`` covers ``[i / num_classes,
	        (i + 1) / num_classes)``; the value 1 falls into the last bin.
	        Values outside [0, 1] are clamped to the first or last bin.
	    num_classes : int, default 10
	        Number of bins, i.e. the length of every one-hot vector.

	    Returns
	    -------
	    numpy.ndarray
	        Float array of shape ``values.shape + (num_classes,)``.
	    """
	    interval = 1 / num_classes

	    # Work out which bin every value belongs to.
	    categories = np.floor(np.asarray(values) / interval).astype(int)
	    # Clamp so that 1.0 (and anything out of range) lands in a valid bin
	    # instead of raising IndexError or wrapping round via a negative index.
	    categories = np.clip(categories, 0, num_classes - 1)

	    # Rows of the identity matrix are the one-hot vectors.
	    return np.eye(num_classes)[categories]



	def cal_accuracy(y_pred,test_y):
	    """Print the percentage of predictions within 20% and 30% of the actuals.

	    A prediction counts as a hit when ``abs(pred - actual)`` does not exceed
	    the tolerance times ``actual``. Nothing is returned.
	    """
	    def _share_within(tolerance):
	        # Fraction of (actual, pred) pairs inside the relative tolerance.
	        hits = 0
	        for actual, pred in zip(test_y, y_pred):
	            hits = hits + (abs(pred - actual) <= tolerance * actual)
	        return hits / len(test_y)

	    share_20 = _share_within(0.20)
	    share_30 = _share_within(0.30)
	    print("within_20_percent:", share_20 * 100)
	    print("within_30_percent:", share_30 * 100)

	def draw_acc(train_y, y_train_pre,txt=None):
	    """Scatter measured against predicted values with 20% and 30% bands.

	    Parameters
	    ----------
	    train_y : array-like
	        Measured values, drawn on the y axis.
	    y_train_pre : array-like
	        Predicted values, drawn on the x axis.
	    txt : str or None, optional
	        When given, the figure is also saved to this path as a 300 dpi TIFF
	        before being shown.

	    Returns
	    -------
	    None
	    """
	    fig, ax = plt.subplots()

	    # Scatter plot: measured vs predicted values.
	    ax.scatter(y_train_pre, train_y, s=10, label='Observations')
	    ax.set_xlabel('Predicted Concentration')
	    ax.set_ylabel('Measured Concentration')
	    ax.grid(True)

	    # Tolerance bands around y = x, described as
	    # (name, lower factor, upper factor, colour).
	    x = np.linspace(0, 100, 500)
	    bands = (('20%', 0.8, 1.2, 'blue'), ('30%', 0.7, 1.3, 'red'))

	    # Dashed boundary lines first, so the legend lists them band by band.
	    for name, lower, upper, colour in bands:
	        ax.plot(x, x * upper, color=colour, linestyle='--',
	                label=f'{name} Upper Bound')
	        ax.plot(x, x * lower, color=colour, linestyle='--',
	                label=f'{name} Lower Bound')

	    # Then shade the area inside each band with a light tint.
	    for name, lower, upper, colour in bands:
	        ax.fill_between(x, x * lower, x * upper, color=colour, alpha=0.1)

	    ax.set_xlim([-5, 100])
	    ax.legend()
	    fig.set_facecolor('white')

	    if txt is not None:
	        fig.savefig(txt, dpi=300, format='tif')

	    # Then display the chart.
	    plt.show()