VPA_predict / storage.py
wjc23's picture
Upload 4 files
edff4f8 verified
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 26 21:49:46 2023
@author: admin
"""
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
def preprocess_data(filepath,form):
    """Load the dataset from an Excel file and select rows by formulation.

    Rows whose ``TAD`` value is below 4 are always discarded first.

    Args:
        filepath: Location passed straight to ``pandas.read_excel``.
        form: Row selector. ``1`` keeps rows whose ``form`` column equals 1,
            ``0`` keeps every remaining row, and any other value keeps rows
            whose ``form`` column equals 2.

    Returns:
        The filtered ``pandas.DataFrame``.
    """
    raw = pd.read_excel(filepath)
    kept = raw[raw['TAD'] >= 4]
    # Looked up before branching so a missing 'form' column is reported
    # regardless of which selector was requested.
    form_values = kept['form']
    if form == 1:
        return kept[form_values == 1]
    if form == 0:
        return kept
    return kept[form_values == 2]
def process_train_data(df,form_type,output_type):
    """Build the model feature matrix and the (optionally scaled) target.

    Columns are read by position: 1 -> ``t``, 2 -> ``AMT``, 3 -> ``y``,
    4 -> ``form`` and 6 -> ``BSA`` (names follow the local variables).

    Args:
        df: DataFrame with at least seven columns laid out as above.
        form_type: Unused; kept so existing callers keep working.
        output_type: ``1`` -> the target is ``-log(y)``;
            ``2`` -> the target is ``-log(y / AMT)``;
            any other value -> the raw ``y`` column is returned as target.

    Returns:
        Tuple ``(features, target, max_k, min_k, AMT)``:

        * ``features`` -- for output types 1 and 2 the columns are
          ``BSA**(1/3), BSA, BSA**3, AMT, t, form``; otherwise
          ``BSA, AMT, t, form``.
        * ``target`` -- shape ``(n, 1)``. Min-max scaled log target for
          output types 1 and 2, raw ``y`` otherwise.
        * ``max_k``, ``min_k`` -- extremes of the log target (for other
          output types they are computed from ``-log(y / AMT)``).
        * ``AMT`` -- the 1-D ``AMT`` column.
    """
    y = df.iloc[:, 3].values
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    AMT1 = np.reshape(AMT, (-1, 1))

    if output_type == 1:
        k_train = -(np.log(y))
    else:
        k_train = -(np.log(y / AMT))
    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        span = max_k - min_k
        if span == 0:
            # Constant target (e.g. a single row): 0/0 would give NaN.
            # Zeros map back to min_k under y * (max_k - min_k) + min_k.
            scaled = np.zeros_like(k_train, dtype=float)
        else:
            scaled = (k_train - min_k) / span
        train_out_normalized = np.reshape(scaled, (-1, 1))
        train_in_normalized = np.concatenate(
            (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form), axis=1)
        return train_in_normalized, train_out_normalized, max_k, min_k, AMT

    train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
    y = np.reshape(y, (-1, 1))
    return train_in_normalized, y, max_k, min_k, AMT
def process_train_data_DNN(df,form_type,output_type):
    """Build the DNN feature matrix and the (optionally scaled) target.

    Columns are read by position: 1 -> ``t``, 2 -> ``AMT``, 3 -> ``y``,
    4 -> ``form`` and 6 -> ``BSA`` (names follow the local variables).

    Args:
        df: DataFrame with at least seven columns laid out as above.
        form_type: Unused; kept so existing callers keep working.
        output_type: ``1`` -> the target is ``-log(y)``;
            ``2`` -> the target is ``-log(y * 5 / AMT)``;
            any other value -> the raw ``y`` column is returned as target.

    Returns:
        For output types 1 and 2 the 5-tuple
        ``(features, target, max_k, min_k, AMT)`` where ``features`` has the
        columns ``BSA**(1/3), BSA, BSA**3, AMT, t, form`` and ``target`` is
        the 1-D min-max scaled log target.

        For any other output type the 6-tuple
        ``(features, y, max_k, min_k, max_AMT, min_AMT)`` where ``features``
        has the columns ``BSA, AMT, t, form``, ``y`` has shape ``(n, 1)`` and
        ``max_k``/``min_k`` are computed from ``-log(y / AMT)``.
    """
    y = df.iloc[:, 3].values
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    AMT1 = np.reshape(AMT, (-1, 1))
    max_AMT = np.max(AMT)
    min_AMT = np.min(AMT)

    if output_type == 1:
        k_train = -(np.log(y))
    elif output_type == 2:
        k_train = -(np.log(y * 5 / AMT))
    else:
        k_train = -(np.log(y / AMT))
    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        span = max_k - min_k
        if span == 0:
            # Constant target (e.g. a single row): 0/0 would give NaN.
            # Zeros map back to min_k under y * (max_k - min_k) + min_k.
            train_out_normalized = np.zeros_like(k_train, dtype=float)
        else:
            train_out_normalized = (k_train - min_k) / span
        train_in_normalized = np.concatenate(
            (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form), axis=1)
        return train_in_normalized, train_out_normalized, max_k, min_k, AMT

    train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
    y = np.reshape(y, (-1, 1))
    return train_in_normalized, y, max_k, min_k, max_AMT, min_AMT
def turn_back_DNN(data,max_k,min_k,train_data,output_type):
    """Map DNN outputs back from the scaled log target.

    Args:
        data: Model outputs. For output types 1 and 2 they are flattened and
            treated as min-max scaled values.
        max_k: Upper bound used when the target was scaled.
        min_k: Lower bound used when the target was scaled.
        train_data: 2-D feature array; only read for ``output_type == 2``,
            where column 3 is taken as ``AMT``. This matches the feature
            layout built by ``process_train_data_DNN`` for that output type.
        output_type: ``1`` -> returns ``exp(-k)``;
            ``2`` -> returns ``AMT * exp(-k) / 5``;
            any other value -> returns ``data`` converted by ``data / 1.``.
            Here ``k = data * (max_k - min_k) + min_k``.

    Returns:
        The back-transformed values (1-D for output types 1 and 2).
    """
    if output_type == 1:
        k = np.reshape(data, -1) * (max_k - min_k) + min_k
        # train_data is deliberately not touched: this branch does not use AMT.
        y_1 = np.exp(-k)
    elif output_type == 2:
        k = np.reshape(data, -1) * (max_k - min_k) + min_k
        AMT = train_data[:, 3]
        # The divisor 5 undoes the y * 5 / AMT scaling applied in
        # process_train_data_DNN for this output type.
        y_1 = AMT * np.exp(-k) / 5
    else:
        y_1 = data / 1.
    return y_1
def turn_back(data,max_k,min_k,train_data,output_type):
    """Map model outputs back from the scaled log target.

    Args:
        data: Model outputs. For output types 1 and 2 they are flattened and
            treated as min-max scaled values.
        max_k: Upper bound used when the target was scaled.
        min_k: Lower bound used when the target was scaled.
        train_data: 2-D feature array; only read for ``output_type == 2``,
            where column 2 is taken as ``AMT``.
        output_type: ``1`` -> returns ``exp(-k) / 1.25``;
            ``2`` -> returns ``AMT * exp(-k)``;
            any other value -> returns ``data`` converted by ``data / 1.``.
            Here ``k = data * (max_k - min_k) + min_k``.

    Returns:
        The back-transformed values (1-D for output types 1 and 2).
    """
    if output_type == 1:
        k = np.reshape(data, -1) * (max_k - min_k) + min_k
        # train_data is deliberately not touched: this branch does not use AMT.
        # NOTE(review): process_train_data uses -log(y) for this output type,
        # so the 1.25 divisor is not part of the inverse -- presumably an
        # empirical correction; confirm.
        y_1 = np.exp(-k) / 1.25
    elif output_type == 2:
        k = np.reshape(data, -1) * (max_k - min_k) + min_k
        # NOTE(review): process_train_data places AMT in column 3 for this
        # output type, whereas column 2 is read here -- confirm which feature
        # layout callers pass in.
        AMT = train_data[:, 2]
        y_1 = AMT * np.exp(-k)
    else:
        y_1 = data / 1.
    return y_1
def result_output(train_y,y_train_pre):
    """Print MSE, RMSE, R-squared and MAE for a set of predictions.

    Args:
        train_y: Reference (true) values.
        y_train_pre: Predicted values, aligned with ``train_y``.

    Returns:
        None. The four metrics are written to standard output.
    """
    mse = mean_squared_error(train_y,y_train_pre)
    # The ``squared=False`` keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6. Taking the square root per output and then averaging
    # reproduces what it computed with the default uniform averaging.
    per_output_mse = mean_squared_error(train_y,y_train_pre, multioutput='raw_values')
    rmse = float(np.mean(np.sqrt(per_output_mse)))
    r2 = r2_score(train_y,y_train_pre)
    mae = mean_absolute_error(train_y,y_train_pre)
    print('train_MSE:', mse)
    print('train_RMSE:', rmse)
    print('train_R-squared:', r2)
    print('train_MAE:', mae)
def one_hot_encode(values, num_classes=10):
    """One-hot encode values by splitting [0, 1] into equal-width bins.

    Args:
        values: Scalar or array-like of numbers, nominally in ``[0, 1]``.
        num_classes: Number of bins (and length of each one-hot vector).

    Returns:
        Float array of shape ``values.shape + (num_classes,)`` with a single
        1 per vector. A value of exactly 1 falls in the last bin. Values
        below 0 are assigned to the first bin and values above 1 to the last
        bin. NaN inputs are not validated.
    """
    interval = 1 / num_classes
    # Work out which bin each value belongs to.
    categories = np.floor(np.asarray(values, dtype=float) / interval).astype(int)
    # Clamp to the valid bin range: this handles the value == 1 boundary and
    # stops negative indices from silently wrapping round to the last bin.
    categories = np.clip(categories, 0, num_classes - 1)
    # Apply the one-hot encoding by picking rows of the identity matrix.
    one_hot_encoded = np.eye(num_classes)[categories]
    return one_hot_encoded
def cal_accuracy(y_pred,test_y):
    """Print the percentage of predictions within 20% and 30% of the truth.

    A prediction counts as a hit when ``abs(pred - actual)`` does not exceed
    the tolerance multiplied by ``actual``. The hit count is divided by
    ``len(test_y)``.

    Args:
        y_pred: Predicted values.
        test_y: Reference (true) values, paired element-wise with ``y_pred``.

    Returns:
        None. Both percentages are written to standard output.
    """
    def _share_within(tolerance):
        # Fraction of (actual, prediction) pairs inside the relative band.
        hits = sum(
            abs(predicted - observed) <= tolerance * observed
            for observed, predicted in zip(test_y, y_pred)
        )
        return hits / len(test_y)

    share_20 = _share_within(0.20)
    share_30 = _share_within(0.30)
    print("within_20_percent:", share_20 * 100)
    print("within_30_percent:", share_30 * 100)
def draw_acc(train_y, y_train_pre,txt=None):
    """Scatter measured against predicted values with 20% / 30% bands.

    Predictions go on the x axis and measurements on the y axis. The dashed
    lines ``y = 1.2x``, ``0.8x``, ``1.3x`` and ``0.7x`` are drawn for
    ``x`` in ``[0, 100]`` and the regions between them are shaded.

    Args:
        train_y: Measured values (y axis).
        y_train_pre: Predicted values (x axis).
        txt: Optional output path. When given, the figure is also saved
            there as a 300 dpi TIFF before being shown.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots()
    # Scatter plot: actual vs predicted drug concentrations.
    ax.scatter(y_train_pre, train_y, s=10, label='Observations')
    ax.set_xlabel('Predicted Concentration')
    ax.set_ylabel('Measured Concentration')
    ax.grid(True)
    # Sample points for the tolerance lines and shaded areas.
    x = np.linspace(0, 100, 500)
    y_20_upper = x * 1.2
    y_20_lower = x * 0.8
    y_30_upper = x * 1.3
    y_30_lower = x * 0.7
    # 20% tolerance lines in blue.
    ax.plot(x, y_20_upper, color='blue', linestyle='--', label='20% Upper Bound')
    ax.plot(x, y_20_lower, color='blue', linestyle='--', label='20% Lower Bound')
    # 30% tolerance lines in red.
    ax.plot(x, y_30_upper, color='red', linestyle='--', label='30% Upper Bound')
    ax.plot(x, y_30_lower, color='red', linestyle='--', label='30% Lower Bound')
    # Lightly shade the 20% and 30% tolerance bands.
    ax.fill_between(x, y_20_lower, y_20_upper, color='blue', alpha=0.1)
    ax.fill_between(x, y_30_lower, y_30_upper, color='red', alpha=0.1)
    # The x axis is fixed to the range the bands were generated for.
    ax.set_xlim([-5, 100])
    ax.legend()
    fig.set_facecolor('white')
    # Identity check against None, as recommended by PEP 8.
    if txt is not None:
        fig.savefig(txt, dpi=300, format='tif')
    # Then display the chart.
    plt.show()