Spaces:
No application file
No application file
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Tue Dec 26 21:49:46 2023 | |
| @author: admin | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | |
| from sklearn.preprocessing import MinMaxScaler | |
| import matplotlib.pyplot as plt | |
def preprocess_data(filepath, form):
    """Load the dataset, keep rows with ``TAD >= 4`` and select a formulation.

    Parameters
    ----------
    filepath : str, path-like, file-like or pandas.DataFrame
        Anything ``pandas.read_excel`` accepts.  An already-loaded
        ``DataFrame`` is also accepted and used directly (it is not
        modified), which avoids re-reading the workbook.
    form : int
        ``1`` keeps rows whose ``form`` column equals 1, ``0`` keeps every
        row, and any other value keeps rows whose ``form`` column equals 2.

    Returns
    -------
    pandas.DataFrame
        The rows with ``TAD >= 4`` restricted to the requested formulation.
    """
    if isinstance(filepath, pd.DataFrame):
        df = filepath
    else:
        df = pd.read_excel(filepath)

    df = df[df['TAD'] >= 4]
    if form == 0:
        return df

    # Only build the subset that is actually requested.
    wanted_form = 1 if form == 1 else 2
    return df[df['form'] == wanted_form]
def process_train_data(df, form_type, output_type):
    """Build the feature matrix and training target from a raw data frame.

    Columns are read by position: 1 -> ``t``, 2 -> ``AMT``, 3 -> ``y``,
    4 -> ``form`` and 6 -> ``BSA``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least seven columns in the layout above.
    form_type
        Unused; kept so existing callers keep working.
    output_type : int
        ``1``: the target is ``-log(y)`` min-max scaled to ``[0, 1]``.
        ``2``: the target is ``-log(y / AMT)`` min-max scaled to ``[0, 1]``.
        Anything else: the target is the raw ``y`` column.

    Returns
    -------
    tuple
        ``(features, target, max_k, min_k, AMT)``.  ``features`` has the
        columns ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]`` for
        ``output_type`` 1 and 2 and ``[BSA, AMT, t, form]`` otherwise.
        ``target`` has shape ``(n, 1)``.  ``max_k`` / ``min_k`` are the
        extrema of the log-transformed target (of ``-log(y / AMT)`` when
        ``output_type`` is neither 1 nor 2).  ``AMT`` is the 1-D dose column.
    """
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    y = df.iloc[:, 3].values
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    if output_type == 1:
        k_train = -(np.log(y))
    else:
        k_train = -(np.log(y / AMT))
    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        span = max_k - min_k
        if span == 0:
            # A constant target would give 0/0 = NaN.  Zeros are the value
            # that an inverse ``x * (max_k - min_k) + min_k`` maps back to
            # the original constant.
            scaled = np.zeros_like(k_train, dtype=float)
        else:
            scaled = (k_train - min_k) / span
        features = np.concatenate(
            (np.power(BSA, 1 / 3), BSA, np.power(BSA, 3), AMT1, t, form),
            axis=1,
        )
        return features, np.reshape(scaled, (-1, 1)), max_k, min_k, AMT

    features = np.concatenate((BSA, AMT1, t, form), axis=1)
    return features, np.reshape(y, (-1, 1)), max_k, min_k, AMT
def process_train_data_DNN(df, form_type, output_type):
    """Build the feature matrix and training target for the DNN variant.

    Columns are read by position: 1 -> ``t``, 2 -> ``AMT``, 3 -> ``y``,
    4 -> ``form`` and 6 -> ``BSA``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least seven columns in the layout above.
    form_type
        Unused; kept so existing callers keep working.
    output_type : int
        ``1``: the target is ``-log(y)`` min-max scaled to ``[0, 1]``.
        ``2``: the target is ``-log(y * 5 / AMT)`` min-max scaled to
        ``[0, 1]``.
        Anything else: the target is the raw ``y`` column.

    Returns
    -------
    tuple
        For ``output_type`` 1 and 2:
        ``(features, target, max_k, min_k, AMT)`` where ``target`` is 1-D
        and ``features`` has the columns
        ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]``.
        Otherwise: ``(features, y, max_k, min_k, max_AMT, min_AMT)`` where
        ``y`` has shape ``(n, 1)``, ``features`` has the columns
        ``[BSA, AMT, t, form]`` and ``max_k`` / ``min_k`` are the extrema of
        ``-log(y / AMT)``.  Note the two shapes differ in length.
    """
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    y = df.iloc[:, 3].values
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    max_AMT = np.max(AMT)
    min_AMT = np.min(AMT)

    if output_type == 1:
        k_train = -(np.log(y)) * 1.
    elif output_type == 2:
        k_train = -(np.log(y * 5 / AMT))
    else:
        k_train = -(np.log(y / AMT))
    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        span = max_k - min_k
        if span == 0:
            # A constant target would give 0/0 = NaN.  Zeros are the value
            # that an inverse ``x * (max_k - min_k) + min_k`` maps back to
            # the original constant.
            scaled = np.zeros_like(k_train, dtype=float)
        else:
            scaled = (k_train - min_k) / span
        features = np.concatenate(
            (np.power(BSA, 1 / 3), BSA, np.power(BSA, 3), AMT1, t, form),
            axis=1,
        )
        return features, scaled, max_k, min_k, AMT

    features = np.concatenate((BSA, AMT1, t, form), axis=1)
    return features, np.reshape(y, (-1, 1)), max_k, min_k, max_AMT, min_AMT
def turn_back_DNN(data, max_k, min_k, train_data, output_type):
    """Map scaled DNN outputs back to the original target scale.

    Parameters
    ----------
    data : array-like
        Model outputs.  For ``output_type`` 1 and 2 they are flattened and
        treated as min-max scaled values.
    max_k, min_k : float
        Extrema used for the min-max scaling that is being undone.
    train_data : numpy.ndarray
        Only read when ``output_type == 2``, where column 3 is used as the
        dose (``AMT``).  It may be ``None`` otherwise.
    output_type : int
        ``1``: returns ``exp(-k)``.
        ``2``: returns ``AMT * exp(-k) / 5`` (the ``5`` mirrors the factor in
        ``-log(y * 5 / AMT)`` used by ``process_train_data_DNN``).
        Anything else: returns ``data`` converted to float unchanged.

    Returns
    -------
    numpy.ndarray
        The back-transformed values.
    """
    if output_type != 1 and output_type != 2:
        return data / 1.

    # Undo the min-max scaling to recover the log-domain value.
    k = np.reshape(data, -1) * (max_k - min_k) + min_k
    if output_type == 1:
        return np.exp(-k)

    AMT = train_data[:, 3]
    return AMT * np.exp(-k) / 5
def turn_back(data, max_k, min_k, train_data, output_type):
    """Map scaled model outputs back to the original target scale.

    Parameters
    ----------
    data : array-like
        Model outputs.  For ``output_type`` 1 and 2 they are flattened and
        treated as min-max scaled values.
    max_k, min_k : float
        Extrema used for the min-max scaling that is being undone.
    train_data : numpy.ndarray
        Only read when ``output_type == 2``, where column 2 is used as the
        dose (``AMT``).  It may be ``None`` otherwise.
    output_type : int
        ``1``: returns ``exp(-k) / 1.25``.
        ``2``: returns ``AMT * exp(-k)``.
        Anything else: returns ``data`` converted to float unchanged.

    Returns
    -------
    numpy.ndarray
        The back-transformed values.
    """
    if output_type != 1 and output_type != 2:
        return data / 1.

    # Undo the min-max scaling to recover the log-domain value.
    k = np.reshape(data, -1) * (max_k - min_k) + min_k
    if output_type == 1:
        # NOTE(review): the 1.25 divisor has no visible counterpart in the
        # forward transform of process_train_data -- confirm it is intended.
        return np.exp(-k) / 1.25

    # NOTE(review): column 2 is presumably the dose in the raw data layout
    # (not in the 6-column feature matrix) -- verify against the caller.
    AMT = train_data[:, 2]
    return AMT * np.exp(-k)
def result_output(train_y, y_train_pre):
    """Print MSE, RMSE, R-squared and MAE for a set of predictions.

    Parameters
    ----------
    train_y : array-like
        Reference (measured) values.
    y_train_pre : array-like
        Predicted values, same shape as ``train_y``.
    """
    mse = mean_squared_error(train_y, y_train_pre)
    # ``mean_squared_error(..., squared=False)`` was deprecated in
    # scikit-learn 1.4 and removed in 1.6.  Taking the root of the per-output
    # MSE and averaging gives the same number on every version.
    per_output_mse = mean_squared_error(
        train_y, y_train_pre, multioutput='raw_values'
    )
    rmse = np.mean(np.sqrt(per_output_mse))
    r2 = r2_score(train_y, y_train_pre)
    mae = mean_absolute_error(train_y, y_train_pre)

    print('train_MSE:', mse)
    print('train_RMSE:', rmse)
    print('train_R-squared:', r2)
    print('train_MAE:', mae)
def one_hot_encode(values, num_classes=10):
    """One-hot encode values from ``[0, 1]`` into equal-width bins.

    Parameters
    ----------
    values : array-like
        Numbers expected to lie in ``[0, 1]``.  Values outside that range
        are assigned to the first or last bin instead of wrapping around or
        raising.
    num_classes : int, default 10
        Number of equal-width bins.

    Returns
    -------
    numpy.ndarray
        Array of shape ``values.shape + (num_classes,)`` with a single ``1.0``
        per entry along the last axis.
    """
    interval = 1 / num_classes
    # Bin index of every value.
    categories = np.floor(np.asarray(values, dtype=float) / interval).astype(int)
    # Boundary handling: 1.0 (and anything above) belongs to the last bin;
    # negative values belong to the first bin rather than being treated as
    # negative indices.
    categories = np.clip(categories, 0, num_classes - 1)
    # Select the matching rows of the identity matrix.
    return np.eye(num_classes)[categories]
def cal_accuracy(y_pred, test_y):
    """Print the percentage of predictions within 20 % and 30 % of the target.

    A prediction counts as a hit when ``abs(pred - actual)`` does not exceed
    the given fraction of ``actual``.  The hit count is divided by
    ``len(test_y)``.

    Parameters
    ----------
    y_pred : sequence
        Predicted values.
    test_y : sequence
        Reference values, paired element-wise with ``y_pred``.
    """
    def _share_within(tolerance):
        # Accumulate hits pair by pair, starting from 0.
        hits = 0
        for actual, pred in zip(test_y, y_pred):
            hits = hits + (abs(pred - actual) <= tolerance * actual)
        return hits / len(test_y)

    share_20 = _share_within(0.20)
    share_30 = _share_within(0.30)
    print("within_20_percent:", share_20 * 100)
    print("within_30_percent:", share_30 * 100)
def draw_acc(train_y, y_train_pre, txt=None):
    """Scatter measured against predicted values with 20 % / 30 % bands.

    Parameters
    ----------
    train_y : array-like
        Measured values (plotted on the y axis).
    y_train_pre : array-like
        Predicted values (plotted on the x axis).
    txt : str or path-like, optional
        When given, the figure is saved there as TIFF at 300 dpi before it
        is shown.
    """
    fig, ax = plt.subplots()

    # Scatter plot: actual vs predicted concentrations.
    ax.scatter(y_train_pre, train_y, s=10, label='Observations')
    ax.set_xlabel('Predicted Concentration')
    ax.set_ylabel('Measured Concentration')
    ax.grid(True)

    # Tolerance bands are drawn over a fixed predicted range of 0..100.
    x = np.linspace(0, 100, 500)
    y_20_upper = x * 1.2
    y_20_lower = x * 0.8
    y_30_upper = x * 1.3
    y_30_lower = x * 0.7

    # 20% tolerance lines in blue.
    ax.plot(x, y_20_upper, color='blue', linestyle='--', label='20% Upper Bound')
    ax.plot(x, y_20_lower, color='blue', linestyle='--', label='20% Lower Bound')
    # 30% tolerance lines in red.
    ax.plot(x, y_30_upper, color='red', linestyle='--', label='30% Upper Bound')
    ax.plot(x, y_30_lower, color='red', linestyle='--', label='30% Lower Bound')

    # Lightly shade both tolerance bands.
    ax.fill_between(x, y_20_lower, y_20_upper, color='blue', alpha=0.1)
    ax.fill_between(x, y_30_lower, y_30_upper, color='red', alpha=0.1)

    ax.set_xlim([-5, 100])
    ax.legend()
    fig.set_facecolor('white')

    # Save first, then display the figure.
    if txt is not None:
        fig.savefig(txt, dpi=300, format='tif')
    plt.show()