Spaces:

wjc23
/

VPA_predict

No application file

App Files Files Community

wjc23 committed on Apr 18, 2024

Commit

edff4f8

verified ·

1 Parent(s): bdfa5fd

Upload 4 files

Browse files

Files changed (4) hide show

CNN_model.py +126 -0
gradi.py +40 -0
model.zip +3 -0
storage.py +269 -0

CNN_model.py ADDED Viewed

	@@ -0,0 +1,126 @@

# -*- coding: utf-8 -*-
"""Evaluate the saved CNN model on the VPA data set.

Reads the Excel data, keeps the last DV record of every subject, puts
subjects with ID <= 290 into the training pool and the others into the
validation set, runs the SavedModel on them and prints the share of
predictions within 20 % / 30 % of the measured value.

Created on Mon Apr  1 22:06:46 2024
@author: admin
"""
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from storage import preprocess_data,process_train_data,turn_back,result_output,draw_acc,cal_accuracy

# Model inputs, in the order they are fed to the network.
FEATURE_COLUMNS = ['AMT', 't', 'BSA', 'BW', 'age', 'height']
# Subjects with an ID above this value form the validation set.
VALIDATION_ID_THRESHOLD = 290
# Key of the prediction tensor in the dict returned by the serving signature.
OUTPUT_KEY = 'dense_5'

filepath = 'data/VPA10.8.xlsx'
df = pd.read_excel(filepath)

# Sanitise the column names so they are valid identifiers.
# regex=True is spelled out because pandas >= 2.0 treats the pattern as a
# literal string by default, which would turn these calls into no-ops.
df.columns = df.columns.str.replace(r'[{}:]', '', regex=True)
df.columns = df.columns.str.replace(' ', '_', regex=False)
# Prefix (rather than overwrite) a leading digit with 'X'.
df.columns = df.columns.str.replace(r'^([0-9])', r'X\1', regex=True)
df.columns = df.columns.str.replace(r'[^a-zA-Z0-9_]', '', regex=True)

# Number of non-missing DV records per subject.
result = df.groupby('ID')['DV'].count().reset_index(name='Count')
# Keep the subjects that have at least one DV record.
filtered_ids = result[result['Count'] >= 1]['ID']
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of df.
filtered_df = df[df['ID'].isin(filtered_ids)].copy()
# A missing AMT takes the AMT of the previous row of the same subject.
# NOTE(review): shift() only looks one row back, so runs of several missing
# AMT values stay partly unfilled - confirm whether ffill() was intended.
filtered_df['AMT'] = filtered_df['AMT'].fillna(filtered_df.groupby('ID')['AMT'].shift())
filtered_df = filtered_df.dropna(subset=['DV']).copy()
# The network is fed log(AMT).
filtered_df['AMT'] = np.log(filtered_df['AMT'])

samples_train = []
samples_val = []
for id_value, group in filtered_df.groupby('ID'):
    # Rows without DV were dropped above, so the last row of the group is the
    # subject's last DV record.
    last_row = group.iloc[-1]
    sample = (last_row[FEATURE_COLUMNS].tolist(), last_row['DV'])
    if id_value > VALIDATION_ID_THRESHOLD:
        samples_val.append(sample)
    else:
        samples_train.append(sample)

# Separate the input features from the output feature.
X = [input_features for input_features, _ in samples_train]
y = [output_feature for _, output_feature in samples_train]
val_x = [input_features for input_features, _ in samples_val]
val_y = [output_feature for _, output_feature in samples_val]
# Fixed seed so the reported figures are reproducible between runs.
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.1, random_state=0)

save_path = 'model/model_CNN'
loaded_model = tf.saved_model.load(save_path)
model_pre = loaded_model.signatures['serving_default']

n_features = len(FEATURE_COLUMNS)
train_x = np.array(train_x).reshape(-1, n_features, 1)
test_x = np.array(test_x).reshape(-1, n_features, 1)
val_x = np.array(val_x).reshape(-1, n_features, 1)

train_predictions = model_pre(tf.constant(train_x, dtype=tf.float32))[OUTPUT_KEY].numpy()
# Computed for completeness; the held-out split is not scored below.
test_predictions = model_pre(tf.constant(test_x, dtype=tf.float32))[OUTPUT_KEY].numpy()
val_predictions = model_pre(tf.constant(val_x, dtype=tf.float32))[OUTPUT_KEY].numpy()

train_y = np.reshape(train_y,(-1,1))
test_y = np.reshape(test_y,(-1,1))
val_y = np.reshape(val_y,(-1,1))

cal_accuracy(train_predictions,train_y)
cal_accuracy(val_predictions,val_y)
import gradio as gr
import tensorflow as tf
import numpy as np

# Load the TensorFlow SavedModel and take its default serving signature.
model_path = 'model/model_CNN'
loaded_model = tf.saved_model.load(model_path)
model_predict = loaded_model.signatures['serving_default']


def predict(AMT, t, BSA, BW, age, height):
    """Return the model prediction for one set of inputs.

    The six values are arranged as ``[log(AMT), t, BSA, BW, age, height]``,
    reshaped to ``(1, 6, 1)`` and passed to the serving signature; the first
    element of its ``'dense_5'`` output is returned.

    Raises:
        gr.Error: If AMT is missing or not positive, because log(AMT) would
            be undefined and the prediction meaningless.
    """
    if AMT is None or AMT <= 0:
        raise gr.Error("AMT must be a positive number.")
    # Arrange the inputs in the layout the model is called with.
    input_features = np.array([[np.log(AMT), t, BSA, BW, age, height]], dtype=float).reshape(1, 6, 1)
    predictions = model_predict(tf.constant(input_features, dtype=tf.float32))['dense_5'].numpy()
    return predictions.flatten()[0]


# Build the Gradio interface. The initial value of a Number component is set
# with `value`; `default` is not an accepted keyword in current Gradio.
iface = gr.Interface(
    fn=predict,
    inputs=[gr.Number(label='AMT', value=1.0),
            gr.Number(label='t', value=1.0),
            gr.Number(label='BSA', value=1.0),
            gr.Number(label='BW', value=1.0),
            gr.Number(label='age', value=30),
            gr.Number(label='height', value=160)],
    outputs='text',
    title="Drug Response Prediction",
    description="Enter the values for AMT, t, BSA, BW, age, and height to predict the drug response."
)
iface.launch()

gradi.py ADDED Viewed

	@@ -0,0 +1,40 @@

# -*- coding: utf-8 -*-
"""Gradio front end that queries the saved CNN model.

Created on Wed Apr 17 21:57:52 2024
@author: admin
"""
import gradio as gr
import tensorflow as tf
import numpy as np

# Load the TensorFlow SavedModel and take its default serving signature.
model_path = 'model/model_CNN'
loaded_model = tf.saved_model.load(model_path)
model_predict = loaded_model.signatures['serving_default']


def process_inputs(AMT, t, BSA, BW, age, height):
    """Run the model on one set of inputs and return its raw output array.

    AMT is log-transformed before the call: CNN_model.py feeds log(AMT) to
    this same 'model/model_CNN', so passing the raw value here would give
    predictions that disagree with that script.

    Raises:
        gr.Error: If AMT is missing or not positive (log(AMT) is undefined).
    """
    if AMT is None or AMT <= 0:
        raise gr.Error("AMT must be a positive number.")
    input_features = np.array([[np.log(AMT), t, BSA, BW, age, height]], dtype=float).reshape(-1, 6, 1)
    predictions = model_predict(tf.constant(input_features, dtype=tf.float32))['dense_5'].numpy()
    return predictions


# Build the Gradio interface.
description = "Enter values for AMT, t, BSA, BW, age, and height."
interface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Number(label="AMT"),
        gr.Number(label="t"),
        gr.Number(label="BSA"),
        gr.Number(label="BW"),
        gr.Number(label="Age"),
        gr.Number(label="Height")
    ],
    outputs="text",
    description=description,
    title="Input Processing Interface"
)
interface.launch()

model.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83ca484cc4bc70aec34d97563b2b9c1e37e4dff20a16014cde6b161d6a9b87d5
+size 1048745

storage.py ADDED Viewed

	@@ -0,0 +1,269 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Tue Dec 26 21:49:46 2023
+@author: admin
+"""
+import pandas as pd
+import numpy as np
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from sklearn.preprocessing import MinMaxScaler
+import matplotlib.pyplot as plt
def preprocess_data(filepath,form):
    """Load the data, keep rows with TAD >= 4 and optionally select one form.

    Args:
        filepath: Path of an Excel workbook readable by ``pandas.read_excel``,
            or an already-loaded DataFrame (used as is, never modified).
        form: 0 returns every row with ``TAD >= 4``; 1 returns only the rows
            whose ``form`` column equals 1; any other value returns the rows
            whose ``form`` column equals 2.

    Returns:
        The filtered DataFrame.
    """
    df = filepath if isinstance(filepath, pd.DataFrame) else pd.read_excel(filepath)
    df = df[df['TAD'] >= 4]
    if form == 0:
        return df
    # Only the requested subset is built; everything that is not 1 falls back
    # to form 2, as before.
    wanted_form = 1 if form == 1 else 2
    return df[df['form'] == wanted_form]
def process_train_data(df,form_type,output_type):
    """Build the model input matrix and the training target from a data frame.

    Columns are read by position: 1 -> t, 2 -> AMT, 3 -> y, 4 -> form,
    6 -> BSA.

    Args:
        df: DataFrame with at least seven columns in the layout above.
        form_type: Unused; kept so existing calls keep working.
        output_type: Selects the target encoding.
            1 uses ``k = -log(y)`` and 2 uses ``k = -log(y / AMT)``; in both
            cases the target is k min-max scaled to [0, 1], shaped (n, 1).
            Any other value returns the raw y, shaped (n, 1).

    Returns:
        ``(inputs, target, max_k, min_k, AMT)``. ``inputs`` is
        ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]`` for output types 1 and 2
        and ``[BSA, AMT, t, form]`` otherwise. ``max_k`` and ``min_k`` are
        the extremes of k before scaling (taken from ``-log(y / AMT)`` when
        output_type is neither 1 nor 2). ``AMT`` is the flat AMT array.
    """
    y = df.iloc[:, 3].values
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    k_train = -np.log(y) if output_type == 1 else -np.log(y / AMT)
    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type not in (1, 2):
        train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
        return train_in_normalized, np.reshape(y, (-1, 1)), max_k, min_k, AMT

    span = max_k - min_k
    if span == 0:
        # Every target is identical: scale to 0 instead of dividing 0 by 0.
        train_out_normalized = np.zeros_like(k_train, dtype=float)
    else:
        train_out_normalized = (k_train - min_k) / span
    train_out_normalized = np.reshape(train_out_normalized, (-1, 1))

    train_in_normalized = np.concatenate(
        (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form), axis=1)
    return train_in_normalized, train_out_normalized, max_k, min_k, AMT
def process_train_data_DNN(df,form_type,output_type):
    """Build the DNN input matrix and training target from a data frame.

    Columns are read by position: 1 -> t, 2 -> AMT, 3 -> y, 4 -> form,
    6 -> BSA.

    Args:
        df: DataFrame with at least seven columns in the layout above.
        form_type: Unused; kept so existing calls keep working.
        output_type: Selects the target encoding.
            1 uses ``k = -log(y)`` and 2 uses ``k = -log(y * 5 / AMT)``; in
            both cases the target is k min-max scaled to [0, 1], as a flat
            array. Any other value returns the raw y, shaped (n, 1).

    Returns:
        For output types 1 and 2: ``(inputs, target, max_k, min_k, AMT)``
        with ``inputs = [BSA**(1/3), BSA, BSA**3, AMT, t, form]``.
        Otherwise a SIX-element tuple
        ``(inputs, y, max_k, min_k, max_AMT, min_AMT)`` with
        ``inputs = [BSA, AMT, t, form]`` and k taken from ``-log(y / AMT)``.
        The different arity is kept as is because callers may unpack it.
    """
    y = df.iloc[:, 3].values
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    if output_type == 1:
        k_train = -np.log(y) * 1.
    elif output_type == 2:
        k_train = -np.log(y * 5 / AMT)
    else:
        k_train = -np.log(y / AMT)
    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type not in (1, 2):
        train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
        return (train_in_normalized, np.reshape(y, (-1, 1)), max_k, min_k,
                np.max(AMT), np.min(AMT))

    span = max_k - min_k
    if span == 0:
        # Every target is identical: scale to 0 instead of dividing 0 by 0.
        train_out_normalized = np.zeros_like(k_train, dtype=float)
    else:
        train_out_normalized = (k_train - min_k) / span

    train_in_normalized = np.concatenate(
        (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form), axis=1)
    return train_in_normalized, train_out_normalized, max_k, min_k, AMT
def turn_back_DNN(data,max_k,min_k,train_data,output_type):
    """Convert scaled DNN outputs back to the original target scale.

    Args:
        data: Model outputs. For output types 1 and 2 they are flattened and
            un-scaled with ``k = data * (max_k - min_k) + min_k``.
        max_k: Upper bound used for the min-max scaling.
        min_k: Lower bound used for the min-max scaling.
        train_data: Input matrix; only read for output_type 2, where column 3
            is taken as AMT.
        output_type: 1 returns ``exp(-k)``; 2 returns ``AMT * exp(-k) / 5``;
            any other value returns ``data`` converted to float unchanged.

    Returns:
        The back-transformed values.
    """
    if output_type not in (1, 2):
        return data / 1.

    k = np.reshape(data, -1) * (max_k - min_k) + min_k
    if output_type == 1:
        # train_data is not needed here, so it is no longer indexed.
        return np.exp(-k)
    AMT = train_data[:, 3]
    return AMT * np.exp(-k) / 5
def turn_back(data,max_k,min_k,train_data,output_type):
    """Convert scaled model outputs back to the original target scale.

    Args:
        data: Model outputs. For output types 1 and 2 they are flattened and
            un-scaled with ``k = data * (max_k - min_k) + min_k``.
        max_k: Upper bound used for the min-max scaling.
        min_k: Lower bound used for the min-max scaling.
        train_data: Input matrix; only read for output_type 2, where column 2
            is taken as AMT.
        output_type: 1 returns ``exp(-k) / 1.25``; 2 returns
            ``AMT * exp(-k)``; any other value returns ``data`` converted to
            float unchanged.

    Returns:
        The back-transformed values.
    """
    if output_type not in (1, 2):
        return data / 1.

    k = np.reshape(data, -1) * (max_k - min_k) + min_k
    if output_type == 1:
        # train_data is not needed here, so it is no longer indexed.
        # NOTE(review): the 1.25 divisor is not the inverse of k = -log(y);
        # presumably an empirical correction - confirm.
        return np.exp(-k) / 1.25
    # NOTE(review): process_train_data puts AMT in column 3 of its
    # six-column input matrix; confirm that column 2 is the intended one.
    AMT = train_data[:, 2]
    return AMT * np.exp(-k)
def result_output(train_y,y_train_pre):
    """Print MSE, RMSE, R-squared and MAE of predictions against targets.

    Args:
        train_y: True target values.
        y_train_pre: Predicted values, same shape as ``train_y``.
    """
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6,
    # so the RMSE is derived from the per-output MSE instead. Averaging the
    # per-output square roots reproduces what `squared=False` returned.
    per_output_mse = mean_squared_error(train_y, y_train_pre, multioutput='raw_values')
    mse = np.mean(per_output_mse)
    rmse = np.mean(np.sqrt(per_output_mse))
    r2 = r2_score(train_y,y_train_pre)
    mae = mean_absolute_error(train_y,y_train_pre)
    print('train_MSE:', mse)
    print('train_RMSE:', rmse)
    print('train_R-squared:', r2)
    print('train_MAE:', mae)
def one_hot_encode(values, num_classes=10):
    """One-hot encode values from [0, 1] into ``num_classes`` equal-width bins.

    Args:
        values: Scalar, sequence or array of numbers. Bin ``i`` covers
            ``[i / num_classes, (i + 1) / num_classes)``; the value 1 goes
            into the last bin. Values outside [0, 1] are clamped to the
            first or last bin instead of wrapping around or raising.
        num_classes: Number of bins, i.e. the length of each one-hot vector.

    Returns:
        Float array of shape ``values.shape + (num_classes,)``.

    Raises:
        ValueError: If ``values`` contains NaN, which has no bin.
    """
    values = np.asarray(values, dtype=float)
    if np.isnan(values).any():
        raise ValueError("values must not contain NaN")
    interval = 1 / num_classes
    # Bin index of every value.
    categories = np.floor(values / interval).astype(int)
    # Clamp the upper edge (value == 1) and any out-of-range input.
    categories = np.clip(categories, 0, num_classes - 1)
    return np.eye(num_classes)[categories]
def cal_accuracy(y_pred,test_y):
    """Print and return the share of predictions within 20 % and 30 %.

    A prediction counts as a hit when ``|pred - actual|`` is at most 20 %
    (respectively 30 %) of ``|actual|``. The absolute value keeps the
    tolerance positive for negative targets.

    Args:
        y_pred: Predicted values; any shape, flattened before comparison.
        test_y: Measured values with the same number of elements.

    Returns:
        ``(within_20_percent, within_30_percent)`` as percentages.

    Raises:
        ValueError: If the inputs are empty or differ in size.
    """
    predicted = np.ravel(np.asarray(y_pred, dtype=float))
    actual = np.ravel(np.asarray(test_y, dtype=float))
    if predicted.size != actual.size:
        raise ValueError(
            f"y_pred has {predicted.size} values but test_y has {actual.size}")
    if actual.size == 0:
        raise ValueError("cal_accuracy needs at least one sample")

    abs_error = np.abs(predicted - actual)
    reference = np.abs(actual)
    within_20_percent = np.count_nonzero(abs_error <= 0.20 * reference) / actual.size
    within_30_percent = np.count_nonzero(abs_error <= 0.30 * reference) / actual.size
    print("within_20_percent:",within_20_percent*100)
    print("within_30_percent:",within_30_percent*100)
    return within_20_percent*100, within_30_percent*100
def draw_acc(train_y, y_train_pre,txt=None):
    """Scatter measured against predicted values with 20 % / 30 % bands.

    Predicted values go on the x axis and measured values on the y axis. The
    dashed lines are ``y = 1.2 x`` / ``y = 0.8 x`` (blue) and ``y = 1.3 x`` /
    ``y = 0.7 x`` (red), drawn for x in [0, 100], with the areas between them
    shaded.

    Args:
        train_y: Measured values (y axis).
        y_train_pre: Predicted values (x axis).
        txt: Optional output path. When given, the figure is also saved there
            as a 300 dpi TIFF before it is shown.
    """
    fig, ax = plt.subplots()
    ax.scatter(y_train_pre, train_y, s=10, label='Observations')
    ax.set_xlabel('Predicted Concentration')
    ax.set_ylabel('Measured Concentration')
    ax.grid(True)

    # Tolerance bands around the line of identity.
    x = np.linspace(0, 100, 500)
    y_20_upper = x * 1.2
    y_20_lower = x * 0.8
    y_30_upper = x * 1.3
    y_30_lower = x * 0.7

    ax.plot(x, y_20_upper, color='blue', linestyle='--', label='20% Upper Bound')
    ax.plot(x, y_20_lower, color='blue', linestyle='--', label='20% Lower Bound')
    ax.plot(x, y_30_upper, color='red', linestyle='--', label='30% Upper Bound')
    ax.plot(x, y_30_lower, color='red', linestyle='--', label='30% Lower Bound')

    ax.fill_between(x, y_20_lower, y_20_upper, color='blue', alpha=0.1)
    ax.fill_between(x, y_30_lower, y_30_upper, color='red', alpha=0.1)

    ax.set_xlim([-5, 100])
    ax.legend()
    fig.set_facecolor('white')

    # Save before showing: the figure may be released once the window closes.
    if txt is not None:
        fig.savefig(txt, dpi=300, format='tif')
    plt.show()