wjc23 committed on
Commit
edff4f8
·
verified ·
1 Parent(s): bdfa5fd

Upload 4 files

Browse files
Files changed (4) hide show
  1. CNN_model.py +126 -0
  2. gradi.py +40 -0
  3. model.zip +3 -0
  4. storage.py +269 -0
CNN_model.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Apr 1 22:06:46 2024
4
+
5
+ @author: admin
6
+ """
7
+
8
+ import tensorflow as tf
9
+ import pandas as pd
10
+ import numpy as np
11
+ from sklearn.model_selection import train_test_split
12
+
13
+ from storage import result_output,preprocess_data,process_train_data,turn_back,result_output,draw_acc,cal_accuracy
14
+
15
# ---------------------------------------------------------------------------
# Data preparation and offline evaluation of the saved CNN model.
# ---------------------------------------------------------------------------
filepath = 'data/VPA10.8.xlsx'
df = pd.read_excel(filepath)

# Sanitise the column names so they are valid identifiers.
# regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a
# literal string by default, which silently turned these calls into no-ops.
df.columns = df.columns.str.replace(r'[{}:]', '', regex=True)
df.columns = df.columns.str.replace(' ', '_', regex=False)
# Prefix a leading digit with 'X' (the digit itself is kept).
df.columns = df.columns.str.replace(r'^([0-9])', r'X\1', regex=True)
# Drop every remaining character that is not a letter, digit or underscore.
df.columns = df.columns.str.replace(r'[^a-zA-Z0-9_]', '', regex=True)

# Number of non-null DV values per ID.
result = df.groupby('ID')['DV'].count().reset_index(name='Count')

# IDs that have at least one DV value.
filtered_ids = result[result['Count'] >= 1]['ID']

# Work on an explicit copy so the column assignments below do not write
# into a view of ``df`` (avoids pandas' chained-assignment warning).
filtered_df = df[df['ID'].isin(filtered_ids)].copy()
# A missing AMT is filled with the AMT of the previous row of the same ID.
filtered_df['AMT'] = filtered_df['AMT'].fillna(
    filtered_df.groupby('ID')['AMT'].shift()
)
filtered_df = filtered_df.dropna(subset=['DV']).copy()

samples_train = []
samples_val = []
samples_tr = []
# Ranges of AMT and DV, taken before AMT is log-transformed.
min_amt = filtered_df['AMT'].min()
max_amt = filtered_df['AMT'].max()
min_dv = filtered_df['DV'].min()
max_dv = filtered_df['DV'].max()
filtered_df['BSA_square'] = filtered_df['BSA'] ** 2
filtered_df['BSA_cubic'] = filtered_df['BSA'] ** 3
# The model is fed log(AMT), not the raw amount.
filtered_df['AMT'] = np.log(filtered_df['AMT'])

FEATURE_COLUMNS = ['AMT', 't', 'BSA', 'BW', 'age', 'height']

# One sample per ID: the row at position ``count - 1`` of each group.
for id_value, group in filtered_df.groupby('ID'):
    count = group['DV'].count()
    if count < 1:
        continue
    last_row = group.iloc[count - 1]
    sample = (last_row[FEATURE_COLUMNS].tolist(), last_row['DV'])

    # IDs above 290 form the validation set, all others the training pool.
    if last_row['ID'] > 290:
        samples_val.append(sample)
    else:
        samples_train.append(sample)

# Split the (features, target) pairs into parallel lists.
X = [features for features, _ in samples_train]
y = [target for _, target in samples_train]
val_x = [features for features, _ in samples_val]
val_y = [target for _, target in samples_val]

# NOTE(review): no random_state is set, so the split differs between runs.
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.1)

save_path = 'model/model_CNN'

loaded_model = tf.saved_model.load(save_path)
model_pre = loaded_model.signatures['serving_default']

# The serving signature is called with inputs of shape (batch, 6, 1).
train_x = np.array(train_x).reshape(-1, 6, 1)
test_x = np.array(test_x).reshape(-1, 6, 1)
val_x = np.array(val_x).reshape(-1, 6, 1)

# The signature returns a dict; the prediction tensor is stored under
# 'dense_5'. All three splits are unpacked the same way.
train_predictions = model_pre(tf.constant(train_x, dtype=tf.float32))
train_predictions = train_predictions['dense_5'].numpy()
test_predictions = model_pre(tf.constant(test_x, dtype=tf.float32))
test_predictions = test_predictions['dense_5'].numpy()
val_predictions = model_pre(tf.constant(val_x, dtype=tf.float32))
val_predictions = val_predictions['dense_5'].numpy()

train_y = np.reshape(train_y, (-1, 1))
test_y = np.reshape(test_y, (-1, 1))
val_y = np.reshape(val_y, (-1, 1))

cal_accuracy(train_predictions, train_y)
cal_accuracy(val_predictions, val_y)
95
+
96
+ import gradio as gr
97
+ import tensorflow as tf
98
+ import numpy as np
99
+
100
# Load the TensorFlow SavedModel and take its default serving signature.
model_path = 'model/model_CNN'
loaded_model = tf.saved_model.load(model_path)
model_predict = loaded_model.signatures['serving_default']
104
+
105
def predict(AMT, t, BSA, BW, age, height):
    """Run the saved model on one set of inputs and return its prediction.

    Args:
        AMT: Amount value; must be positive because its natural logarithm is
            what is passed to the model.
        t, BSA, BW, age, height: Remaining numeric inputs, passed unchanged.

    Returns:
        float: First element of the model's ``'dense_5'`` output.

    Raises:
        ValueError: If ``AMT`` is missing or not strictly positive.
    """
    # log(AMT) is -inf/NaN for AMT <= 0 and would silently yield a garbage
    # prediction, so reject such input explicitly.
    if AMT is None or AMT <= 0:
        raise ValueError("AMT must be a positive number")

    # Arrange the six values as a single sample of shape (1, 6, 1).
    input_features = np.array(
        [[np.log(AMT), t, BSA, BW, age, height]], dtype=float
    ).reshape(1, 6, 1)
    predictions = model_predict(
        tf.constant(input_features, dtype=tf.float32)
    )['dense_5'].numpy()
    # Return a plain Python float rather than a NumPy scalar.
    return float(predictions.flatten()[0])
110
+
111
+ # 创建 Gradio 界面
112
# Build the Gradio interface around ``predict`` and start it.
# Initial values are passed as ``value=``; the top-level ``gr.Number``
# component has no ``default`` keyword.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Number(label='AMT', value=1.0),
        gr.Number(label='t', value=1.0),
        gr.Number(label='BSA', value=1.0),
        gr.Number(label='BW', value=1.0),
        gr.Number(label='age', value=30),
        gr.Number(label='height', value=160),
    ],
    outputs='text',
    title="Drug Response Prediction",
    description="Enter the values for AMT, t, BSA, BW, age, and height to predict the drug response.",
)

iface.launch()
126
+
gradi.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Apr 17 21:57:52 2024
4
+
5
+ @author: admin
6
+ """
7
+
8
+ import gradio as gr
9
+ import tensorflow as tf
10
+ import numpy as np
11
+
12
# Load the TensorFlow SavedModel and take its default serving signature.
model_path = 'model/model_CNN'
loaded_model = tf.saved_model.load(model_path)
model_predict = loaded_model.signatures['serving_default']
16
+ # 假设我们有一个处理这些输入的函数
17
def process_inputs(AMT, t, BSA, BW, age, height):
    """Run the saved model on one set of inputs and return its prediction.

    Args:
        AMT: Amount value; must be positive because its natural logarithm is
            what is passed to the model.
        t, BSA, BW, age, height: Remaining numeric inputs, passed unchanged.

    Returns:
        float: First element of the model's ``'dense_5'`` output.

    Raises:
        ValueError: If ``AMT`` is missing or not strictly positive.
    """
    if AMT is None or AMT <= 0:
        raise ValueError("AMT must be a positive number")

    # The same saved model ('model/model_CNN') is fed log(AMT) by the
    # evaluation script and its ``predict`` function in CNN_model.py, so
    # the raw amount must be log-transformed here as well.
    input_features = np.array(
        [[np.log(AMT), t, BSA, BW, age, height]], dtype=float
    ).reshape(-1, 6, 1)
    predictions = model_predict(
        tf.constant(input_features, dtype=tf.float32)
    )['dense_5'].numpy()
    # A scalar renders cleanly in the "text" output (previously "[[...]]").
    return float(predictions.flatten()[0])
22
+
23
# Build the Gradio interface around ``process_inputs`` and start it.
description = "Enter values for AMT, t, BSA, BW, age, and height."
interface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Number(label="AMT"),
        gr.Number(label="t"),
        gr.Number(label="BSA"),
        gr.Number(label="BW"),
        gr.Number(label="Age"),
        gr.Number(label="Height"),
    ],
    outputs="text",
    description=description,
    title="Input Processing Interface",
)

interface.launch()
model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ca484cc4bc70aec34d97563b2b9c1e37e4dff20a16014cde6b161d6a9b87d5
3
+ size 1048745
storage.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Dec 26 21:49:46 2023
4
+
5
+ @author: admin
6
+ """
7
+ import pandas as pd
8
+ import numpy as np
9
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
10
+ from sklearn.preprocessing import MinMaxScaler
11
+ import matplotlib.pyplot as plt
12
+
13
+
14
def preprocess_data(filepath, form):
    """Load the Excel dataset and select rows by the ``form`` column.

    Rows whose ``TAD`` value is below 4 are always discarded first.

    Args:
        filepath: Path of the workbook handed to ``pandas.read_excel``.
        form: Row selector. ``1`` keeps rows where ``form == 1``, ``0`` keeps
            every row that passed the ``TAD`` filter, and any other value
            keeps rows where ``form == 2``.

    Returns:
        pandas.DataFrame: The selected rows.
    """
    df = pd.read_excel(filepath)
    df = df[df['TAD'] >= 4]

    if form == 0:
        return df

    # Build only the subset that is actually requested instead of
    # materialising both and throwing one away.
    wanted_form = 1 if form == 1 else 2
    return df[df['form'] == wanted_form]
26
+
27
def process_train_data(df, form_type, output_type):
    """Build model inputs and targets from a positionally indexed frame.

    Columns are read by position and named after the variables used in this
    function: 1 = ``t``, 2 = ``AMT``, 3 = ``y``, 4 = ``form``, 6 = ``BSA``.

    Args:
        df: Source DataFrame with at least seven columns.
        form_type: Unused; kept for call compatibility.
        output_type: ``1`` -> target is ``-log(y)``; ``2`` -> target is
            ``-log(y / AMT)``; anything else -> target is the raw ``y``.

    Returns:
        tuple: ``(inputs, target, max_k, min_k, AMT)``.

        * ``inputs`` is ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]`` for
          ``output_type`` 1 or 2, otherwise ``[BSA, AMT, t, form]``.
        * ``target`` has shape ``(n, 1)``: the min-max normalised ``k`` for
          ``output_type`` 1 or 2, otherwise ``y`` itself.
        * ``max_k`` / ``min_k`` are the extrema of ``k`` before scaling
          (for other ``output_type`` values ``k`` is ``-log(y / AMT)``).
        * ``AMT`` is the flat 1-D amount array.
    """
    t = np.reshape(df.iloc[:, 1].to_numpy(), (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].to_numpy(), (-1))
    y = df.iloc[:, 3].to_numpy()
    form = np.reshape(df.iloc[:, 4].to_numpy(), (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].to_numpy(), (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    if output_type == 1:
        k_train = -(np.log(y))
    else:
        k_train = -(np.log(y / AMT))

    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        span = max_k - min_k
        if span == 0:
            # All targets are identical (e.g. a single row): 0/0 would give
            # NaN, so map everything to 0, which de-normalises to min_k.
            scaled = np.zeros_like(k_train, dtype=float)
        else:
            scaled = (k_train - min_k) / span
        train_out_normalized = np.reshape(scaled, (-1, 1))
        train_in_normalized = np.concatenate(
            (np.power(BSA, 1 / 3), BSA, np.power(BSA, 3), AMT1, t, form),
            axis=1,
        )
        return train_in_normalized, train_out_normalized, max_k, min_k, AMT

    train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
    return train_in_normalized, np.reshape(y, (-1, 1)), max_k, min_k, AMT
83
+
84
def process_train_data_DNN(df, form_type, output_type):
    """Build DNN inputs and targets from a positionally indexed frame.

    Columns are read by position and named after the variables used in this
    function: 1 = ``t``, 2 = ``AMT``, 3 = ``y``, 4 = ``form``, 6 = ``BSA``.

    Args:
        df: Source DataFrame with at least seven columns.
        form_type: Unused; kept for call compatibility.
        output_type: ``1`` -> target is ``-log(y)``; ``2`` -> target is
            ``-log(y * 5 / AMT)``; anything else -> target is the raw ``y``.

    Returns:
        tuple: For ``output_type`` 1 or 2,
        ``(inputs, target, max_k, min_k, AMT)`` where ``inputs`` is
        ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]``, ``target`` is the
        min-max normalised ``k`` as a flat 1-D array and ``AMT`` is the flat
        amount array. For any other ``output_type`` the tuple has SIX
        elements: ``(inputs, y, max_k, min_k, max_AMT, min_AMT)`` with
        ``inputs = [BSA, AMT, t, form]``, ``y`` of shape ``(n, 1)`` and
        ``k = -log(y / AMT)``.
    """
    t = np.reshape(df.iloc[:, 1].to_numpy(), (-1, 1))
    AMT = np.reshape(df.iloc[:, 2].to_numpy(), (-1))
    y = df.iloc[:, 3].to_numpy()
    form = np.reshape(df.iloc[:, 4].to_numpy(), (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].to_numpy(), (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))
    max_AMT = np.max(AMT)
    min_AMT = np.min(AMT)

    if output_type == 1:
        k_train = -(np.log(y)) * 1.
    elif output_type == 2:
        # The factor 5 is undone by the "/ 5" in turn_back_DNN.
        k_train = -(np.log(y * 5 / AMT))
    else:
        k_train = -(np.log(y / AMT))

    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        span = max_k - min_k
        if span == 0:
            # All targets are identical (e.g. a single row): 0/0 would give
            # NaN, so map everything to 0, which de-normalises to min_k.
            train_out_normalized = np.zeros_like(k_train, dtype=float)
        else:
            train_out_normalized = (k_train - min_k) / span
        train_in_normalized = np.concatenate(
            (np.power(BSA, 1 / 3), BSA, np.power(BSA, 3), AMT1, t, form),
            axis=1,
        )
        return train_in_normalized, train_out_normalized, max_k, min_k, AMT

    train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
    y = np.reshape(y, (-1, 1))
    return train_in_normalized, y, max_k, min_k, max_AMT, min_AMT
142
+
143
+
144
def turn_back_DNN(data, max_k, min_k, train_data, output_type):
    """Map DNN outputs back from the normalised target scale.

    Args:
        data: Model outputs; flattened to 1-D for ``output_type`` 1 and 2.
        max_k, min_k: Extrema used for the min-max scaling of the targets.
        train_data: 2-D input matrix; column 3 is read as AMT when
            ``output_type`` is 2 and is not touched otherwise.
        output_type: ``1`` -> ``exp(-k)``; ``2`` -> ``AMT * exp(-k) / 5``;
            anything else -> ``data`` returned as floats, unchanged in value.

    Returns:
        numpy.ndarray: The back-transformed values.
    """
    if output_type != 1 and output_type != 2:
        return data / 1.

    # Undo the min-max scaling to recover k.
    k = np.reshape(data, -1) * (max_k - min_k) + min_k
    if output_type == 1:
        return np.exp(-k)

    # Column 3 holds AMT in the six-column inputs assembled by
    # process_train_data_DNN; "/ 5" undoes the "* 5" applied there.
    AMT = train_data[:, 3]
    return AMT * np.exp(-k) / 5
164
+
165
+
166
+
167
def turn_back(data, max_k, min_k, train_data, output_type):
    """Map model outputs back from the normalised target scale.

    Args:
        data: Model outputs; flattened to 1-D for ``output_type`` 1 and 2.
        max_k, min_k: Extrema used for the min-max scaling of the targets.
        train_data: 2-D input matrix; column 2 is read as AMT when
            ``output_type`` is 2 and is not touched otherwise.
        output_type: ``1`` -> ``exp(-k) / 1.25``; ``2`` -> ``AMT * exp(-k)``;
            anything else -> ``data`` returned as floats, unchanged in value.

    Returns:
        numpy.ndarray: The back-transformed values.
    """
    if output_type != 1 and output_type != 2:
        return data / 1.

    # Undo the min-max scaling to recover k.
    k = np.reshape(data, -1) * (max_k - min_k) + min_k
    if output_type == 1:
        # NOTE(review): the 1.25 divisor has no counterpart in
        # process_train_data (which uses plain -log(y)) -- confirm intent.
        return np.exp(-k) / 1.25

    # NOTE(review): process_train_data places AMT in column 3 of its
    # six-column input (column 2 is BSA**3). Column 2 is kept here because
    # the caller's layout is not visible -- verify against the caller.
    AMT = train_data[:, 2]
    return AMT * np.exp(-k)
187
+
188
def result_output(train_y, y_train_pre):
    """Print MSE, RMSE, R-squared and MAE for a set of predictions.

    Args:
        train_y: Ground-truth values.
        y_train_pre: Predicted values, same shape as ``train_y``.

    Returns:
        dict: The four metrics under the keys ``'mse'``, ``'rmse'``,
        ``'r2'`` and ``'mae'`` (the original function returned ``None``).
    """
    # ``mean_squared_error(..., squared=False)`` is deprecated in recent
    # scikit-learn releases, so RMSE is derived from the per-output MSE:
    # square root per output, then averaged, as ``squared=False`` did.
    per_output_mse = mean_squared_error(
        train_y, y_train_pre, multioutput='raw_values'
    )
    mse = float(np.mean(per_output_mse))
    rmse = float(np.mean(np.sqrt(per_output_mse)))
    r2 = r2_score(train_y, y_train_pre)
    mae = mean_absolute_error(train_y, y_train_pre)

    print('train_MSE:', mse)
    print('train_RMSE:', rmse)
    print('train_R-squared:', r2)
    print('train_MAE:', mae)
    return {'mse': mse, 'rmse': rmse, 'r2': r2, 'mae': mae}
198
+
199
def one_hot_encode(values, num_classes=10):
    """One-hot encode values by splitting [0, 1] into equal-width bins.

    Args:
        values: Scalar, sequence or array of numbers, nominally in [0, 1].
        num_classes: Number of bins (and width of each one-hot vector).

    Returns:
        numpy.ndarray: Float array of shape ``values.shape + (num_classes,)``
        with a single 1.0 per vector. A value of exactly 1.0 falls into the
        last bin; values outside [0, 1] are clamped to the first/last bin.
    """
    values = np.asarray(values, dtype=float)

    # Multiply rather than divide by the bin width: 0.7 / 0.1 evaluates to
    # 6.999... and would land in bin 6, whereas 0.7 * 10 gives bin 7.
    categories = np.floor(values * num_classes).astype(int)

    # Clamp to valid bin indices. This covers the value == 1.0 boundary and
    # stops out-of-range input from raising or wrapping to a negative index.
    categories = np.clip(categories, 0, num_classes - 1)

    return np.eye(num_classes)[categories]
211
+
212
+
213
+
214
def cal_accuracy(y_pred, test_y):
    """Print and return the share of predictions within 20% / 30% of truth.

    A prediction counts as "within p%" when
    ``abs(pred - actual) <= p * abs(actual)``.

    Args:
        y_pred: Predicted values; any shape, flattened internally.
        test_y: Actual values; must contain as many elements as ``y_pred``.

    Returns:
        tuple[float, float]: ``(within_20_percent, within_30_percent)`` as
        percentages in [0, 100].

    Raises:
        ValueError: If ``test_y`` is empty or the element counts differ.
    """
    predicted = np.asarray(y_pred, dtype=float).ravel()
    actual = np.asarray(test_y, dtype=float).ravel()
    if actual.size == 0:
        raise ValueError("cal_accuracy requires at least one observation")
    if predicted.size != actual.size:
        raise ValueError("y_pred and test_y must have the same number of elements")

    abs_error = np.abs(predicted - actual)
    # Use the magnitude of the actual value so that negative actuals get a
    # non-negative tolerance instead of being rejected unconditionally.
    scale = np.abs(actual)

    within_20_percent = float(np.mean(abs_error <= 0.20 * scale)) * 100
    within_30_percent = float(np.mean(abs_error <= 0.30 * scale)) * 100

    print("within_20_percent:", within_20_percent)
    print("within_30_percent:", within_30_percent)
    return within_20_percent, within_30_percent
223
+
224
def draw_acc(train_y, y_train_pre, txt=None, max_conc=100):
    """Scatter measured against predicted values with 20% / 30% bands.

    Args:
        train_y: Measured values (plotted on the y axis).
        y_train_pre: Predicted values (plotted on the x axis).
        txt: Optional output path; when given, the figure is also saved
            there as a 300 dpi TIFF.
        max_conc: Upper end of the x range used for the tolerance bands and
            the x-axis limit. Defaults to 100, the previously fixed value.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots()

    ax.scatter(y_train_pre, train_y, s=10, label='Observations')
    ax.set_xlabel('Predicted Concentration')
    ax.set_ylabel('Measured Concentration')
    ax.grid(True)

    x = np.linspace(0, max_conc, 500)
    # (lower factor, upper factor, colour, legend prefix) per tolerance band.
    bands = (
        (0.8, 1.2, 'blue', '20%'),
        (0.7, 1.3, 'red', '30%'),
    )

    # Dashed boundary lines first, then the translucent fills.
    for lower, upper, color, name in bands:
        ax.plot(x, x * upper, color=color, linestyle='--',
                label=name + ' Upper Bound')
        ax.plot(x, x * lower, color=color, linestyle='--',
                label=name + ' Lower Bound')
    for lower, upper, color, _ in bands:
        ax.fill_between(x, x * lower, x * upper, color=color, alpha=0.1)

    ax.set_xlim([-5, max_conc])
    ax.legend()
    fig.set_facecolor('white')

    # Save before showing the figure.
    if txt is not None:
        fig.savefig(txt, dpi=300, format='tif')

    plt.show()