keegan111 commited on
Commit
3731d81
·
verified ·
1 Parent(s): 008c0f2

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. cross_cell_type_generization/.DS_Store +0 -0
  3. cross_cell_type_generization/L4/.DS_Store +0 -0
  4. cross_cell_type_generization/L4/Epitheliums/.DS_Store +0 -0
  5. cross_cell_type_generization/L4/Epitheliums/config.json +25 -0
  6. cross_cell_type_generization/L4/Epitheliums/eval_results.json +8 -0
  7. cross_cell_type_generization/L4/Macrophages/.DS_Store +0 -0
  8. cross_cell_type_generization/L4/Macrophages/all_results.json +8 -0
  9. cross_cell_type_generization/L4/Macrophages/config.json +25 -0
  10. cross_cell_type_generization/L4/Macrophages/eval_results.json +8 -0
  11. cross_cell_type_generization/L4/Macrophages/trainer_state.json +450 -0
  12. cross_cell_type_generization/L4/NKT/.DS_Store +0 -0
  13. cross_cell_type_generization/L4/NKT/all_results.json +8 -0
  14. cross_cell_type_generization/L4/NKT/config.json +25 -0
  15. cross_cell_type_generization/L4/NKT/eval_results.json +8 -0
  16. cross_cell_type_generization/L4/NKT/trainer_state.json +450 -0
  17. cross_cell_type_generization/L4/NKs/.DS_Store +0 -0
  18. cross_cell_type_generization/L4/NKs/all_results.json +8 -0
  19. cross_cell_type_generization/L4/NKs/config.json +25 -0
  20. cross_cell_type_generization/L4/NKs/eval_results.json +8 -0
  21. cross_cell_type_generization/L4/NKs/trainer_state.json +450 -0
  22. cross_cell_type_generization/L4/T cells/.DS_Store +0 -0
  23. cross_cell_type_generization/L4/T cells/all_results.json +8 -0
  24. cross_cell_type_generization/L4/T cells/config.json +25 -0
  25. cross_cell_type_generization/L4/T cells/eval_results.json +8 -0
  26. cross_cell_type_generization/L4/T cells/trainer_state.json +450 -0
  27. data_curation&frozen_layers/.DS_Store +0 -0
  28. data_curation&frozen_layers/dataset1_all/.DS_Store +0 -0
  29. data_curation&frozen_layers/dataset1_all/F0/.DS_Store +0 -0
  30. data_curation&frozen_layers/dataset1_all/F0/all_results.json +8 -0
  31. data_curation&frozen_layers/dataset1_all/F0/config.json +27 -0
  32. data_curation&frozen_layers/dataset1_all/F0/eval_results.json +8 -0
  33. data_curation&frozen_layers/dataset1_all/F0/trainer_state.json +398 -0
  34. data_curation&frozen_layers/dataset1_all/F2/.DS_Store +0 -0
  35. data_curation&frozen_layers/dataset1_all/F2/all_results.json +8 -0
  36. data_curation&frozen_layers/dataset1_all/F2/config.json +27 -0
  37. data_curation&frozen_layers/dataset1_all/F2/eval_results.json +8 -0
  38. data_curation&frozen_layers/dataset1_all/F2/predictions.pickle +3 -0
  39. data_curation&frozen_layers/dataset1_all/F2/rng_state.pth +3 -0
  40. data_curation&frozen_layers/dataset1_all/F2/scheduler.pt +3 -0
  41. data_curation&frozen_layers/dataset1_all/F2/trainer_state.json +566 -0
  42. data_curation&frozen_layers/dataset1_all/F4/.DS_Store +0 -0
  43. data_curation&frozen_layers/dataset1_all/F4/all_results.json +8 -0
  44. data_curation&frozen_layers/dataset1_all/F4/config.json +27 -0
  45. data_curation&frozen_layers/dataset1_all/F4/eval_results.json +8 -0
  46. data_curation&frozen_layers/dataset1_all/F4/optimizer.pt +3 -0
  47. data_curation&frozen_layers/dataset1_all/F4/predictions.pickle +3 -0
  48. data_curation&frozen_layers/dataset1_all/F4/pytorch_model.bin +3 -0
  49. data_curation&frozen_layers/dataset1_all/F4/rng_state.pth +3 -0
  50. data_curation&frozen_layers/dataset1_all/F4/scheduler.pt +3 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
cross_cell_type_generization/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/.DS_Store and b/cross_cell_type_generization/.DS_Store differ
 
cross_cell_type_generization/L4/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/L4/.DS_Store and b/cross_cell_type_generization/L4/.DS_Store differ
 
cross_cell_type_generization/L4/Epitheliums/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/L4/Epitheliums/.DS_Store and b/cross_cell_type_generization/L4/Epitheliums/.DS_Store differ
 
cross_cell_type_generization/L4/Epitheliums/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.02,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.02,
9
+ "hidden_size": 256,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 2048,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 4,
16
+ "num_hidden_layers": 4,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.28.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
cross_cell_type_generization/L4/Epitheliums/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7655555555555555,
3
+ "test_loss": 0.5546273589134216,
4
+ "test_macro_f1": 0.7257171579155659,
5
+ "test_runtime": 2.5959,
6
+ "test_samples_per_second": 346.703,
7
+ "test_steps_per_second": 3.467
8
+ }
cross_cell_type_generization/L4/Macrophages/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/L4/Macrophages/.DS_Store and b/cross_cell_type_generization/L4/Macrophages/.DS_Store differ
 
cross_cell_type_generization/L4/Macrophages/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.8377777777777777,
3
+ "test_loss": 0.3709336817264557,
4
+ "test_macro_f1": 0.8374237227740413,
5
+ "test_runtime": 2.6942,
6
+ "test_samples_per_second": 334.056,
7
+ "test_steps_per_second": 3.341
8
+ }
cross_cell_type_generization/L4/Macrophages/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.02,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.02,
9
+ "hidden_size": 256,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 2048,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 4,
16
+ "num_hidden_layers": 4,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.28.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
cross_cell_type_generization/L4/Macrophages/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.8377777777777777,
3
+ "test_loss": 0.3709336817264557,
4
+ "test_macro_f1": 0.8374237227740413,
5
+ "test_runtime": 2.6942,
6
+ "test_samples_per_second": 334.056,
7
+ "test_steps_per_second": 3.341
8
+ }
cross_cell_type_generization/L4/Macrophages/trainer_state.json ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.39814653992652893,
3
+ "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_diversity/L4/Macrophages/fold4/checkpoint-1122",
4
+ "epoch": 8.0,
5
+ "global_step": 1496,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 2.5e-06,
13
+ "loss": 0.6917,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "learning_rate": 5e-06,
19
+ "loss": 0.6919,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.4,
24
+ "learning_rate": 7.5e-06,
25
+ "loss": 0.6932,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.53,
30
+ "learning_rate": 1e-05,
31
+ "loss": 0.6934,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.67,
36
+ "learning_rate": 1.25e-05,
37
+ "loss": 0.6944,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "learning_rate": 1.5e-05,
43
+ "loss": 0.6912,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.94,
48
+ "learning_rate": 1.75e-05,
49
+ "loss": 0.6906,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.5171275278580273,
55
+ "eval_loss": 0.6910191178321838,
56
+ "eval_macro_f1": 0.4080778330325605,
57
+ "eval_runtime": 14.041,
58
+ "eval_samples_per_second": 345.132,
59
+ "eval_steps_per_second": 3.347,
60
+ "step": 187
61
+ },
62
+ {
63
+ "epoch": 1.07,
64
+ "learning_rate": 2e-05,
65
+ "loss": 0.6912,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 1.2,
70
+ "learning_rate": 2.25e-05,
71
+ "loss": 0.6911,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 1.34,
76
+ "learning_rate": 2.5e-05,
77
+ "loss": 0.6875,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 1.47,
82
+ "learning_rate": 2.7500000000000004e-05,
83
+ "loss": 0.6873,
84
+ "step": 275
85
+ },
86
+ {
87
+ "epoch": 1.6,
88
+ "learning_rate": 3e-05,
89
+ "loss": 0.6844,
90
+ "step": 300
91
+ },
92
+ {
93
+ "epoch": 1.74,
94
+ "learning_rate": 3.2500000000000004e-05,
95
+ "loss": 0.6814,
96
+ "step": 325
97
+ },
98
+ {
99
+ "epoch": 1.87,
100
+ "learning_rate": 3.5e-05,
101
+ "loss": 0.6795,
102
+ "step": 350
103
+ },
104
+ {
105
+ "epoch": 2.0,
106
+ "eval_accuracy": 0.5870821295914156,
107
+ "eval_loss": 0.6697810292243958,
108
+ "eval_macro_f1": 0.5825487540387015,
109
+ "eval_runtime": 14.066,
110
+ "eval_samples_per_second": 344.519,
111
+ "eval_steps_per_second": 3.341,
112
+ "step": 374
113
+ },
114
+ {
115
+ "epoch": 2.01,
116
+ "learning_rate": 3.7500000000000003e-05,
117
+ "loss": 0.6781,
118
+ "step": 375
119
+ },
120
+ {
121
+ "epoch": 2.14,
122
+ "learning_rate": 4e-05,
123
+ "loss": 0.6564,
124
+ "step": 400
125
+ },
126
+ {
127
+ "epoch": 2.27,
128
+ "learning_rate": 4.25e-05,
129
+ "loss": 0.6297,
130
+ "step": 425
131
+ },
132
+ {
133
+ "epoch": 2.41,
134
+ "learning_rate": 4.5e-05,
135
+ "loss": 0.6131,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 2.54,
140
+ "learning_rate": 4.75e-05,
141
+ "loss": 0.5841,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 2.67,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.5394,
148
+ "step": 500
149
+ },
150
+ {
151
+ "epoch": 2.81,
152
+ "learning_rate": 4.9858757062146896e-05,
153
+ "loss": 0.492,
154
+ "step": 525
155
+ },
156
+ {
157
+ "epoch": 2.94,
158
+ "learning_rate": 4.971751412429379e-05,
159
+ "loss": 0.4617,
160
+ "step": 550
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.7686751960379694,
165
+ "eval_loss": 0.46376147866249084,
166
+ "eval_macro_f1": 0.7685935958162224,
167
+ "eval_runtime": 14.296,
168
+ "eval_samples_per_second": 338.976,
169
+ "eval_steps_per_second": 3.288,
170
+ "step": 561
171
+ },
172
+ {
173
+ "epoch": 3.07,
174
+ "learning_rate": 4.957627118644068e-05,
175
+ "loss": 0.4375,
176
+ "step": 575
177
+ },
178
+ {
179
+ "epoch": 3.21,
180
+ "learning_rate": 4.9435028248587575e-05,
181
+ "loss": 0.407,
182
+ "step": 600
183
+ },
184
+ {
185
+ "epoch": 3.34,
186
+ "learning_rate": 4.929378531073446e-05,
187
+ "loss": 0.414,
188
+ "step": 625
189
+ },
190
+ {
191
+ "epoch": 3.48,
192
+ "learning_rate": 4.915254237288136e-05,
193
+ "loss": 0.4284,
194
+ "step": 650
195
+ },
196
+ {
197
+ "epoch": 3.61,
198
+ "learning_rate": 4.9011299435028255e-05,
199
+ "loss": 0.4237,
200
+ "step": 675
201
+ },
202
+ {
203
+ "epoch": 3.74,
204
+ "learning_rate": 4.887005649717514e-05,
205
+ "loss": 0.3709,
206
+ "step": 700
207
+ },
208
+ {
209
+ "epoch": 3.88,
210
+ "learning_rate": 4.8728813559322034e-05,
211
+ "loss": 0.3842,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 4.0,
216
+ "eval_accuracy": 0.7944696657036732,
217
+ "eval_loss": 0.4250829815864563,
218
+ "eval_macro_f1": 0.7944693506298842,
219
+ "eval_runtime": 14.2291,
220
+ "eval_samples_per_second": 340.57,
221
+ "eval_steps_per_second": 3.303,
222
+ "step": 748
223
+ },
224
+ {
225
+ "epoch": 4.01,
226
+ "learning_rate": 4.8587570621468934e-05,
227
+ "loss": 0.3891,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 4.14,
232
+ "learning_rate": 4.844632768361582e-05,
233
+ "loss": 0.3184,
234
+ "step": 775
235
+ },
236
+ {
237
+ "epoch": 4.28,
238
+ "learning_rate": 4.8305084745762714e-05,
239
+ "loss": 0.3392,
240
+ "step": 800
241
+ },
242
+ {
243
+ "epoch": 4.41,
244
+ "learning_rate": 4.816384180790961e-05,
245
+ "loss": 0.3404,
246
+ "step": 825
247
+ },
248
+ {
249
+ "epoch": 4.55,
250
+ "learning_rate": 4.80225988700565e-05,
251
+ "loss": 0.3744,
252
+ "step": 850
253
+ },
254
+ {
255
+ "epoch": 4.68,
256
+ "learning_rate": 4.788135593220339e-05,
257
+ "loss": 0.3229,
258
+ "step": 875
259
+ },
260
+ {
261
+ "epoch": 4.81,
262
+ "learning_rate": 4.7740112994350286e-05,
263
+ "loss": 0.3298,
264
+ "step": 900
265
+ },
266
+ {
267
+ "epoch": 4.95,
268
+ "learning_rate": 4.759887005649718e-05,
269
+ "loss": 0.3446,
270
+ "step": 925
271
+ },
272
+ {
273
+ "epoch": 5.0,
274
+ "eval_accuracy": 0.7911679735864631,
275
+ "eval_loss": 0.4325283169746399,
276
+ "eval_macro_f1": 0.7906017246983076,
277
+ "eval_runtime": 13.721,
278
+ "eval_samples_per_second": 353.181,
279
+ "eval_steps_per_second": 3.425,
280
+ "step": 935
281
+ },
282
+ {
283
+ "epoch": 5.08,
284
+ "learning_rate": 4.745762711864407e-05,
285
+ "loss": 0.319,
286
+ "step": 950
287
+ },
288
+ {
289
+ "epoch": 5.21,
290
+ "learning_rate": 4.7316384180790966e-05,
291
+ "loss": 0.266,
292
+ "step": 975
293
+ },
294
+ {
295
+ "epoch": 5.35,
296
+ "learning_rate": 4.717514124293785e-05,
297
+ "loss": 0.2992,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 5.48,
302
+ "learning_rate": 4.703389830508475e-05,
303
+ "loss": 0.2987,
304
+ "step": 1025
305
+ },
306
+ {
307
+ "epoch": 5.61,
308
+ "learning_rate": 4.689265536723164e-05,
309
+ "loss": 0.2906,
310
+ "step": 1050
311
+ },
312
+ {
313
+ "epoch": 5.75,
314
+ "learning_rate": 4.675141242937853e-05,
315
+ "loss": 0.3031,
316
+ "step": 1075
317
+ },
318
+ {
319
+ "epoch": 5.88,
320
+ "learning_rate": 4.6610169491525425e-05,
321
+ "loss": 0.2794,
322
+ "step": 1100
323
+ },
324
+ {
325
+ "epoch": 6.0,
326
+ "eval_accuracy": 0.819026000825423,
327
+ "eval_loss": 0.39814653992652893,
328
+ "eval_macro_f1": 0.8178884568287126,
329
+ "eval_runtime": 13.8654,
330
+ "eval_samples_per_second": 349.504,
331
+ "eval_steps_per_second": 3.39,
332
+ "step": 1122
333
+ },
334
+ {
335
+ "epoch": 6.02,
336
+ "learning_rate": 4.646892655367232e-05,
337
+ "loss": 0.2802,
338
+ "step": 1125
339
+ },
340
+ {
341
+ "epoch": 6.15,
342
+ "learning_rate": 4.632768361581921e-05,
343
+ "loss": 0.2637,
344
+ "step": 1150
345
+ },
346
+ {
347
+ "epoch": 6.28,
348
+ "learning_rate": 4.6186440677966104e-05,
349
+ "loss": 0.274,
350
+ "step": 1175
351
+ },
352
+ {
353
+ "epoch": 6.42,
354
+ "learning_rate": 4.6045197740113e-05,
355
+ "loss": 0.2754,
356
+ "step": 1200
357
+ },
358
+ {
359
+ "epoch": 6.55,
360
+ "learning_rate": 4.590395480225989e-05,
361
+ "loss": 0.2332,
362
+ "step": 1225
363
+ },
364
+ {
365
+ "epoch": 6.68,
366
+ "learning_rate": 4.5762711864406784e-05,
367
+ "loss": 0.259,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 6.82,
372
+ "learning_rate": 4.562146892655367e-05,
373
+ "loss": 0.2446,
374
+ "step": 1275
375
+ },
376
+ {
377
+ "epoch": 6.95,
378
+ "learning_rate": 4.548022598870056e-05,
379
+ "loss": 0.246,
380
+ "step": 1300
381
+ },
382
+ {
383
+ "epoch": 7.0,
384
+ "eval_accuracy": 0.8208832026413537,
385
+ "eval_loss": 0.4227023422718048,
386
+ "eval_macro_f1": 0.8184059652831023,
387
+ "eval_runtime": 13.7547,
388
+ "eval_samples_per_second": 352.315,
389
+ "eval_steps_per_second": 3.417,
390
+ "step": 1309
391
+ },
392
+ {
393
+ "epoch": 7.09,
394
+ "learning_rate": 4.533898305084746e-05,
395
+ "loss": 0.232,
396
+ "step": 1325
397
+ },
398
+ {
399
+ "epoch": 7.22,
400
+ "learning_rate": 4.519774011299435e-05,
401
+ "loss": 0.2373,
402
+ "step": 1350
403
+ },
404
+ {
405
+ "epoch": 7.35,
406
+ "learning_rate": 4.505649717514124e-05,
407
+ "loss": 0.2381,
408
+ "step": 1375
409
+ },
410
+ {
411
+ "epoch": 7.49,
412
+ "learning_rate": 4.491525423728814e-05,
413
+ "loss": 0.2607,
414
+ "step": 1400
415
+ },
416
+ {
417
+ "epoch": 7.62,
418
+ "learning_rate": 4.477401129943503e-05,
419
+ "loss": 0.221,
420
+ "step": 1425
421
+ },
422
+ {
423
+ "epoch": 7.75,
424
+ "learning_rate": 4.463276836158192e-05,
425
+ "loss": 0.2267,
426
+ "step": 1450
427
+ },
428
+ {
429
+ "epoch": 7.89,
430
+ "learning_rate": 4.4491525423728816e-05,
431
+ "loss": 0.21,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 8.0,
436
+ "eval_accuracy": 0.8012794056954189,
437
+ "eval_loss": 0.47252345085144043,
438
+ "eval_macro_f1": 0.8003815623782113,
439
+ "eval_runtime": 14.1303,
440
+ "eval_samples_per_second": 342.951,
441
+ "eval_steps_per_second": 3.326,
442
+ "step": 1496
443
+ }
444
+ ],
445
+ "max_steps": 9350,
446
+ "num_train_epochs": 50,
447
+ "total_flos": 4145176027791360.0,
448
+ "trial_name": null,
449
+ "trial_params": null
450
+ }
cross_cell_type_generization/L4/NKT/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/L4/NKT/.DS_Store and b/cross_cell_type_generization/L4/NKT/.DS_Store differ
 
cross_cell_type_generization/L4/NKT/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9466666666666667,
3
+ "test_loss": 0.16052913665771484,
4
+ "test_macro_f1": 0.9411302982731554,
5
+ "test_runtime": 2.5904,
6
+ "test_samples_per_second": 347.442,
7
+ "test_steps_per_second": 3.474
8
+ }
cross_cell_type_generization/L4/NKT/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.02,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.02,
9
+ "hidden_size": 256,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 2048,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 4,
16
+ "num_hidden_layers": 4,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.28.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
cross_cell_type_generization/L4/NKT/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9466666666666667,
3
+ "test_loss": 0.16052913665771484,
4
+ "test_macro_f1": 0.9411302982731554,
5
+ "test_runtime": 2.5904,
6
+ "test_samples_per_second": 347.442,
7
+ "test_steps_per_second": 3.474
8
+ }
cross_cell_type_generization/L4/NKT/trainer_state.json ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.39814653992652893,
3
+ "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_diversity/L4/NKT/fold4/checkpoint-1122",
4
+ "epoch": 8.0,
5
+ "global_step": 1496,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 2.5e-06,
13
+ "loss": 0.6917,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "learning_rate": 5e-06,
19
+ "loss": 0.6919,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.4,
24
+ "learning_rate": 7.5e-06,
25
+ "loss": 0.6932,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.53,
30
+ "learning_rate": 1e-05,
31
+ "loss": 0.6934,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.67,
36
+ "learning_rate": 1.25e-05,
37
+ "loss": 0.6944,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "learning_rate": 1.5e-05,
43
+ "loss": 0.6912,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.94,
48
+ "learning_rate": 1.75e-05,
49
+ "loss": 0.6906,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.5171275278580273,
55
+ "eval_loss": 0.6910191178321838,
56
+ "eval_macro_f1": 0.4080778330325605,
57
+ "eval_runtime": 13.555,
58
+ "eval_samples_per_second": 357.506,
59
+ "eval_steps_per_second": 3.467,
60
+ "step": 187
61
+ },
62
+ {
63
+ "epoch": 1.07,
64
+ "learning_rate": 2e-05,
65
+ "loss": 0.6912,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 1.2,
70
+ "learning_rate": 2.25e-05,
71
+ "loss": 0.6911,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 1.34,
76
+ "learning_rate": 2.5e-05,
77
+ "loss": 0.6875,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 1.47,
82
+ "learning_rate": 2.7500000000000004e-05,
83
+ "loss": 0.6873,
84
+ "step": 275
85
+ },
86
+ {
87
+ "epoch": 1.6,
88
+ "learning_rate": 3e-05,
89
+ "loss": 0.6844,
90
+ "step": 300
91
+ },
92
+ {
93
+ "epoch": 1.74,
94
+ "learning_rate": 3.2500000000000004e-05,
95
+ "loss": 0.6814,
96
+ "step": 325
97
+ },
98
+ {
99
+ "epoch": 1.87,
100
+ "learning_rate": 3.5e-05,
101
+ "loss": 0.6795,
102
+ "step": 350
103
+ },
104
+ {
105
+ "epoch": 2.0,
106
+ "eval_accuracy": 0.5870821295914156,
107
+ "eval_loss": 0.6697810292243958,
108
+ "eval_macro_f1": 0.5825487540387015,
109
+ "eval_runtime": 14.1178,
110
+ "eval_samples_per_second": 343.255,
111
+ "eval_steps_per_second": 3.329,
112
+ "step": 374
113
+ },
114
+ {
115
+ "epoch": 2.01,
116
+ "learning_rate": 3.7500000000000003e-05,
117
+ "loss": 0.6781,
118
+ "step": 375
119
+ },
120
+ {
121
+ "epoch": 2.14,
122
+ "learning_rate": 4e-05,
123
+ "loss": 0.6564,
124
+ "step": 400
125
+ },
126
+ {
127
+ "epoch": 2.27,
128
+ "learning_rate": 4.25e-05,
129
+ "loss": 0.6297,
130
+ "step": 425
131
+ },
132
+ {
133
+ "epoch": 2.41,
134
+ "learning_rate": 4.5e-05,
135
+ "loss": 0.6131,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 2.54,
140
+ "learning_rate": 4.75e-05,
141
+ "loss": 0.5841,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 2.67,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.5394,
148
+ "step": 500
149
+ },
150
+ {
151
+ "epoch": 2.81,
152
+ "learning_rate": 4.9858757062146896e-05,
153
+ "loss": 0.492,
154
+ "step": 525
155
+ },
156
+ {
157
+ "epoch": 2.94,
158
+ "learning_rate": 4.971751412429379e-05,
159
+ "loss": 0.4617,
160
+ "step": 550
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.7686751960379694,
165
+ "eval_loss": 0.46376147866249084,
166
+ "eval_macro_f1": 0.7685935958162224,
167
+ "eval_runtime": 14.3072,
168
+ "eval_samples_per_second": 338.711,
169
+ "eval_steps_per_second": 3.285,
170
+ "step": 561
171
+ },
172
+ {
173
+ "epoch": 3.07,
174
+ "learning_rate": 4.957627118644068e-05,
175
+ "loss": 0.4375,
176
+ "step": 575
177
+ },
178
+ {
179
+ "epoch": 3.21,
180
+ "learning_rate": 4.9435028248587575e-05,
181
+ "loss": 0.407,
182
+ "step": 600
183
+ },
184
+ {
185
+ "epoch": 3.34,
186
+ "learning_rate": 4.929378531073446e-05,
187
+ "loss": 0.414,
188
+ "step": 625
189
+ },
190
+ {
191
+ "epoch": 3.48,
192
+ "learning_rate": 4.915254237288136e-05,
193
+ "loss": 0.4284,
194
+ "step": 650
195
+ },
196
+ {
197
+ "epoch": 3.61,
198
+ "learning_rate": 4.9011299435028255e-05,
199
+ "loss": 0.4237,
200
+ "step": 675
201
+ },
202
+ {
203
+ "epoch": 3.74,
204
+ "learning_rate": 4.887005649717514e-05,
205
+ "loss": 0.3709,
206
+ "step": 700
207
+ },
208
+ {
209
+ "epoch": 3.88,
210
+ "learning_rate": 4.8728813559322034e-05,
211
+ "loss": 0.3842,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 4.0,
216
+ "eval_accuracy": 0.7944696657036732,
217
+ "eval_loss": 0.4250829815864563,
218
+ "eval_macro_f1": 0.7944693506298842,
219
+ "eval_runtime": 14.4864,
220
+ "eval_samples_per_second": 334.521,
221
+ "eval_steps_per_second": 3.244,
222
+ "step": 748
223
+ },
224
+ {
225
+ "epoch": 4.01,
226
+ "learning_rate": 4.8587570621468934e-05,
227
+ "loss": 0.3891,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 4.14,
232
+ "learning_rate": 4.844632768361582e-05,
233
+ "loss": 0.3184,
234
+ "step": 775
235
+ },
236
+ {
237
+ "epoch": 4.28,
238
+ "learning_rate": 4.8305084745762714e-05,
239
+ "loss": 0.3392,
240
+ "step": 800
241
+ },
242
+ {
243
+ "epoch": 4.41,
244
+ "learning_rate": 4.816384180790961e-05,
245
+ "loss": 0.3404,
246
+ "step": 825
247
+ },
248
+ {
249
+ "epoch": 4.55,
250
+ "learning_rate": 4.80225988700565e-05,
251
+ "loss": 0.3744,
252
+ "step": 850
253
+ },
254
+ {
255
+ "epoch": 4.68,
256
+ "learning_rate": 4.788135593220339e-05,
257
+ "loss": 0.3229,
258
+ "step": 875
259
+ },
260
+ {
261
+ "epoch": 4.81,
262
+ "learning_rate": 4.7740112994350286e-05,
263
+ "loss": 0.3298,
264
+ "step": 900
265
+ },
266
+ {
267
+ "epoch": 4.95,
268
+ "learning_rate": 4.759887005649718e-05,
269
+ "loss": 0.3446,
270
+ "step": 925
271
+ },
272
+ {
273
+ "epoch": 5.0,
274
+ "eval_accuracy": 0.7911679735864631,
275
+ "eval_loss": 0.4325283169746399,
276
+ "eval_macro_f1": 0.7906017246983076,
277
+ "eval_runtime": 14.5425,
278
+ "eval_samples_per_second": 333.229,
279
+ "eval_steps_per_second": 3.232,
280
+ "step": 935
281
+ },
282
+ {
283
+ "epoch": 5.08,
284
+ "learning_rate": 4.745762711864407e-05,
285
+ "loss": 0.319,
286
+ "step": 950
287
+ },
288
+ {
289
+ "epoch": 5.21,
290
+ "learning_rate": 4.7316384180790966e-05,
291
+ "loss": 0.266,
292
+ "step": 975
293
+ },
294
+ {
295
+ "epoch": 5.35,
296
+ "learning_rate": 4.717514124293785e-05,
297
+ "loss": 0.2992,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 5.48,
302
+ "learning_rate": 4.703389830508475e-05,
303
+ "loss": 0.2987,
304
+ "step": 1025
305
+ },
306
+ {
307
+ "epoch": 5.61,
308
+ "learning_rate": 4.689265536723164e-05,
309
+ "loss": 0.2906,
310
+ "step": 1050
311
+ },
312
+ {
313
+ "epoch": 5.75,
314
+ "learning_rate": 4.675141242937853e-05,
315
+ "loss": 0.3031,
316
+ "step": 1075
317
+ },
318
+ {
319
+ "epoch": 5.88,
320
+ "learning_rate": 4.6610169491525425e-05,
321
+ "loss": 0.2794,
322
+ "step": 1100
323
+ },
324
+ {
325
+ "epoch": 6.0,
326
+ "eval_accuracy": 0.819026000825423,
327
+ "eval_loss": 0.39814653992652893,
328
+ "eval_macro_f1": 0.8178884568287126,
329
+ "eval_runtime": 14.7665,
330
+ "eval_samples_per_second": 328.175,
331
+ "eval_steps_per_second": 3.183,
332
+ "step": 1122
333
+ },
334
+ {
335
+ "epoch": 6.02,
336
+ "learning_rate": 4.646892655367232e-05,
337
+ "loss": 0.2802,
338
+ "step": 1125
339
+ },
340
+ {
341
+ "epoch": 6.15,
342
+ "learning_rate": 4.632768361581921e-05,
343
+ "loss": 0.2637,
344
+ "step": 1150
345
+ },
346
+ {
347
+ "epoch": 6.28,
348
+ "learning_rate": 4.6186440677966104e-05,
349
+ "loss": 0.274,
350
+ "step": 1175
351
+ },
352
+ {
353
+ "epoch": 6.42,
354
+ "learning_rate": 4.6045197740113e-05,
355
+ "loss": 0.2754,
356
+ "step": 1200
357
+ },
358
+ {
359
+ "epoch": 6.55,
360
+ "learning_rate": 4.590395480225989e-05,
361
+ "loss": 0.2332,
362
+ "step": 1225
363
+ },
364
+ {
365
+ "epoch": 6.68,
366
+ "learning_rate": 4.5762711864406784e-05,
367
+ "loss": 0.259,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 6.82,
372
+ "learning_rate": 4.562146892655367e-05,
373
+ "loss": 0.2446,
374
+ "step": 1275
375
+ },
376
+ {
377
+ "epoch": 6.95,
378
+ "learning_rate": 4.548022598870056e-05,
379
+ "loss": 0.246,
380
+ "step": 1300
381
+ },
382
+ {
383
+ "epoch": 7.0,
384
+ "eval_accuracy": 0.8208832026413537,
385
+ "eval_loss": 0.4227023422718048,
386
+ "eval_macro_f1": 0.8184059652831023,
387
+ "eval_runtime": 13.9954,
388
+ "eval_samples_per_second": 346.256,
389
+ "eval_steps_per_second": 3.358,
390
+ "step": 1309
391
+ },
392
+ {
393
+ "epoch": 7.09,
394
+ "learning_rate": 4.533898305084746e-05,
395
+ "loss": 0.232,
396
+ "step": 1325
397
+ },
398
+ {
399
+ "epoch": 7.22,
400
+ "learning_rate": 4.519774011299435e-05,
401
+ "loss": 0.2373,
402
+ "step": 1350
403
+ },
404
+ {
405
+ "epoch": 7.35,
406
+ "learning_rate": 4.505649717514124e-05,
407
+ "loss": 0.2381,
408
+ "step": 1375
409
+ },
410
+ {
411
+ "epoch": 7.49,
412
+ "learning_rate": 4.491525423728814e-05,
413
+ "loss": 0.2607,
414
+ "step": 1400
415
+ },
416
+ {
417
+ "epoch": 7.62,
418
+ "learning_rate": 4.477401129943503e-05,
419
+ "loss": 0.221,
420
+ "step": 1425
421
+ },
422
+ {
423
+ "epoch": 7.75,
424
+ "learning_rate": 4.463276836158192e-05,
425
+ "loss": 0.2267,
426
+ "step": 1450
427
+ },
428
+ {
429
+ "epoch": 7.89,
430
+ "learning_rate": 4.4491525423728816e-05,
431
+ "loss": 0.21,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 8.0,
436
+ "eval_accuracy": 0.8012794056954189,
437
+ "eval_loss": 0.47252345085144043,
438
+ "eval_macro_f1": 0.8003815623782113,
439
+ "eval_runtime": 14.3033,
440
+ "eval_samples_per_second": 338.804,
441
+ "eval_steps_per_second": 3.286,
442
+ "step": 1496
443
+ }
444
+ ],
445
+ "max_steps": 9350,
446
+ "num_train_epochs": 50,
447
+ "total_flos": 4145176027791360.0,
448
+ "trial_name": null,
449
+ "trial_params": null
450
+ }
cross_cell_type_generization/L4/NKs/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/L4/NKs/.DS_Store and b/cross_cell_type_generization/L4/NKs/.DS_Store differ
 
cross_cell_type_generization/L4/NKs/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9044444444444445,
3
+ "test_loss": 0.24761898815631866,
4
+ "test_macro_f1": 0.861289328238912,
5
+ "test_runtime": 2.5343,
6
+ "test_samples_per_second": 355.126,
7
+ "test_steps_per_second": 3.551
8
+ }
cross_cell_type_generization/L4/NKs/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.02,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.02,
9
+ "hidden_size": 256,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 2048,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 4,
16
+ "num_hidden_layers": 4,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.28.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
cross_cell_type_generization/L4/NKs/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9044444444444445,
3
+ "test_loss": 0.24761898815631866,
4
+ "test_macro_f1": 0.861289328238912,
5
+ "test_runtime": 2.5343,
6
+ "test_samples_per_second": 355.126,
7
+ "test_steps_per_second": 3.551
8
+ }
cross_cell_type_generization/L4/NKs/trainer_state.json ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.39814653992652893,
3
+ "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_diversity/L4/NKs/fold4/checkpoint-1122",
4
+ "epoch": 8.0,
5
+ "global_step": 1496,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 2.5e-06,
13
+ "loss": 0.6917,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "learning_rate": 5e-06,
19
+ "loss": 0.6919,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.4,
24
+ "learning_rate": 7.5e-06,
25
+ "loss": 0.6932,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.53,
30
+ "learning_rate": 1e-05,
31
+ "loss": 0.6934,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.67,
36
+ "learning_rate": 1.25e-05,
37
+ "loss": 0.6944,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "learning_rate": 1.5e-05,
43
+ "loss": 0.6912,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.94,
48
+ "learning_rate": 1.75e-05,
49
+ "loss": 0.6906,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.5171275278580273,
55
+ "eval_loss": 0.6910191178321838,
56
+ "eval_macro_f1": 0.4080778330325605,
57
+ "eval_runtime": 14.5066,
58
+ "eval_samples_per_second": 334.055,
59
+ "eval_steps_per_second": 3.24,
60
+ "step": 187
61
+ },
62
+ {
63
+ "epoch": 1.07,
64
+ "learning_rate": 2e-05,
65
+ "loss": 0.6912,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 1.2,
70
+ "learning_rate": 2.25e-05,
71
+ "loss": 0.6911,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 1.34,
76
+ "learning_rate": 2.5e-05,
77
+ "loss": 0.6875,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 1.47,
82
+ "learning_rate": 2.7500000000000004e-05,
83
+ "loss": 0.6873,
84
+ "step": 275
85
+ },
86
+ {
87
+ "epoch": 1.6,
88
+ "learning_rate": 3e-05,
89
+ "loss": 0.6844,
90
+ "step": 300
91
+ },
92
+ {
93
+ "epoch": 1.74,
94
+ "learning_rate": 3.2500000000000004e-05,
95
+ "loss": 0.6814,
96
+ "step": 325
97
+ },
98
+ {
99
+ "epoch": 1.87,
100
+ "learning_rate": 3.5e-05,
101
+ "loss": 0.6795,
102
+ "step": 350
103
+ },
104
+ {
105
+ "epoch": 2.0,
106
+ "eval_accuracy": 0.5870821295914156,
107
+ "eval_loss": 0.6697810292243958,
108
+ "eval_macro_f1": 0.5825487540387015,
109
+ "eval_runtime": 13.6383,
110
+ "eval_samples_per_second": 355.322,
111
+ "eval_steps_per_second": 3.446,
112
+ "step": 374
113
+ },
114
+ {
115
+ "epoch": 2.01,
116
+ "learning_rate": 3.7500000000000003e-05,
117
+ "loss": 0.6781,
118
+ "step": 375
119
+ },
120
+ {
121
+ "epoch": 2.14,
122
+ "learning_rate": 4e-05,
123
+ "loss": 0.6564,
124
+ "step": 400
125
+ },
126
+ {
127
+ "epoch": 2.27,
128
+ "learning_rate": 4.25e-05,
129
+ "loss": 0.6297,
130
+ "step": 425
131
+ },
132
+ {
133
+ "epoch": 2.41,
134
+ "learning_rate": 4.5e-05,
135
+ "loss": 0.6131,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 2.54,
140
+ "learning_rate": 4.75e-05,
141
+ "loss": 0.5841,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 2.67,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.5394,
148
+ "step": 500
149
+ },
150
+ {
151
+ "epoch": 2.81,
152
+ "learning_rate": 4.9858757062146896e-05,
153
+ "loss": 0.492,
154
+ "step": 525
155
+ },
156
+ {
157
+ "epoch": 2.94,
158
+ "learning_rate": 4.971751412429379e-05,
159
+ "loss": 0.4617,
160
+ "step": 550
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.7686751960379694,
165
+ "eval_loss": 0.46376147866249084,
166
+ "eval_macro_f1": 0.7685935958162224,
167
+ "eval_runtime": 13.8492,
168
+ "eval_samples_per_second": 349.911,
169
+ "eval_steps_per_second": 3.394,
170
+ "step": 561
171
+ },
172
+ {
173
+ "epoch": 3.07,
174
+ "learning_rate": 4.957627118644068e-05,
175
+ "loss": 0.4375,
176
+ "step": 575
177
+ },
178
+ {
179
+ "epoch": 3.21,
180
+ "learning_rate": 4.9435028248587575e-05,
181
+ "loss": 0.407,
182
+ "step": 600
183
+ },
184
+ {
185
+ "epoch": 3.34,
186
+ "learning_rate": 4.929378531073446e-05,
187
+ "loss": 0.414,
188
+ "step": 625
189
+ },
190
+ {
191
+ "epoch": 3.48,
192
+ "learning_rate": 4.915254237288136e-05,
193
+ "loss": 0.4284,
194
+ "step": 650
195
+ },
196
+ {
197
+ "epoch": 3.61,
198
+ "learning_rate": 4.9011299435028255e-05,
199
+ "loss": 0.4237,
200
+ "step": 675
201
+ },
202
+ {
203
+ "epoch": 3.74,
204
+ "learning_rate": 4.887005649717514e-05,
205
+ "loss": 0.3709,
206
+ "step": 700
207
+ },
208
+ {
209
+ "epoch": 3.88,
210
+ "learning_rate": 4.8728813559322034e-05,
211
+ "loss": 0.3842,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 4.0,
216
+ "eval_accuracy": 0.7944696657036732,
217
+ "eval_loss": 0.4250829815864563,
218
+ "eval_macro_f1": 0.7944693506298842,
219
+ "eval_runtime": 14.4424,
220
+ "eval_samples_per_second": 335.539,
221
+ "eval_steps_per_second": 3.254,
222
+ "step": 748
223
+ },
224
+ {
225
+ "epoch": 4.01,
226
+ "learning_rate": 4.8587570621468934e-05,
227
+ "loss": 0.3891,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 4.14,
232
+ "learning_rate": 4.844632768361582e-05,
233
+ "loss": 0.3184,
234
+ "step": 775
235
+ },
236
+ {
237
+ "epoch": 4.28,
238
+ "learning_rate": 4.8305084745762714e-05,
239
+ "loss": 0.3392,
240
+ "step": 800
241
+ },
242
+ {
243
+ "epoch": 4.41,
244
+ "learning_rate": 4.816384180790961e-05,
245
+ "loss": 0.3404,
246
+ "step": 825
247
+ },
248
+ {
249
+ "epoch": 4.55,
250
+ "learning_rate": 4.80225988700565e-05,
251
+ "loss": 0.3744,
252
+ "step": 850
253
+ },
254
+ {
255
+ "epoch": 4.68,
256
+ "learning_rate": 4.788135593220339e-05,
257
+ "loss": 0.3229,
258
+ "step": 875
259
+ },
260
+ {
261
+ "epoch": 4.81,
262
+ "learning_rate": 4.7740112994350286e-05,
263
+ "loss": 0.3298,
264
+ "step": 900
265
+ },
266
+ {
267
+ "epoch": 4.95,
268
+ "learning_rate": 4.759887005649718e-05,
269
+ "loss": 0.3446,
270
+ "step": 925
271
+ },
272
+ {
273
+ "epoch": 5.0,
274
+ "eval_accuracy": 0.7911679735864631,
275
+ "eval_loss": 0.4325283169746399,
276
+ "eval_macro_f1": 0.7906017246983076,
277
+ "eval_runtime": 13.9518,
278
+ "eval_samples_per_second": 347.339,
279
+ "eval_steps_per_second": 3.369,
280
+ "step": 935
281
+ },
282
+ {
283
+ "epoch": 5.08,
284
+ "learning_rate": 4.745762711864407e-05,
285
+ "loss": 0.319,
286
+ "step": 950
287
+ },
288
+ {
289
+ "epoch": 5.21,
290
+ "learning_rate": 4.7316384180790966e-05,
291
+ "loss": 0.266,
292
+ "step": 975
293
+ },
294
+ {
295
+ "epoch": 5.35,
296
+ "learning_rate": 4.717514124293785e-05,
297
+ "loss": 0.2992,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 5.48,
302
+ "learning_rate": 4.703389830508475e-05,
303
+ "loss": 0.2987,
304
+ "step": 1025
305
+ },
306
+ {
307
+ "epoch": 5.61,
308
+ "learning_rate": 4.689265536723164e-05,
309
+ "loss": 0.2906,
310
+ "step": 1050
311
+ },
312
+ {
313
+ "epoch": 5.75,
314
+ "learning_rate": 4.675141242937853e-05,
315
+ "loss": 0.3031,
316
+ "step": 1075
317
+ },
318
+ {
319
+ "epoch": 5.88,
320
+ "learning_rate": 4.6610169491525425e-05,
321
+ "loss": 0.2794,
322
+ "step": 1100
323
+ },
324
+ {
325
+ "epoch": 6.0,
326
+ "eval_accuracy": 0.819026000825423,
327
+ "eval_loss": 0.39814653992652893,
328
+ "eval_macro_f1": 0.8178884568287126,
329
+ "eval_runtime": 14.2206,
330
+ "eval_samples_per_second": 340.773,
331
+ "eval_steps_per_second": 3.305,
332
+ "step": 1122
333
+ },
334
+ {
335
+ "epoch": 6.02,
336
+ "learning_rate": 4.646892655367232e-05,
337
+ "loss": 0.2802,
338
+ "step": 1125
339
+ },
340
+ {
341
+ "epoch": 6.15,
342
+ "learning_rate": 4.632768361581921e-05,
343
+ "loss": 0.2637,
344
+ "step": 1150
345
+ },
346
+ {
347
+ "epoch": 6.28,
348
+ "learning_rate": 4.6186440677966104e-05,
349
+ "loss": 0.274,
350
+ "step": 1175
351
+ },
352
+ {
353
+ "epoch": 6.42,
354
+ "learning_rate": 4.6045197740113e-05,
355
+ "loss": 0.2754,
356
+ "step": 1200
357
+ },
358
+ {
359
+ "epoch": 6.55,
360
+ "learning_rate": 4.590395480225989e-05,
361
+ "loss": 0.2332,
362
+ "step": 1225
363
+ },
364
+ {
365
+ "epoch": 6.68,
366
+ "learning_rate": 4.5762711864406784e-05,
367
+ "loss": 0.259,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 6.82,
372
+ "learning_rate": 4.562146892655367e-05,
373
+ "loss": 0.2446,
374
+ "step": 1275
375
+ },
376
+ {
377
+ "epoch": 6.95,
378
+ "learning_rate": 4.548022598870056e-05,
379
+ "loss": 0.246,
380
+ "step": 1300
381
+ },
382
+ {
383
+ "epoch": 7.0,
384
+ "eval_accuracy": 0.8208832026413537,
385
+ "eval_loss": 0.4227023422718048,
386
+ "eval_macro_f1": 0.8184059652831023,
387
+ "eval_runtime": 14.2381,
388
+ "eval_samples_per_second": 340.355,
389
+ "eval_steps_per_second": 3.301,
390
+ "step": 1309
391
+ },
392
+ {
393
+ "epoch": 7.09,
394
+ "learning_rate": 4.533898305084746e-05,
395
+ "loss": 0.232,
396
+ "step": 1325
397
+ },
398
+ {
399
+ "epoch": 7.22,
400
+ "learning_rate": 4.519774011299435e-05,
401
+ "loss": 0.2373,
402
+ "step": 1350
403
+ },
404
+ {
405
+ "epoch": 7.35,
406
+ "learning_rate": 4.505649717514124e-05,
407
+ "loss": 0.2381,
408
+ "step": 1375
409
+ },
410
+ {
411
+ "epoch": 7.49,
412
+ "learning_rate": 4.491525423728814e-05,
413
+ "loss": 0.2607,
414
+ "step": 1400
415
+ },
416
+ {
417
+ "epoch": 7.62,
418
+ "learning_rate": 4.477401129943503e-05,
419
+ "loss": 0.221,
420
+ "step": 1425
421
+ },
422
+ {
423
+ "epoch": 7.75,
424
+ "learning_rate": 4.463276836158192e-05,
425
+ "loss": 0.2267,
426
+ "step": 1450
427
+ },
428
+ {
429
+ "epoch": 7.89,
430
+ "learning_rate": 4.4491525423728816e-05,
431
+ "loss": 0.21,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 8.0,
436
+ "eval_accuracy": 0.8012794056954189,
437
+ "eval_loss": 0.47252345085144043,
438
+ "eval_macro_f1": 0.8003815623782113,
439
+ "eval_runtime": 13.6574,
440
+ "eval_samples_per_second": 354.827,
441
+ "eval_steps_per_second": 3.441,
442
+ "step": 1496
443
+ }
444
+ ],
445
+ "max_steps": 9350,
446
+ "num_train_epochs": 50,
447
+ "total_flos": 4145176027791360.0,
448
+ "trial_name": null,
449
+ "trial_params": null
450
+ }
cross_cell_type_generization/L4/T cells/.DS_Store CHANGED
Binary files a/cross_cell_type_generization/L4/T cells/.DS_Store and b/cross_cell_type_generization/L4/T cells/.DS_Store differ
 
cross_cell_type_generization/L4/T cells/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9255555555555556,
3
+ "test_loss": 0.19469526410102844,
4
+ "test_macro_f1": 0.9254980694980695,
5
+ "test_runtime": 2.8235,
6
+ "test_samples_per_second": 318.748,
7
+ "test_steps_per_second": 3.187
8
+ }
cross_cell_type_generization/L4/T cells/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.02,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.02,
9
+ "hidden_size": 256,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 512,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 2048,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 4,
16
+ "num_hidden_layers": 4,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "problem_type": "single_label_classification",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.28.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
cross_cell_type_generization/L4/T cells/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9255555555555556,
3
+ "test_loss": 0.19469526410102844,
4
+ "test_macro_f1": 0.9254980694980695,
5
+ "test_runtime": 2.8235,
6
+ "test_samples_per_second": 318.748,
7
+ "test_steps_per_second": 3.187
8
+ }
cross_cell_type_generization/L4/T cells/trainer_state.json ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.39814653992652893,
3
+ "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_diversity/L4/T cells/fold4/checkpoint-1122",
4
+ "epoch": 8.0,
5
+ "global_step": 1496,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 2.5e-06,
13
+ "loss": 0.6917,
14
+ "step": 25
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "learning_rate": 5e-06,
19
+ "loss": 0.6919,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.4,
24
+ "learning_rate": 7.5e-06,
25
+ "loss": 0.6932,
26
+ "step": 75
27
+ },
28
+ {
29
+ "epoch": 0.53,
30
+ "learning_rate": 1e-05,
31
+ "loss": 0.6934,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.67,
36
+ "learning_rate": 1.25e-05,
37
+ "loss": 0.6944,
38
+ "step": 125
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "learning_rate": 1.5e-05,
43
+ "loss": 0.6912,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.94,
48
+ "learning_rate": 1.75e-05,
49
+ "loss": 0.6906,
50
+ "step": 175
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.5171275278580273,
55
+ "eval_loss": 0.6910191178321838,
56
+ "eval_macro_f1": 0.4080778330325605,
57
+ "eval_runtime": 14.0812,
58
+ "eval_samples_per_second": 344.146,
59
+ "eval_steps_per_second": 3.338,
60
+ "step": 187
61
+ },
62
+ {
63
+ "epoch": 1.07,
64
+ "learning_rate": 2e-05,
65
+ "loss": 0.6912,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 1.2,
70
+ "learning_rate": 2.25e-05,
71
+ "loss": 0.6911,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 1.34,
76
+ "learning_rate": 2.5e-05,
77
+ "loss": 0.6875,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 1.47,
82
+ "learning_rate": 2.7500000000000004e-05,
83
+ "loss": 0.6873,
84
+ "step": 275
85
+ },
86
+ {
87
+ "epoch": 1.6,
88
+ "learning_rate": 3e-05,
89
+ "loss": 0.6844,
90
+ "step": 300
91
+ },
92
+ {
93
+ "epoch": 1.74,
94
+ "learning_rate": 3.2500000000000004e-05,
95
+ "loss": 0.6814,
96
+ "step": 325
97
+ },
98
+ {
99
+ "epoch": 1.87,
100
+ "learning_rate": 3.5e-05,
101
+ "loss": 0.6795,
102
+ "step": 350
103
+ },
104
+ {
105
+ "epoch": 2.0,
106
+ "eval_accuracy": 0.5870821295914156,
107
+ "eval_loss": 0.6697810292243958,
108
+ "eval_macro_f1": 0.5825487540387015,
109
+ "eval_runtime": 13.5633,
110
+ "eval_samples_per_second": 357.289,
111
+ "eval_steps_per_second": 3.465,
112
+ "step": 374
113
+ },
114
+ {
115
+ "epoch": 2.01,
116
+ "learning_rate": 3.7500000000000003e-05,
117
+ "loss": 0.6781,
118
+ "step": 375
119
+ },
120
+ {
121
+ "epoch": 2.14,
122
+ "learning_rate": 4e-05,
123
+ "loss": 0.6564,
124
+ "step": 400
125
+ },
126
+ {
127
+ "epoch": 2.27,
128
+ "learning_rate": 4.25e-05,
129
+ "loss": 0.6297,
130
+ "step": 425
131
+ },
132
+ {
133
+ "epoch": 2.41,
134
+ "learning_rate": 4.5e-05,
135
+ "loss": 0.6131,
136
+ "step": 450
137
+ },
138
+ {
139
+ "epoch": 2.54,
140
+ "learning_rate": 4.75e-05,
141
+ "loss": 0.5841,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 2.67,
146
+ "learning_rate": 5e-05,
147
+ "loss": 0.5394,
148
+ "step": 500
149
+ },
150
+ {
151
+ "epoch": 2.81,
152
+ "learning_rate": 4.9858757062146896e-05,
153
+ "loss": 0.492,
154
+ "step": 525
155
+ },
156
+ {
157
+ "epoch": 2.94,
158
+ "learning_rate": 4.971751412429379e-05,
159
+ "loss": 0.4617,
160
+ "step": 550
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.7686751960379694,
165
+ "eval_loss": 0.46376147866249084,
166
+ "eval_macro_f1": 0.7685935958162224,
167
+ "eval_runtime": 14.5761,
168
+ "eval_samples_per_second": 332.462,
169
+ "eval_steps_per_second": 3.224,
170
+ "step": 561
171
+ },
172
+ {
173
+ "epoch": 3.07,
174
+ "learning_rate": 4.957627118644068e-05,
175
+ "loss": 0.4375,
176
+ "step": 575
177
+ },
178
+ {
179
+ "epoch": 3.21,
180
+ "learning_rate": 4.9435028248587575e-05,
181
+ "loss": 0.407,
182
+ "step": 600
183
+ },
184
+ {
185
+ "epoch": 3.34,
186
+ "learning_rate": 4.929378531073446e-05,
187
+ "loss": 0.414,
188
+ "step": 625
189
+ },
190
+ {
191
+ "epoch": 3.48,
192
+ "learning_rate": 4.915254237288136e-05,
193
+ "loss": 0.4284,
194
+ "step": 650
195
+ },
196
+ {
197
+ "epoch": 3.61,
198
+ "learning_rate": 4.9011299435028255e-05,
199
+ "loss": 0.4237,
200
+ "step": 675
201
+ },
202
+ {
203
+ "epoch": 3.74,
204
+ "learning_rate": 4.887005649717514e-05,
205
+ "loss": 0.3709,
206
+ "step": 700
207
+ },
208
+ {
209
+ "epoch": 3.88,
210
+ "learning_rate": 4.8728813559322034e-05,
211
+ "loss": 0.3842,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 4.0,
216
+ "eval_accuracy": 0.7944696657036732,
217
+ "eval_loss": 0.4250829815864563,
218
+ "eval_macro_f1": 0.7944693506298842,
219
+ "eval_runtime": 14.4363,
220
+ "eval_samples_per_second": 335.683,
221
+ "eval_steps_per_second": 3.256,
222
+ "step": 748
223
+ },
224
+ {
225
+ "epoch": 4.01,
226
+ "learning_rate": 4.8587570621468934e-05,
227
+ "loss": 0.3891,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 4.14,
232
+ "learning_rate": 4.844632768361582e-05,
233
+ "loss": 0.3184,
234
+ "step": 775
235
+ },
236
+ {
237
+ "epoch": 4.28,
238
+ "learning_rate": 4.8305084745762714e-05,
239
+ "loss": 0.3392,
240
+ "step": 800
241
+ },
242
+ {
243
+ "epoch": 4.41,
244
+ "learning_rate": 4.816384180790961e-05,
245
+ "loss": 0.3404,
246
+ "step": 825
247
+ },
248
+ {
249
+ "epoch": 4.55,
250
+ "learning_rate": 4.80225988700565e-05,
251
+ "loss": 0.3744,
252
+ "step": 850
253
+ },
254
+ {
255
+ "epoch": 4.68,
256
+ "learning_rate": 4.788135593220339e-05,
257
+ "loss": 0.3229,
258
+ "step": 875
259
+ },
260
+ {
261
+ "epoch": 4.81,
262
+ "learning_rate": 4.7740112994350286e-05,
263
+ "loss": 0.3298,
264
+ "step": 900
265
+ },
266
+ {
267
+ "epoch": 4.95,
268
+ "learning_rate": 4.759887005649718e-05,
269
+ "loss": 0.3446,
270
+ "step": 925
271
+ },
272
+ {
273
+ "epoch": 5.0,
274
+ "eval_accuracy": 0.7911679735864631,
275
+ "eval_loss": 0.4325283169746399,
276
+ "eval_macro_f1": 0.7906017246983076,
277
+ "eval_runtime": 14.0025,
278
+ "eval_samples_per_second": 346.08,
279
+ "eval_steps_per_second": 3.357,
280
+ "step": 935
281
+ },
282
+ {
283
+ "epoch": 5.08,
284
+ "learning_rate": 4.745762711864407e-05,
285
+ "loss": 0.319,
286
+ "step": 950
287
+ },
288
+ {
289
+ "epoch": 5.21,
290
+ "learning_rate": 4.7316384180790966e-05,
291
+ "loss": 0.266,
292
+ "step": 975
293
+ },
294
+ {
295
+ "epoch": 5.35,
296
+ "learning_rate": 4.717514124293785e-05,
297
+ "loss": 0.2992,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 5.48,
302
+ "learning_rate": 4.703389830508475e-05,
303
+ "loss": 0.2987,
304
+ "step": 1025
305
+ },
306
+ {
307
+ "epoch": 5.61,
308
+ "learning_rate": 4.689265536723164e-05,
309
+ "loss": 0.2906,
310
+ "step": 1050
311
+ },
312
+ {
313
+ "epoch": 5.75,
314
+ "learning_rate": 4.675141242937853e-05,
315
+ "loss": 0.3031,
316
+ "step": 1075
317
+ },
318
+ {
319
+ "epoch": 5.88,
320
+ "learning_rate": 4.6610169491525425e-05,
321
+ "loss": 0.2794,
322
+ "step": 1100
323
+ },
324
+ {
325
+ "epoch": 6.0,
326
+ "eval_accuracy": 0.819026000825423,
327
+ "eval_loss": 0.39814653992652893,
328
+ "eval_macro_f1": 0.8178884568287126,
329
+ "eval_runtime": 14.1681,
330
+ "eval_samples_per_second": 342.036,
331
+ "eval_steps_per_second": 3.317,
332
+ "step": 1122
333
+ },
334
+ {
335
+ "epoch": 6.02,
336
+ "learning_rate": 4.646892655367232e-05,
337
+ "loss": 0.2802,
338
+ "step": 1125
339
+ },
340
+ {
341
+ "epoch": 6.15,
342
+ "learning_rate": 4.632768361581921e-05,
343
+ "loss": 0.2637,
344
+ "step": 1150
345
+ },
346
+ {
347
+ "epoch": 6.28,
348
+ "learning_rate": 4.6186440677966104e-05,
349
+ "loss": 0.274,
350
+ "step": 1175
351
+ },
352
+ {
353
+ "epoch": 6.42,
354
+ "learning_rate": 4.6045197740113e-05,
355
+ "loss": 0.2754,
356
+ "step": 1200
357
+ },
358
+ {
359
+ "epoch": 6.55,
360
+ "learning_rate": 4.590395480225989e-05,
361
+ "loss": 0.2332,
362
+ "step": 1225
363
+ },
364
+ {
365
+ "epoch": 6.68,
366
+ "learning_rate": 4.5762711864406784e-05,
367
+ "loss": 0.259,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 6.82,
372
+ "learning_rate": 4.562146892655367e-05,
373
+ "loss": 0.2446,
374
+ "step": 1275
375
+ },
376
+ {
377
+ "epoch": 6.95,
378
+ "learning_rate": 4.548022598870056e-05,
379
+ "loss": 0.246,
380
+ "step": 1300
381
+ },
382
+ {
383
+ "epoch": 7.0,
384
+ "eval_accuracy": 0.8208832026413537,
385
+ "eval_loss": 0.4227023422718048,
386
+ "eval_macro_f1": 0.8184059652831023,
387
+ "eval_runtime": 14.6282,
388
+ "eval_samples_per_second": 331.279,
389
+ "eval_steps_per_second": 3.213,
390
+ "step": 1309
391
+ },
392
+ {
393
+ "epoch": 7.09,
394
+ "learning_rate": 4.533898305084746e-05,
395
+ "loss": 0.232,
396
+ "step": 1325
397
+ },
398
+ {
399
+ "epoch": 7.22,
400
+ "learning_rate": 4.519774011299435e-05,
401
+ "loss": 0.2373,
402
+ "step": 1350
403
+ },
404
+ {
405
+ "epoch": 7.35,
406
+ "learning_rate": 4.505649717514124e-05,
407
+ "loss": 0.2381,
408
+ "step": 1375
409
+ },
410
+ {
411
+ "epoch": 7.49,
412
+ "learning_rate": 4.491525423728814e-05,
413
+ "loss": 0.2607,
414
+ "step": 1400
415
+ },
416
+ {
417
+ "epoch": 7.62,
418
+ "learning_rate": 4.477401129943503e-05,
419
+ "loss": 0.221,
420
+ "step": 1425
421
+ },
422
+ {
423
+ "epoch": 7.75,
424
+ "learning_rate": 4.463276836158192e-05,
425
+ "loss": 0.2267,
426
+ "step": 1450
427
+ },
428
+ {
429
+ "epoch": 7.89,
430
+ "learning_rate": 4.4491525423728816e-05,
431
+ "loss": 0.21,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 8.0,
436
+ "eval_accuracy": 0.8012794056954189,
437
+ "eval_loss": 0.47252345085144043,
438
+ "eval_macro_f1": 0.8003815623782113,
439
+ "eval_runtime": 14.1684,
440
+ "eval_samples_per_second": 342.028,
441
+ "eval_steps_per_second": 3.317,
442
+ "step": 1496
443
+ }
444
+ ],
445
+ "max_steps": 9350,
446
+ "num_train_epochs": 50,
447
+ "total_flos": 4145176027791360.0,
448
+ "trial_name": null,
449
+ "trial_params": null
450
+ }
data_curation&frozen_layers/.DS_Store CHANGED
Binary files a/data_curation&frozen_layers/.DS_Store and b/data_curation&frozen_layers/.DS_Store differ
 
data_curation&frozen_layers/dataset1_all/.DS_Store CHANGED
Binary files a/data_curation&frozen_layers/dataset1_all/.DS_Store and b/data_curation&frozen_layers/dataset1_all/.DS_Store differ
 
data_curation&frozen_layers/dataset1_all/F0/.DS_Store CHANGED
Binary files a/data_curation&frozen_layers/dataset1_all/F0/.DS_Store and b/data_curation&frozen_layers/dataset1_all/F0/.DS_Store differ
 
data_curation&frozen_layers/dataset1_all/F0/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7918886593454387,
3
+ "test_loss": 0.4178936779499054,
4
+ "test_macro_f1": 0.7900743751766766,
5
+ "test_runtime": 28.8947,
6
+ "test_samples_per_second": 318.294,
7
+ "test_steps_per_second": 3.08
8
+ }
data_curation&frozen_layers/dataset1_all/F0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/vsphhome/fengguoqing/Geneformer",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.02,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "relu",
10
+ "hidden_dropout_prob": 0.02,
11
+ "hidden_size": 256,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 512,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 4,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.28.0",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 25426
27
+ }
data_curation&frozen_layers/dataset1_all/F0/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7918886593454387,
3
+ "test_loss": 0.4178936779499054,
4
+ "test_macro_f1": 0.7900743751766766,
5
+ "test_runtime": 28.8947,
6
+ "test_samples_per_second": 318.294,
7
+ "test_steps_per_second": 3.08
8
+ }
data_curation&frozen_layers/dataset1_all/F0/trainer_state.json ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4178936779499054,
3
+ "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_curation/240716_geneformer_CellClassifier_PM25_dataset1_L2048_B26_LR5e-05_LSlinear_WU500_E50_Oadamw_F0_fold4/checkpoint-1770",
4
+ "epoch": 7.0,
5
+ "global_step": 2478,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 4.7e-06,
13
+ "loss": 0.699,
14
+ "step": 47
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "learning_rate": 9.4e-06,
19
+ "loss": 0.6968,
20
+ "step": 94
21
+ },
22
+ {
23
+ "epoch": 0.4,
24
+ "learning_rate": 1.4099999999999999e-05,
25
+ "loss": 0.6872,
26
+ "step": 141
27
+ },
28
+ {
29
+ "epoch": 0.53,
30
+ "learning_rate": 1.88e-05,
31
+ "loss": 0.6734,
32
+ "step": 188
33
+ },
34
+ {
35
+ "epoch": 0.66,
36
+ "learning_rate": 2.35e-05,
37
+ "loss": 0.6567,
38
+ "step": 235
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "learning_rate": 2.8199999999999998e-05,
43
+ "loss": 0.6318,
44
+ "step": 282
45
+ },
46
+ {
47
+ "epoch": 0.93,
48
+ "learning_rate": 3.29e-05,
49
+ "loss": 0.6259,
50
+ "step": 329
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.6278134174187235,
55
+ "eval_loss": 0.6198515295982361,
56
+ "eval_macro_f1": 0.6250609592590812,
57
+ "eval_runtime": 29.085,
58
+ "eval_samples_per_second": 316.211,
59
+ "eval_steps_per_second": 3.06,
60
+ "step": 354
61
+ },
62
+ {
63
+ "epoch": 1.06,
64
+ "learning_rate": 3.76e-05,
65
+ "loss": 0.6026,
66
+ "step": 376
67
+ },
68
+ {
69
+ "epoch": 1.19,
70
+ "learning_rate": 4.23e-05,
71
+ "loss": 0.5907,
72
+ "step": 423
73
+ },
74
+ {
75
+ "epoch": 1.33,
76
+ "learning_rate": 4.7e-05,
77
+ "loss": 0.6026,
78
+ "step": 470
79
+ },
80
+ {
81
+ "epoch": 1.46,
82
+ "learning_rate": 4.995058139534884e-05,
83
+ "loss": 0.5725,
84
+ "step": 517
85
+ },
86
+ {
87
+ "epoch": 1.59,
88
+ "learning_rate": 4.981395348837209e-05,
89
+ "loss": 0.5673,
90
+ "step": 564
91
+ },
92
+ {
93
+ "epoch": 1.73,
94
+ "learning_rate": 4.967732558139535e-05,
95
+ "loss": 0.5563,
96
+ "step": 611
97
+ },
98
+ {
99
+ "epoch": 1.86,
100
+ "learning_rate": 4.954069767441861e-05,
101
+ "loss": 0.554,
102
+ "step": 658
103
+ },
104
+ {
105
+ "epoch": 1.99,
106
+ "learning_rate": 4.940406976744186e-05,
107
+ "loss": 0.5169,
108
+ "step": 705
109
+ },
110
+ {
111
+ "epoch": 2.0,
112
+ "eval_accuracy": 0.7059910840491465,
113
+ "eval_loss": 0.5674108266830444,
114
+ "eval_macro_f1": 0.704985311360717,
115
+ "eval_runtime": 28.6864,
116
+ "eval_samples_per_second": 320.605,
117
+ "eval_steps_per_second": 3.103,
118
+ "step": 708
119
+ },
120
+ {
121
+ "epoch": 2.12,
122
+ "learning_rate": 4.926744186046512e-05,
123
+ "loss": 0.544,
124
+ "step": 752
125
+ },
126
+ {
127
+ "epoch": 2.26,
128
+ "learning_rate": 4.9130813953488376e-05,
129
+ "loss": 0.5296,
130
+ "step": 799
131
+ },
132
+ {
133
+ "epoch": 2.39,
134
+ "learning_rate": 4.899418604651163e-05,
135
+ "loss": 0.5126,
136
+ "step": 846
137
+ },
138
+ {
139
+ "epoch": 2.52,
140
+ "learning_rate": 4.8857558139534885e-05,
141
+ "loss": 0.5277,
142
+ "step": 893
143
+ },
144
+ {
145
+ "epoch": 2.66,
146
+ "learning_rate": 4.8720930232558146e-05,
147
+ "loss": 0.4896,
148
+ "step": 940
149
+ },
150
+ {
151
+ "epoch": 2.79,
152
+ "learning_rate": 4.85843023255814e-05,
153
+ "loss": 0.4887,
154
+ "step": 987
155
+ },
156
+ {
157
+ "epoch": 2.92,
158
+ "learning_rate": 4.8447674418604654e-05,
159
+ "loss": 0.4916,
160
+ "step": 1034
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.7490486028052625,
165
+ "eval_loss": 0.47779449820518494,
166
+ "eval_macro_f1": 0.7485365926009326,
167
+ "eval_runtime": 29.287,
168
+ "eval_samples_per_second": 314.03,
169
+ "eval_steps_per_second": 3.039,
170
+ "step": 1062
171
+ },
172
+ {
173
+ "epoch": 3.05,
174
+ "learning_rate": 4.831104651162791e-05,
175
+ "loss": 0.4725,
176
+ "step": 1081
177
+ },
178
+ {
179
+ "epoch": 3.19,
180
+ "learning_rate": 4.817441860465117e-05,
181
+ "loss": 0.457,
182
+ "step": 1128
183
+ },
184
+ {
185
+ "epoch": 3.32,
186
+ "learning_rate": 4.8037790697674424e-05,
187
+ "loss": 0.4775,
188
+ "step": 1175
189
+ },
190
+ {
191
+ "epoch": 3.45,
192
+ "learning_rate": 4.790116279069768e-05,
193
+ "loss": 0.4532,
194
+ "step": 1222
195
+ },
196
+ {
197
+ "epoch": 3.58,
198
+ "learning_rate": 4.776453488372093e-05,
199
+ "loss": 0.4651,
200
+ "step": 1269
201
+ },
202
+ {
203
+ "epoch": 3.72,
204
+ "learning_rate": 4.762790697674419e-05,
205
+ "loss": 0.4592,
206
+ "step": 1316
207
+ },
208
+ {
209
+ "epoch": 3.85,
210
+ "learning_rate": 4.749127906976744e-05,
211
+ "loss": 0.4466,
212
+ "step": 1363
213
+ },
214
+ {
215
+ "epoch": 3.98,
216
+ "learning_rate": 4.7354651162790695e-05,
217
+ "loss": 0.4476,
218
+ "step": 1410
219
+ },
220
+ {
221
+ "epoch": 4.0,
222
+ "eval_accuracy": 0.7726432532347505,
223
+ "eval_loss": 0.4460304081439972,
224
+ "eval_macro_f1": 0.7668150187606645,
225
+ "eval_runtime": 28.5449,
226
+ "eval_samples_per_second": 322.194,
227
+ "eval_steps_per_second": 3.118,
228
+ "step": 1416
229
+ },
230
+ {
231
+ "epoch": 4.12,
232
+ "learning_rate": 4.7218023255813956e-05,
233
+ "loss": 0.4375,
234
+ "step": 1457
235
+ },
236
+ {
237
+ "epoch": 4.25,
238
+ "learning_rate": 4.708139534883721e-05,
239
+ "loss": 0.4181,
240
+ "step": 1504
241
+ },
242
+ {
243
+ "epoch": 4.38,
244
+ "learning_rate": 4.6944767441860464e-05,
245
+ "loss": 0.408,
246
+ "step": 1551
247
+ },
248
+ {
249
+ "epoch": 4.51,
250
+ "learning_rate": 4.680813953488372e-05,
251
+ "loss": 0.4046,
252
+ "step": 1598
253
+ },
254
+ {
255
+ "epoch": 4.65,
256
+ "learning_rate": 4.667151162790698e-05,
257
+ "loss": 0.4175,
258
+ "step": 1645
259
+ },
260
+ {
261
+ "epoch": 4.78,
262
+ "learning_rate": 4.6534883720930234e-05,
263
+ "loss": 0.4072,
264
+ "step": 1692
265
+ },
266
+ {
267
+ "epoch": 4.91,
268
+ "learning_rate": 4.639825581395349e-05,
269
+ "loss": 0.3991,
270
+ "step": 1739
271
+ },
272
+ {
273
+ "epoch": 5.0,
274
+ "eval_accuracy": 0.7918886593454387,
275
+ "eval_loss": 0.4178936779499054,
276
+ "eval_macro_f1": 0.7900743751766766,
277
+ "eval_runtime": 29.1563,
278
+ "eval_samples_per_second": 315.438,
279
+ "eval_steps_per_second": 3.053,
280
+ "step": 1770
281
+ },
282
+ {
283
+ "epoch": 5.05,
284
+ "learning_rate": 4.626162790697675e-05,
285
+ "loss": 0.3955,
286
+ "step": 1786
287
+ },
288
+ {
289
+ "epoch": 5.18,
290
+ "learning_rate": 4.6125e-05,
291
+ "loss": 0.3857,
292
+ "step": 1833
293
+ },
294
+ {
295
+ "epoch": 5.31,
296
+ "learning_rate": 4.598837209302326e-05,
297
+ "loss": 0.3799,
298
+ "step": 1880
299
+ },
300
+ {
301
+ "epoch": 5.44,
302
+ "learning_rate": 4.585174418604651e-05,
303
+ "loss": 0.3798,
304
+ "step": 1927
305
+ },
306
+ {
307
+ "epoch": 5.58,
308
+ "learning_rate": 4.571511627906977e-05,
309
+ "loss": 0.3919,
310
+ "step": 1974
311
+ },
312
+ {
313
+ "epoch": 5.71,
314
+ "learning_rate": 4.557848837209303e-05,
315
+ "loss": 0.3751,
316
+ "step": 2021
317
+ },
318
+ {
319
+ "epoch": 5.84,
320
+ "learning_rate": 4.544186046511628e-05,
321
+ "loss": 0.4059,
322
+ "step": 2068
323
+ },
324
+ {
325
+ "epoch": 5.97,
326
+ "learning_rate": 4.5305232558139535e-05,
327
+ "loss": 0.3856,
328
+ "step": 2115
329
+ },
330
+ {
331
+ "epoch": 6.0,
332
+ "eval_accuracy": 0.7924323148852886,
333
+ "eval_loss": 0.4189501702785492,
334
+ "eval_macro_f1": 0.791898051259053,
335
+ "eval_runtime": 28.6324,
336
+ "eval_samples_per_second": 321.209,
337
+ "eval_steps_per_second": 3.108,
338
+ "step": 2124
339
+ },
340
+ {
341
+ "epoch": 6.11,
342
+ "learning_rate": 4.5168604651162796e-05,
343
+ "loss": 0.3676,
344
+ "step": 2162
345
+ },
346
+ {
347
+ "epoch": 6.24,
348
+ "learning_rate": 4.503197674418605e-05,
349
+ "loss": 0.3699,
350
+ "step": 2209
351
+ },
352
+ {
353
+ "epoch": 6.37,
354
+ "learning_rate": 4.4895348837209305e-05,
355
+ "loss": 0.3526,
356
+ "step": 2256
357
+ },
358
+ {
359
+ "epoch": 6.51,
360
+ "learning_rate": 4.475872093023256e-05,
361
+ "loss": 0.3744,
362
+ "step": 2303
363
+ },
364
+ {
365
+ "epoch": 6.64,
366
+ "learning_rate": 4.462209302325582e-05,
367
+ "loss": 0.3517,
368
+ "step": 2350
369
+ },
370
+ {
371
+ "epoch": 6.77,
372
+ "learning_rate": 4.4485465116279074e-05,
373
+ "loss": 0.37,
374
+ "step": 2397
375
+ },
376
+ {
377
+ "epoch": 6.9,
378
+ "learning_rate": 4.434883720930233e-05,
379
+ "loss": 0.3793,
380
+ "step": 2444
381
+ },
382
+ {
383
+ "epoch": 7.0,
384
+ "eval_accuracy": 0.7862346417309992,
385
+ "eval_loss": 0.4403926134109497,
386
+ "eval_macro_f1": 0.7775918523788579,
387
+ "eval_runtime": 28.523,
388
+ "eval_samples_per_second": 322.441,
389
+ "eval_steps_per_second": 3.12,
390
+ "step": 2478
391
+ }
392
+ ],
393
+ "max_steps": 17700,
394
+ "num_train_epochs": 50,
395
+ "total_flos": 1.021966170021888e+16,
396
+ "trial_name": null,
397
+ "trial_params": null
398
+ }
data_curation&frozen_layers/dataset1_all/F2/.DS_Store CHANGED
Binary files a/data_curation&frozen_layers/dataset1_all/F2/.DS_Store and b/data_curation&frozen_layers/dataset1_all/F2/.DS_Store differ
 
data_curation&frozen_layers/dataset1_all/F2/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7876481461346091,
3
+ "test_loss": 0.42375245690345764,
4
+ "test_macro_f1": 0.7860128082271005,
5
+ "test_runtime": 28.2868,
6
+ "test_samples_per_second": 325.133,
7
+ "test_steps_per_second": 3.146
8
+ }
data_curation&frozen_layers/dataset1_all/F2/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/vsphhome/fengguoqing/Geneformer",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.02,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "relu",
10
+ "hidden_dropout_prob": 0.02,
11
+ "hidden_size": 256,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 512,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 4,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.28.0",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 25426
27
+ }
data_curation&frozen_layers/dataset1_all/F2/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7876481461346091,
3
+ "test_loss": 0.42375245690345764,
4
+ "test_macro_f1": 0.7860128082271005,
5
+ "test_runtime": 28.2868,
6
+ "test_samples_per_second": 325.133,
7
+ "test_steps_per_second": 3.146
8
+ }
data_curation&frozen_layers/dataset1_all/F2/predictions.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80418474e74868149cc0013cb607167bdbe4bf88c016ca8d6ca430a8be4599be
3
+ size 147612
data_curation&frozen_layers/dataset1_all/F2/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b59f0bd49e567d300464159bff0c123e0631c148c751ff5b6fae76ae15bc72bd
3
+ size 15006
data_curation&frozen_layers/dataset1_all/F2/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba7ccc8bcbde81186750973c8893ef9d6aa9bf52befe50f650c061df92088a1
3
+ size 1064
data_curation&frozen_layers/dataset1_all/F2/trainer_state.json ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.42375245690345764,
3
+ "best_model_checkpoint": "/vsphhome/fengguoqing/Geneformer/models/data_curation/240716_geneformer_CellClassifier_PM25_dataset1_L2048_B26_LR5e-05_LSlinear_WU500_E50_Oadamw_F2_fold4/checkpoint-2832",
4
+ "epoch": 10.0,
5
+ "global_step": 3540,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 4.7e-06,
13
+ "loss": 0.6937,
14
+ "step": 47
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "learning_rate": 9.4e-06,
19
+ "loss": 0.69,
20
+ "step": 94
21
+ },
22
+ {
23
+ "epoch": 0.4,
24
+ "learning_rate": 1.4099999999999999e-05,
25
+ "loss": 0.6825,
26
+ "step": 141
27
+ },
28
+ {
29
+ "epoch": 0.53,
30
+ "learning_rate": 1.88e-05,
31
+ "loss": 0.6712,
32
+ "step": 188
33
+ },
34
+ {
35
+ "epoch": 0.66,
36
+ "learning_rate": 2.35e-05,
37
+ "loss": 0.6597,
38
+ "step": 235
39
+ },
40
+ {
41
+ "epoch": 0.8,
42
+ "learning_rate": 2.8199999999999998e-05,
43
+ "loss": 0.635,
44
+ "step": 282
45
+ },
46
+ {
47
+ "epoch": 0.93,
48
+ "learning_rate": 3.29e-05,
49
+ "loss": 0.6164,
50
+ "step": 329
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.6319451995215831,
55
+ "eval_loss": 0.6155699491500854,
56
+ "eval_macro_f1": 0.6302866279772024,
57
+ "eval_runtime": 28.4084,
58
+ "eval_samples_per_second": 323.742,
59
+ "eval_steps_per_second": 3.133,
60
+ "step": 354
61
+ },
62
+ {
63
+ "epoch": 1.06,
64
+ "learning_rate": 3.76e-05,
65
+ "loss": 0.5959,
66
+ "step": 376
67
+ },
68
+ {
69
+ "epoch": 1.19,
70
+ "learning_rate": 4.23e-05,
71
+ "loss": 0.5948,
72
+ "step": 423
73
+ },
74
+ {
75
+ "epoch": 1.33,
76
+ "learning_rate": 4.7e-05,
77
+ "loss": 0.6044,
78
+ "step": 470
79
+ },
80
+ {
81
+ "epoch": 1.46,
82
+ "learning_rate": 4.995058139534884e-05,
83
+ "loss": 0.5994,
84
+ "step": 517
85
+ },
86
+ {
87
+ "epoch": 1.59,
88
+ "learning_rate": 4.981395348837209e-05,
89
+ "loss": 0.5673,
90
+ "step": 564
91
+ },
92
+ {
93
+ "epoch": 1.73,
94
+ "learning_rate": 4.967732558139535e-05,
95
+ "loss": 0.5619,
96
+ "step": 611
97
+ },
98
+ {
99
+ "epoch": 1.86,
100
+ "learning_rate": 4.954069767441861e-05,
101
+ "loss": 0.5816,
102
+ "step": 658
103
+ },
104
+ {
105
+ "epoch": 1.99,
106
+ "learning_rate": 4.940406976744186e-05,
107
+ "loss": 0.5384,
108
+ "step": 705
109
+ },
110
+ {
111
+ "epoch": 2.0,
112
+ "eval_accuracy": 0.7233880613243449,
113
+ "eval_loss": 0.5276534557342529,
114
+ "eval_macro_f1": 0.7229937346384986,
115
+ "eval_runtime": 28.9502,
116
+ "eval_samples_per_second": 317.684,
117
+ "eval_steps_per_second": 3.074,
118
+ "step": 708
119
+ },
120
+ {
121
+ "epoch": 2.12,
122
+ "learning_rate": 4.926744186046512e-05,
123
+ "loss": 0.5601,
124
+ "step": 752
125
+ },
126
+ {
127
+ "epoch": 2.26,
128
+ "learning_rate": 4.9130813953488376e-05,
129
+ "loss": 0.5334,
130
+ "step": 799
131
+ },
132
+ {
133
+ "epoch": 2.39,
134
+ "learning_rate": 4.899418604651163e-05,
135
+ "loss": 0.5024,
136
+ "step": 846
137
+ },
138
+ {
139
+ "epoch": 2.52,
140
+ "learning_rate": 4.8857558139534885e-05,
141
+ "loss": 0.5114,
142
+ "step": 893
143
+ },
144
+ {
145
+ "epoch": 2.66,
146
+ "learning_rate": 4.8720930232558146e-05,
147
+ "loss": 0.4943,
148
+ "step": 940
149
+ },
150
+ {
151
+ "epoch": 2.79,
152
+ "learning_rate": 4.85843023255814e-05,
153
+ "loss": 0.4806,
154
+ "step": 987
155
+ },
156
+ {
157
+ "epoch": 2.92,
158
+ "learning_rate": 4.8447674418604654e-05,
159
+ "loss": 0.4762,
160
+ "step": 1034
161
+ },
162
+ {
163
+ "epoch": 3.0,
164
+ "eval_accuracy": 0.7345873654452539,
165
+ "eval_loss": 0.49307793378829956,
166
+ "eval_macro_f1": 0.7239669430836269,
167
+ "eval_runtime": 29.4659,
168
+ "eval_samples_per_second": 312.123,
169
+ "eval_steps_per_second": 3.02,
170
+ "step": 1062
171
+ },
172
+ {
173
+ "epoch": 3.05,
174
+ "learning_rate": 4.831104651162791e-05,
175
+ "loss": 0.482,
176
+ "step": 1081
177
+ },
178
+ {
179
+ "epoch": 3.19,
180
+ "learning_rate": 4.817441860465117e-05,
181
+ "loss": 0.4553,
182
+ "step": 1128
183
+ },
184
+ {
185
+ "epoch": 3.32,
186
+ "learning_rate": 4.8037790697674424e-05,
187
+ "loss": 0.4866,
188
+ "step": 1175
189
+ },
190
+ {
191
+ "epoch": 3.45,
192
+ "learning_rate": 4.790116279069768e-05,
193
+ "loss": 0.4729,
194
+ "step": 1222
195
+ },
196
+ {
197
+ "epoch": 3.58,
198
+ "learning_rate": 4.776453488372093e-05,
199
+ "loss": 0.4637,
200
+ "step": 1269
201
+ },
202
+ {
203
+ "epoch": 3.72,
204
+ "learning_rate": 4.762790697674419e-05,
205
+ "loss": 0.464,
206
+ "step": 1316
207
+ },
208
+ {
209
+ "epoch": 3.85,
210
+ "learning_rate": 4.749127906976744e-05,
211
+ "loss": 0.47,
212
+ "step": 1363
213
+ },
214
+ {
215
+ "epoch": 3.98,
216
+ "learning_rate": 4.7354651162790695e-05,
217
+ "loss": 0.4643,
218
+ "step": 1410
219
+ },
220
+ {
221
+ "epoch": 4.0,
222
+ "eval_accuracy": 0.7752527998260302,
223
+ "eval_loss": 0.4479929208755493,
224
+ "eval_macro_f1": 0.7729510982990464,
225
+ "eval_runtime": 29.7623,
226
+ "eval_samples_per_second": 309.015,
227
+ "eval_steps_per_second": 2.99,
228
+ "step": 1416
229
+ },
230
+ {
231
+ "epoch": 4.12,
232
+ "learning_rate": 4.7218023255813956e-05,
233
+ "loss": 0.4516,
234
+ "step": 1457
235
+ },
236
+ {
237
+ "epoch": 4.25,
238
+ "learning_rate": 4.708139534883721e-05,
239
+ "loss": 0.4284,
240
+ "step": 1504
241
+ },
242
+ {
243
+ "epoch": 4.38,
244
+ "learning_rate": 4.6944767441860464e-05,
245
+ "loss": 0.4292,
246
+ "step": 1551
247
+ },
248
+ {
249
+ "epoch": 4.51,
250
+ "learning_rate": 4.680813953488372e-05,
251
+ "loss": 0.4251,
252
+ "step": 1598
253
+ },
254
+ {
255
+ "epoch": 4.65,
256
+ "learning_rate": 4.667151162790698e-05,
257
+ "loss": 0.4173,
258
+ "step": 1645
259
+ },
260
+ {
261
+ "epoch": 4.78,
262
+ "learning_rate": 4.6534883720930234e-05,
263
+ "loss": 0.4122,
264
+ "step": 1692
265
+ },
266
+ {
267
+ "epoch": 4.91,
268
+ "learning_rate": 4.639825581395349e-05,
269
+ "loss": 0.4161,
270
+ "step": 1739
271
+ },
272
+ {
273
+ "epoch": 5.0,
274
+ "eval_accuracy": 0.7766663042296401,
275
+ "eval_loss": 0.43961769342422485,
276
+ "eval_macro_f1": 0.7711253496732764,
277
+ "eval_runtime": 29.1171,
278
+ "eval_samples_per_second": 315.863,
279
+ "eval_steps_per_second": 3.057,
280
+ "step": 1770
281
+ },
282
+ {
283
+ "epoch": 5.05,
284
+ "learning_rate": 4.626162790697675e-05,
285
+ "loss": 0.4198,
286
+ "step": 1786
287
+ },
288
+ {
289
+ "epoch": 5.18,
290
+ "learning_rate": 4.6125e-05,
291
+ "loss": 0.4047,
292
+ "step": 1833
293
+ },
294
+ {
295
+ "epoch": 5.31,
296
+ "learning_rate": 4.598837209302326e-05,
297
+ "loss": 0.4109,
298
+ "step": 1880
299
+ },
300
+ {
301
+ "epoch": 5.44,
302
+ "learning_rate": 4.585174418604651e-05,
303
+ "loss": 0.402,
304
+ "step": 1927
305
+ },
306
+ {
307
+ "epoch": 5.58,
308
+ "learning_rate": 4.571511627906977e-05,
309
+ "loss": 0.4146,
310
+ "step": 1974
311
+ },
312
+ {
313
+ "epoch": 5.71,
314
+ "learning_rate": 4.557848837209303e-05,
315
+ "loss": 0.3954,
316
+ "step": 2021
317
+ },
318
+ {
319
+ "epoch": 5.84,
320
+ "learning_rate": 4.544186046511628e-05,
321
+ "loss": 0.4254,
322
+ "step": 2068
323
+ },
324
+ {
325
+ "epoch": 5.97,
326
+ "learning_rate": 4.5305232558139535e-05,
327
+ "loss": 0.4244,
328
+ "step": 2115
329
+ },
330
+ {
331
+ "epoch": 6.0,
332
+ "eval_accuracy": 0.7843862128955094,
333
+ "eval_loss": 0.4381129741668701,
334
+ "eval_macro_f1": 0.7827506323705966,
335
+ "eval_runtime": 29.0184,
336
+ "eval_samples_per_second": 316.937,
337
+ "eval_steps_per_second": 3.067,
338
+ "step": 2124
339
+ },
340
+ {
341
+ "epoch": 6.11,
342
+ "learning_rate": 4.5168604651162796e-05,
343
+ "loss": 0.4082,
344
+ "step": 2162
345
+ },
346
+ {
347
+ "epoch": 6.24,
348
+ "learning_rate": 4.503197674418605e-05,
349
+ "loss": 0.401,
350
+ "step": 2209
351
+ },
352
+ {
353
+ "epoch": 6.37,
354
+ "learning_rate": 4.4895348837209305e-05,
355
+ "loss": 0.3686,
356
+ "step": 2256
357
+ },
358
+ {
359
+ "epoch": 6.51,
360
+ "learning_rate": 4.475872093023256e-05,
361
+ "loss": 0.4074,
362
+ "step": 2303
363
+ },
364
+ {
365
+ "epoch": 6.64,
366
+ "learning_rate": 4.462209302325582e-05,
367
+ "loss": 0.3775,
368
+ "step": 2350
369
+ },
370
+ {
371
+ "epoch": 6.77,
372
+ "learning_rate": 4.4485465116279074e-05,
373
+ "loss": 0.4056,
374
+ "step": 2397
375
+ },
376
+ {
377
+ "epoch": 6.9,
378
+ "learning_rate": 4.434883720930233e-05,
379
+ "loss": 0.3998,
380
+ "step": 2444
381
+ },
382
+ {
383
+ "epoch": 7.0,
384
+ "eval_accuracy": 0.7610090246819615,
385
+ "eval_loss": 0.4613344073295593,
386
+ "eval_macro_f1": 0.7609479537198214,
387
+ "eval_runtime": 29.0367,
388
+ "eval_samples_per_second": 316.737,
389
+ "eval_steps_per_second": 3.065,
390
+ "step": 2478
391
+ },
392
+ {
393
+ "epoch": 7.04,
394
+ "learning_rate": 4.421220930232558e-05,
395
+ "loss": 0.3861,
396
+ "step": 2491
397
+ },
398
+ {
399
+ "epoch": 7.17,
400
+ "learning_rate": 4.4075581395348844e-05,
401
+ "loss": 0.3773,
402
+ "step": 2538
403
+ },
404
+ {
405
+ "epoch": 7.3,
406
+ "learning_rate": 4.39389534883721e-05,
407
+ "loss": 0.3902,
408
+ "step": 2585
409
+ },
410
+ {
411
+ "epoch": 7.44,
412
+ "learning_rate": 4.380232558139535e-05,
413
+ "loss": 0.3815,
414
+ "step": 2632
415
+ },
416
+ {
417
+ "epoch": 7.57,
418
+ "learning_rate": 4.3665697674418606e-05,
419
+ "loss": 0.377,
420
+ "step": 2679
421
+ },
422
+ {
423
+ "epoch": 7.7,
424
+ "learning_rate": 4.352906976744186e-05,
425
+ "loss": 0.3835,
426
+ "step": 2726
427
+ },
428
+ {
429
+ "epoch": 7.83,
430
+ "learning_rate": 4.3392441860465115e-05,
431
+ "loss": 0.4,
432
+ "step": 2773
433
+ },
434
+ {
435
+ "epoch": 7.97,
436
+ "learning_rate": 4.325581395348837e-05,
437
+ "loss": 0.3806,
438
+ "step": 2820
439
+ },
440
+ {
441
+ "epoch": 8.0,
442
+ "eval_accuracy": 0.7876481461346091,
443
+ "eval_loss": 0.42375245690345764,
444
+ "eval_macro_f1": 0.7860128082271005,
445
+ "eval_runtime": 29.7134,
446
+ "eval_samples_per_second": 309.523,
447
+ "eval_steps_per_second": 2.995,
448
+ "step": 2832
449
+ },
450
+ {
451
+ "epoch": 8.1,
452
+ "learning_rate": 4.311918604651163e-05,
453
+ "loss": 0.3679,
454
+ "step": 2867
455
+ },
456
+ {
457
+ "epoch": 8.23,
458
+ "learning_rate": 4.2982558139534884e-05,
459
+ "loss": 0.3563,
460
+ "step": 2914
461
+ },
462
+ {
463
+ "epoch": 8.36,
464
+ "learning_rate": 4.284593023255814e-05,
465
+ "loss": 0.3514,
466
+ "step": 2961
467
+ },
468
+ {
469
+ "epoch": 8.5,
470
+ "learning_rate": 4.27093023255814e-05,
471
+ "loss": 0.3527,
472
+ "step": 3008
473
+ },
474
+ {
475
+ "epoch": 8.63,
476
+ "learning_rate": 4.2572674418604654e-05,
477
+ "loss": 0.3829,
478
+ "step": 3055
479
+ },
480
+ {
481
+ "epoch": 8.76,
482
+ "learning_rate": 4.243604651162791e-05,
483
+ "loss": 0.3756,
484
+ "step": 3102
485
+ },
486
+ {
487
+ "epoch": 8.9,
488
+ "learning_rate": 4.229941860465116e-05,
489
+ "loss": 0.3553,
490
+ "step": 3149
491
+ },
492
+ {
493
+ "epoch": 9.0,
494
+ "eval_accuracy": 0.7750353376100902,
495
+ "eval_loss": 0.45743581652641296,
496
+ "eval_macro_f1": 0.7614865114284046,
497
+ "eval_runtime": 29.6041,
498
+ "eval_samples_per_second": 310.667,
499
+ "eval_steps_per_second": 3.006,
500
+ "step": 3186
501
+ },
502
+ {
503
+ "epoch": 9.03,
504
+ "learning_rate": 4.216279069767442e-05,
505
+ "loss": 0.3757,
506
+ "step": 3196
507
+ },
508
+ {
509
+ "epoch": 9.16,
510
+ "learning_rate": 4.202616279069768e-05,
511
+ "loss": 0.3445,
512
+ "step": 3243
513
+ },
514
+ {
515
+ "epoch": 9.29,
516
+ "learning_rate": 4.188953488372093e-05,
517
+ "loss": 0.3753,
518
+ "step": 3290
519
+ },
520
+ {
521
+ "epoch": 9.43,
522
+ "learning_rate": 4.1752906976744186e-05,
523
+ "loss": 0.3587,
524
+ "step": 3337
525
+ },
526
+ {
527
+ "epoch": 9.56,
528
+ "learning_rate": 4.161627906976745e-05,
529
+ "loss": 0.3513,
530
+ "step": 3384
531
+ },
532
+ {
533
+ "epoch": 9.69,
534
+ "learning_rate": 4.14796511627907e-05,
535
+ "loss": 0.3578,
536
+ "step": 3431
537
+ },
538
+ {
539
+ "epoch": 9.82,
540
+ "learning_rate": 4.1343023255813955e-05,
541
+ "loss": 0.3661,
542
+ "step": 3478
543
+ },
544
+ {
545
+ "epoch": 9.96,
546
+ "learning_rate": 4.120639534883721e-05,
547
+ "loss": 0.3506,
548
+ "step": 3525
549
+ },
550
+ {
551
+ "epoch": 10.0,
552
+ "eval_accuracy": 0.788409263890399,
553
+ "eval_loss": 0.4321058988571167,
554
+ "eval_macro_f1": 0.7788198668907367,
555
+ "eval_runtime": 28.9187,
556
+ "eval_samples_per_second": 318.03,
557
+ "eval_steps_per_second": 3.078,
558
+ "step": 3540
559
+ }
560
+ ],
561
+ "max_steps": 17700,
562
+ "num_train_epochs": 50,
563
+ "total_flos": 1.45995167145984e+16,
564
+ "trial_name": null,
565
+ "trial_params": null
566
+ }
data_curation&frozen_layers/dataset1_all/F4/.DS_Store CHANGED
Binary files a/data_curation&frozen_layers/dataset1_all/F4/.DS_Store and b/data_curation&frozen_layers/dataset1_all/F4/.DS_Store differ
 
data_curation&frozen_layers/dataset1_all/F4/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7649233445688811,
3
+ "test_loss": 0.4680415391921997,
4
+ "test_macro_f1": 0.7626351149703829,
5
+ "test_runtime": 29.0877,
6
+ "test_samples_per_second": 316.182,
7
+ "test_steps_per_second": 3.06
8
+ }
data_curation&frozen_layers/dataset1_all/F4/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/vsphhome/fengguoqing/Geneformer",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.02,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "relu",
10
+ "hidden_dropout_prob": 0.02,
11
+ "hidden_size": 256,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 512,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 4,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "problem_type": "single_label_classification",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.28.0",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 25426
27
+ }
data_curation&frozen_layers/dataset1_all/F4/eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7649233445688811,
3
+ "test_loss": 0.4680415391921997,
4
+ "test_macro_f1": 0.7626351149703829,
5
+ "test_runtime": 29.0877,
6
+ "test_samples_per_second": 316.182,
7
+ "test_steps_per_second": 3.06
8
+ }
data_curation&frozen_layers/dataset1_all/F4/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e30a5c69f99c38707598aceaf66d323941584b710b19d93b3988986bae5712
3
+ size 65263500
data_curation&frozen_layers/dataset1_all/F4/predictions.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbdf534b6eb865c439392081dfc84a258031d58a19209b88cebb3d6dd5135b98
3
+ size 147612
data_curation&frozen_layers/dataset1_all/F4/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c53ac15664fb8080b3ea0c1aa4972bf7b42c11445f34425cc83da0049faacd5
3
+ size 41105778
data_curation&frozen_layers/dataset1_all/F4/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b67c12eae2f1f8f7e56cf5c4ad756416f58327aa916250e907b979787424ba
3
+ size 15006
data_curation&frozen_layers/dataset1_all/F4/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2abe87f776f0304299cfd007f13ebbea52990058bcfa21eeda52477f26c98d
3
+ size 1064