alessandronascimento committed on
Commit
24fb2fe
·
verified ·
1 Parent(s): 764abdb

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:219be41537bbb98fca70ca3f58664027f22ac8b626937fc54ed0c58b1a583287
3
  size 1713050034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fcef207828e97d334020207b7c4e0ce8b10911f91e884313cc27540bcf22215
3
  size 1713050034
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c557980c0526bbe9b748ef020546f7bc8e22ad8fcbd68d484140a76b913f895
3
  size 816721594
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3ed2b4efa016d58457f3d8931b71a9ebaaae4412d1d353411975961be30dbac
3
  size 816721594
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5f4c3829d3c61d60d4aa81f39b1ae90c914023d099d2c2879c131506416ca01
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba1195da48c8391083911b2a9cc3e15ab8d82f8f8936d669e0116eb962ce738
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c3e6445408f158b3d87cbf8d2d8e36840ad25379ff7117c45407306acac4e6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edebf099edd988740678361da4ee055b665f3729f5f42b95a748d6bab1b73604
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 6.317481165751815e-05,
3
- "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-11916",
4
- "epoch": 2.9998111901315374,
5
  "eval_steps": 500,
6
- "global_step": 11916,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -192,6 +192,70 @@
192
  "eval_samples_per_second": 15.248,
193
  "eval_steps_per_second": 0.953,
194
  "step": 11916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
196
  ],
197
  "logging_steps": 500,
@@ -220,7 +284,7 @@
220
  "attributes": {}
221
  }
222
  },
223
- "total_flos": 8.002002428481992e+18,
224
  "train_batch_size": 32,
225
  "trial_name": null,
226
  "trial_params": null
 
1
  {
2
+ "best_metric": 6.0006157582392916e-05,
3
+ "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-15889",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 15889,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
192
  "eval_samples_per_second": 15.248,
193
  "eval_steps_per_second": 0.953,
194
  "step": 11916
195
+ },
196
+ {
197
+ "epoch": 3.020957895399333,
198
+ "grad_norm": 0.0028076171875,
199
+ "learning_rate": 1.672753744810037e-05,
200
+ "loss": 0.0002,
201
+ "step": 12000
202
+ },
203
+ {
204
+ "epoch": 3.146831141040972,
205
+ "grad_norm": 0.00225830078125,
206
+ "learning_rate": 1.6413686942993405e-05,
207
+ "loss": 0.0001,
208
+ "step": 12500
209
+ },
210
+ {
211
+ "epoch": 3.2727043866826104,
212
+ "grad_norm": 0.00579833984375,
213
+ "learning_rate": 1.608871550088606e-05,
214
+ "loss": 0.0002,
215
+ "step": 13000
216
+ },
217
+ {
218
+ "epoch": 3.3985776323242494,
219
+ "grad_norm": 0.0086669921875,
220
+ "learning_rate": 1.5753186602186207e-05,
221
+ "loss": 0.0002,
222
+ "step": 13500
223
+ },
224
+ {
225
+ "epoch": 3.5244508779658883,
226
+ "grad_norm": 0.0022735595703125,
227
+ "learning_rate": 1.540768203327934e-05,
228
+ "loss": 0.0001,
229
+ "step": 14000
230
+ },
231
+ {
232
+ "epoch": 3.6503241236075272,
233
+ "grad_norm": 0.01202392578125,
234
+ "learning_rate": 1.5052800877746915e-05,
235
+ "loss": 0.0001,
236
+ "step": 14500
237
+ },
238
+ {
239
+ "epoch": 3.776197369249166,
240
+ "grad_norm": 0.01190185546875,
241
+ "learning_rate": 1.4689158477592433e-05,
242
+ "loss": 0.0001,
243
+ "step": 15000
244
+ },
245
+ {
246
+ "epoch": 3.902070614890805,
247
+ "grad_norm": 0.017822265625,
248
+ "learning_rate": 1.4317385366276393e-05,
249
+ "loss": 0.0001,
250
+ "step": 15500
251
+ },
252
+ {
253
+ "epoch": 4.0,
254
+ "eval_loss": 6.0006157582392916e-05,
255
+ "eval_runtime": 16792.4882,
256
+ "eval_samples_per_second": 15.238,
257
+ "eval_steps_per_second": 0.952,
258
+ "step": 15889
259
  }
260
  ],
261
  "logging_steps": 500,
 
284
  "attributes": {}
285
  }
286
  },
287
+ "total_flos": 1.0669336571309322e+19,
288
  "train_batch_size": 32,
289
  "trial_name": null,
290
  "trial_params": null