angelafearn committed on
Commit
085c5be
·
verified ·
1 Parent(s): 136ba52

Training in progress, step 3709, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f273e86ecaf143a463e010f3c1027db2e47f7d3526532cfba4f77145ee79f040
3
  size 3132668808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2faae301c51bf9afe27a5efb035500ac0f1f3e8aba96489672b75ac92bd21a3
3
  size 3132668808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49bacab0523adf25453aa4ba8d85ba8b23d85c7604b730d44f0edc6a3f5fa162
3
  size 6265681949
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:136f0df911cbcdc8a57afb16b156978365648865f387a2f4b397f9ce1779d141
3
  size 6265681949
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad667a164a35e2820f486626bc7f4c131332a14307c3cfc69a9c34f68e2eeae8
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fb09fa8a03f019938357b8e6dd70c665a6f839ceffe9c4f41bfe23cb0ea6e77
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2176708524de952d0af3e052980f65d3f09eb5ddcc79198ab72c8c435ec6630d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25d19e1a77f01614e1751ba7d46a821b26e1f95dee82257ac6ee387fe4714209
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 3500,
3
  "best_metric": 0.021653667092323303,
4
  "best_model_checkpoint": "gs://text-language-finetuning-vertex/outputs/lim_gec/full/full/checkpoint-3500",
5
- "epoch": 0.9438732513062532,
6
  "eval_steps": 500,
7
- "global_step": 3500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -309,6 +309,20 @@
309
  "eval_samples_per_second": 127.579,
310
  "eval_steps_per_second": 3.989,
311
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  }
313
  ],
314
  "logging_steps": 100,
@@ -323,12 +337,12 @@
323
  "should_evaluate": false,
324
  "should_log": false,
325
  "should_save": true,
326
- "should_training_stop": false
327
  },
328
  "attributes": {}
329
  }
330
  },
331
- "total_flos": 8.878542338654208e+17,
332
  "train_batch_size": 32,
333
  "trial_name": null,
334
  "trial_params": null
 
2
  "best_global_step": 3500,
3
  "best_metric": 0.021653667092323303,
4
  "best_model_checkpoint": "gs://text-language-finetuning-vertex/outputs/lim_gec/full/full/checkpoint-3500",
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 3709,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
309
  "eval_samples_per_second": 127.579,
310
  "eval_steps_per_second": 3.989,
311
  "step": 3500
312
+ },
313
+ {
314
+ "epoch": 0.9708410584864319,
315
+ "grad_norm": 0.02992485836148262,
316
+ "learning_rate": 1.0283577438454347e-05,
317
+ "loss": 0.20760541915893554,
318
+ "step": 3600
319
+ },
320
+ {
321
+ "epoch": 0.9978088656666105,
322
+ "grad_norm": 0.05102963000535965,
323
+ "learning_rate": 9.348706762231224e-07,
324
+ "loss": 0.19801679611206055,
325
+ "step": 3700
326
  }
327
  ],
328
  "logging_steps": 100,
 
337
  "should_evaluate": false,
338
  "should_log": false,
339
  "should_save": true,
340
+ "should_training_stop": true
341
  },
342
  "attributes": {}
343
  }
344
  },
345
+ "total_flos": 9.407808395601961e+17,
346
  "train_batch_size": 32,
347
  "trial_name": null,
348
  "trial_params": null