hmankar01 committed on
Commit
8fcf844
·
verified ·
1 Parent(s): 28f9098

Training in progress, step 4740, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4829cae586c828baa6189416c087aa87d1b9e6a2603d20caa3563af7661a847
3
  size 5844445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ab1687b37edfa6cf6463582f81b30a681bd63ff74cf8bc4efe9aa9bd720530
3
  size 5844445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab4b8106faa7cc8bb099741f2b26d3fa0672ca996eaa6b3b5cf4f3f57f527b8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa7382b12bd08b7f2f5ccd8c29ce080ce75699cda0f2989c82e1d4f1074b7dd
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7650cad6b450c227a8c834823d4b701c5904ba96c883d23d8fc7eceb217f63f4
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe295d6cc46bbcfa1928fae249b1b4ad99da00bc5e60959482bdd922e1479694
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dfcb0a331b9ecb9f4d1d087b66f61339c269b0375a009ec6de8c9912426f996
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4daf6d365b44310c9047ae774effc5e7cada4389a255cbf1504844af1e27d58
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.8985446108415946,
6
  "eval_steps": 500,
7
- "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -330,6 +330,20 @@
330
  "learning_rate": 2.9324894514767934e-06,
331
  "loss": 3.0355,
332
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  }
334
  ],
335
  "logging_steps": 100,
@@ -344,12 +358,12 @@
344
  "should_evaluate": false,
345
  "should_log": false,
346
  "should_save": true,
347
- "should_training_stop": false
348
  },
349
  "attributes": {}
350
  }
351
  },
352
- "total_flos": 1.4689906152977203e+17,
353
  "train_batch_size": 4,
354
  "trial_name": null,
355
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.9997890740350137,
6
  "eval_steps": 500,
7
+ "global_step": 4740,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
330
  "learning_rate": 2.9324894514767934e-06,
331
  "loss": 3.0355,
332
  "step": 4500
333
+ },
334
+ {
335
+ "epoch": 1.9407298038388525,
336
+ "grad_norm": 0.0,
337
+ "learning_rate": 1.888185654008439e-06,
338
+ "loss": 3.0888,
339
+ "step": 4600
340
+ },
341
+ {
342
+ "epoch": 1.9829149968361106,
343
+ "grad_norm": 0.0,
344
+ "learning_rate": 8.333333333333333e-07,
345
+ "loss": 3.0669,
346
+ "step": 4700
347
  }
348
  ],
349
  "logging_steps": 100,
 
358
  "should_evaluate": false,
359
  "should_log": false,
360
  "should_save": true,
361
+ "should_training_stop": true
362
  },
363
  "attributes": {}
364
  }
365
  },
366
+ "total_flos": 1.546882110676992e+17,
367
  "train_batch_size": 4,
368
  "trial_name": null,
369
  "trial_params": null