johannes-garstenauer commited on
Commit
c8264cf
·
1 Parent(s): bff532c

Training in progress, step 27056, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4698de0e23f77c43c7d7c19f6112e42a63822002183dc8b1a2ab285ac21f0aa
3
  size 532568837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa9f8158077b301841093e14494740418c7ed23f85bb3a12bff9d61d96227c9
3
  size 532568837
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1e1c130d8012bc2aa8497ca7a46fa49a2cc033f88285aeb5bc215484a0b231
3
  size 266276525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56456e4607933ddfb6ae28bb6789bd78bd0c9fa782a1e8089a4b8925f0830dc
3
  size 266276525
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fbd85b5ac578db379ec49d21f17ad20c2076e3867dfd9b5c0f6cde2cb74292b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7682d3f2001e37b37753d22113bcd3d790f7d57cddf931598175c9b18c3eb4f3
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:917afbe0ef4f28efbf0be27d57a29021ae93de87ada90c0ec81ec05030e9d7f9
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf54b39ea7fc685cb78099ee828320fc3969e2b58d837edef2ef10533162d734
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.7015177065767286,
5
  "eval_steps": 500,
6
- "global_step": 25632,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -341,13 +341,31 @@
341
  "learning_rate": 5.206576728499157e-06,
342
  "loss": 0.007,
343
  "step": 25500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  }
345
  ],
346
  "logging_steps": 500,
347
  "max_steps": 28464,
348
  "num_train_epochs": 3,
349
  "save_steps": 1424,
350
- "total_flos": 4.346236176262656e+17,
351
  "trial_name": null,
352
  "trial_params": null
353
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.851602023608769,
5
  "eval_steps": 500,
6
+ "global_step": 27056,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
341
  "learning_rate": 5.206576728499157e-06,
342
  "loss": 0.007,
343
  "step": 25500
344
+ },
345
+ {
346
+ "epoch": 2.74,
347
+ "learning_rate": 4.328274311410905e-06,
348
+ "loss": 0.0065,
349
+ "step": 26000
350
+ },
351
+ {
352
+ "epoch": 2.79,
353
+ "learning_rate": 3.4499718943226534e-06,
354
+ "loss": 0.0075,
355
+ "step": 26500
356
+ },
357
+ {
358
+ "epoch": 2.85,
359
+ "learning_rate": 2.5716694772344016e-06,
360
+ "loss": 0.0062,
361
+ "step": 27000
362
  }
363
  ],
364
  "logging_steps": 500,
365
  "max_steps": 28464,
366
  "num_train_epochs": 3,
367
  "save_steps": 1424,
368
+ "total_flos": 4.587700070947123e+17,
369
  "trial_name": null,
370
  "trial_params": null
371
  }