jflotz committed on
Commit
b27adcc
·
1 Parent(s): 70dd462

Training in progress, step 960000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c2cdf0990859411fb7c85ca63e432d10f1471e48dc5f4dc74184b1445318034
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82bf941331b6147f0c38426d0407fa25ba10c0bd3b73ef74a1673cd375f5dea3
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79b1f53feeac1b0edb668de9a470df4f2aa602aafbbbab02b19fa387a049f810
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e039cd872f61b73cdde9f431db14aa3e4f6ae315b0cdef8e97e75cdb6be6fa4
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45d64f58ccbc19a103ee2b486e3ae0d8fd8e258fc7af4c2eaad0b83f3fc572a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73634de375042b3baa7b5c117beb24655dd2f7f5f57009b1eef654c82b3b44b5
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37baaaf1d34b48eab4b9f1b1e6566c4b0dfab731d43bb497206f05b08fc421b1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb4c489f0f7eeedc1b3b1654e89c9a4aafbf4af00e935321e2351196b10ff6c
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cabc42515409358ec344dd617c3827e15301aec86dd40b0703aaa747b9ab648c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:060c7b3ac0997105e228b3a17b751784076ba7d3219bd9bc28aad1940ff45553
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a0e8f2e6be0cbf1f9833f696c2eada7987f3d4cdaf496d37f24cbf254d548cb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:612d68332bbb7f66da9a1c4eee686f9c7adc9fb542398fcfd0c492b56e914c02
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47f83c0e7dd1b3e03445f4411dbb9c9cc1bcbb9c018fe7bb512c0dfe29ba0b84
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70baf209a4631065e5f1d839e29da7241e0065c3f0cd0e2e1c6f4c4e169d312e
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0346809d8d3cd0e408dd0cf4407790a6097435d9d23dfae50689beef17f52894
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc2e296643bbf328fc4ab16e724c938d6325131f9cb567ebc676b5d1a649c3a
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e01ce41a891cf7dcd8a18eccba168a8c04bb813917e1e626e5c83157e4ba5c0
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc74f55e3f6e86286f729f34589914d40a1b187feaf939dd73f214761d85e9c
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c9b4e343a2af7c2bb37729c1a96b7743275839cf7669689259960b84916a4f7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a192d4f815ba365d126dfc7fc40698d69e696351b09b7c12fff827e40276ec96
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ad6f8d9094ac28168658283f3ee5d2511e53f4b22c1d6e5c9b4e90d7a8c2ccb
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6eea93722769fd2cdfccc4deac474dd6ab3e6b96299bf9d74b4a0082fc65937
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4384531373314946,
5
- "global_step": 950000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11406,11 +11406,131 @@
11406
  "learning_rate": 1.0954708761809438e-05,
11407
  "loss": 0.2843,
11408
  "step": 950000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11409
  }
11410
  ],
11411
  "max_steps": 1000000,
11412
  "num_train_epochs": 2,
11413
- "total_flos": 6.422672857591212e+22,
11414
  "trial_name": null,
11415
  "trial_params": null
11416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.458382825392017,
5
+ "global_step": 960000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11406
  "learning_rate": 1.0954708761809438e-05,
11407
  "loss": 0.2843,
11408
  "step": 950000
11409
+ },
11410
+ {
11411
+ "epoch": 1.44,
11412
+ "learning_rate": 1.0935752500982175e-05,
11413
+ "loss": 0.2834,
11414
+ "step": 950500
11415
+ },
11416
+ {
11417
+ "epoch": 1.44,
11418
+ "learning_rate": 1.091698505917036e-05,
11419
+ "loss": 0.2853,
11420
+ "step": 951000
11421
+ },
11422
+ {
11423
+ "epoch": 1.44,
11424
+ "learning_rate": 1.0898406487683472e-05,
11425
+ "loss": 0.284,
11426
+ "step": 951500
11427
+ },
11428
+ {
11429
+ "epoch": 1.44,
11430
+ "learning_rate": 1.0880016837314599e-05,
11431
+ "loss": 0.2833,
11432
+ "step": 952000
11433
+ },
11434
+ {
11435
+ "epoch": 1.44,
11436
+ "learning_rate": 1.0861816158340365e-05,
11437
+ "loss": 0.2835,
11438
+ "step": 952500
11439
+ },
11440
+ {
11441
+ "epoch": 1.44,
11442
+ "learning_rate": 1.084380450052071e-05,
11443
+ "loss": 0.284,
11444
+ "step": 953000
11445
+ },
11446
+ {
11447
+ "epoch": 1.45,
11448
+ "learning_rate": 1.0825981913098828e-05,
11449
+ "loss": 0.2835,
11450
+ "step": 953500
11451
+ },
11452
+ {
11453
+ "epoch": 1.45,
11454
+ "learning_rate": 1.0808348444801e-05,
11455
+ "loss": 0.2836,
11456
+ "step": 954000
11457
+ },
11458
+ {
11459
+ "epoch": 1.45,
11460
+ "learning_rate": 1.0790904143836438e-05,
11461
+ "loss": 0.2834,
11462
+ "step": 954500
11463
+ },
11464
+ {
11465
+ "epoch": 1.45,
11466
+ "learning_rate": 1.0773649057897206e-05,
11467
+ "loss": 0.2833,
11468
+ "step": 955000
11469
+ },
11470
+ {
11471
+ "epoch": 1.45,
11472
+ "learning_rate": 1.0756583234158057e-05,
11473
+ "loss": 0.2839,
11474
+ "step": 955500
11475
+ },
11476
+ {
11477
+ "epoch": 1.45,
11478
+ "learning_rate": 1.073970671927628e-05,
11479
+ "loss": 0.2834,
11480
+ "step": 956000
11481
+ },
11482
+ {
11483
+ "epoch": 1.45,
11484
+ "learning_rate": 1.0723019559391643e-05,
11485
+ "loss": 0.2843,
11486
+ "step": 956500
11487
+ },
11488
+ {
11489
+ "epoch": 1.45,
11490
+ "learning_rate": 1.0706521800126198e-05,
11491
+ "loss": 0.2843,
11492
+ "step": 957000
11493
+ },
11494
+ {
11495
+ "epoch": 1.45,
11496
+ "learning_rate": 1.0690213486584175e-05,
11497
+ "loss": 0.284,
11498
+ "step": 957500
11499
+ },
11500
+ {
11501
+ "epoch": 1.45,
11502
+ "learning_rate": 1.0674094663351906e-05,
11503
+ "loss": 0.2833,
11504
+ "step": 958000
11505
+ },
11506
+ {
11507
+ "epoch": 1.46,
11508
+ "learning_rate": 1.0658165374497611e-05,
11509
+ "loss": 0.2836,
11510
+ "step": 958500
11511
+ },
11512
+ {
11513
+ "epoch": 1.46,
11514
+ "learning_rate": 1.0642425663571383e-05,
11515
+ "loss": 0.2839,
11516
+ "step": 959000
11517
+ },
11518
+ {
11519
+ "epoch": 1.46,
11520
+ "learning_rate": 1.062687557360497e-05,
11521
+ "loss": 0.2834,
11522
+ "step": 959500
11523
+ },
11524
+ {
11525
+ "epoch": 1.46,
11526
+ "learning_rate": 1.0611515147111736e-05,
11527
+ "loss": 0.2832,
11528
+ "step": 960000
11529
  }
11530
  ],
11531
  "max_steps": 1000000,
11532
  "num_train_epochs": 2,
11533
+ "total_flos": 6.4902759073727495e+22,
11534
  "trial_name": null,
11535
  "trial_params": null
11536
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79b1f53feeac1b0edb668de9a470df4f2aa602aafbbbab02b19fa387a049f810
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e039cd872f61b73cdde9f431db14aa3e4f6ae315b0cdef8e97e75cdb6be6fa4
3
  size 449450757