jflotz commited on
Commit
70dd462
·
1 Parent(s): a24ab0e

Training in progress, step 950000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eecce8a24d008f6560478b115a93f60ad26f968d3ddf31f980be259930161927
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c2cdf0990859411fb7c85ca63e432d10f1471e48dc5f4dc74184b1445318034
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e98104480ffe165c63b6085beb91814af4abc00786a31fbe9ca7364388e7fd7
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b1f53feeac1b0edb668de9a470df4f2aa602aafbbbab02b19fa387a049f810
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e5a9ccad5520e92d8772f310d7fdda3e07cfbb13ef5c7d62c7867e7bebc124b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45d64f58ccbc19a103ee2b486e3ae0d8fd8e258fc7af4c2eaad0b83f3fc572a
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c122c7c46f7a52340c6d76de6ecac3033b4eb22c4f622df7095c80bbdc58bbda
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37baaaf1d34b48eab4b9f1b1e6566c4b0dfab731d43bb497206f05b08fc421b1
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed89074af6bf56092de6f7f69bec6b0962f68e0ef26b7f849107336565f843e4
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cabc42515409358ec344dd617c3827e15301aec86dd40b0703aaa747b9ab648c
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c180309b549d4feb0af8c96f555ed5574acd58bfc58b660812ae5e9d9e08c50
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0e8f2e6be0cbf1f9833f696c2eada7987f3d4cdaf496d37f24cbf254d548cb
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0bb0af833997f5b0c50461291524b2fc678bcc9d26f6e79d5bfe28f62abb339
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47f83c0e7dd1b3e03445f4411dbb9c9cc1bcbb9c018fe7bb512c0dfe29ba0b84
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:824645f1c25785303da3dc203bf2689aba1f62a78c6bdfef5a484af4a0860aef
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0346809d8d3cd0e408dd0cf4407790a6097435d9d23dfae50689beef17f52894
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e242b745198753e15159abe0972314bc254dd07db526f1dc4d19cc5c285dfb8e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e01ce41a891cf7dcd8a18eccba168a8c04bb813917e1e626e5c83157e4ba5c0
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e565516de72353a91a8afad67f1979b57e142666bec47e27765241642f3d4f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c9b4e343a2af7c2bb37729c1a96b7743275839cf7669689259960b84916a4f7
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a619368d8fa94ba3412b069e884d3c02325231635774dd381c11c0f2a15299d7
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad6f8d9094ac28168658283f3ee5d2511e53f4b22c1d6e5c9b4e90d7a8c2ccb
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.418523449270972,
5
- "global_step": 940000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11286,11 +11286,131 @@
11286
  "learning_rate": 1.1373402388763346e-05,
11287
  "loss": 0.2834,
11288
  "step": 940000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11289
  }
11290
  ],
11291
  "max_steps": 1000000,
11292
  "num_train_epochs": 2,
11293
- "total_flos": 6.355066839128279e+22,
11294
  "trial_name": null,
11295
  "trial_params": null
11296
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4384531373314946,
5
+ "global_step": 950000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11286
  "learning_rate": 1.1373402388763346e-05,
11287
  "loss": 0.2834,
11288
  "step": 940000
11289
+ },
11290
+ {
11291
+ "epoch": 1.42,
11292
+ "learning_rate": 1.1350681316526965e-05,
11293
+ "loss": 0.2837,
11294
+ "step": 940500
11295
+ },
11296
+ {
11297
+ "epoch": 1.42,
11298
+ "learning_rate": 1.1328147928906494e-05,
11299
+ "loss": 0.2842,
11300
+ "step": 941000
11301
+ },
11302
+ {
11303
+ "epoch": 1.42,
11304
+ "learning_rate": 1.1305802287507358e-05,
11305
+ "loss": 0.2839,
11306
+ "step": 941500
11307
+ },
11308
+ {
11309
+ "epoch": 1.42,
11310
+ "learning_rate": 1.1283644453421678e-05,
11311
+ "loss": 0.284,
11312
+ "step": 942000
11313
+ },
11314
+ {
11315
+ "epoch": 1.42,
11316
+ "learning_rate": 1.1261674487228149e-05,
11317
+ "loss": 0.2839,
11318
+ "step": 942500
11319
+ },
11320
+ {
11321
+ "epoch": 1.42,
11322
+ "learning_rate": 1.1239892448991798e-05,
11323
+ "loss": 0.2834,
11324
+ "step": 943000
11325
+ },
11326
+ {
11327
+ "epoch": 1.43,
11328
+ "learning_rate": 1.1218298398263894e-05,
11329
+ "loss": 0.2833,
11330
+ "step": 943500
11331
+ },
11332
+ {
11333
+ "epoch": 1.43,
11334
+ "learning_rate": 1.1196892394081743e-05,
11335
+ "loss": 0.284,
11336
+ "step": 944000
11337
+ },
11338
+ {
11339
+ "epoch": 1.43,
11340
+ "learning_rate": 1.1175674494968552e-05,
11341
+ "loss": 0.2833,
11342
+ "step": 944500
11343
+ },
11344
+ {
11345
+ "epoch": 1.43,
11346
+ "learning_rate": 1.1154644758933235e-05,
11347
+ "loss": 0.2835,
11348
+ "step": 945000
11349
+ },
11350
+ {
11351
+ "epoch": 1.43,
11352
+ "learning_rate": 1.11338032434703e-05,
11353
+ "loss": 0.2832,
11354
+ "step": 945500
11355
+ },
11356
+ {
11357
+ "epoch": 1.43,
11358
+ "learning_rate": 1.1113150005559644e-05,
11359
+ "loss": 0.2836,
11360
+ "step": 946000
11361
+ },
11362
+ {
11363
+ "epoch": 1.43,
11364
+ "learning_rate": 1.1092685101666438e-05,
11365
+ "loss": 0.284,
11366
+ "step": 946500
11367
+ },
11368
+ {
11369
+ "epoch": 1.43,
11370
+ "learning_rate": 1.1072408587740942e-05,
11371
+ "loss": 0.2836,
11372
+ "step": 947000
11373
+ },
11374
+ {
11375
+ "epoch": 1.43,
11376
+ "learning_rate": 1.1052320519218383e-05,
11377
+ "loss": 0.2833,
11378
+ "step": 947500
11379
+ },
11380
+ {
11381
+ "epoch": 1.43,
11382
+ "learning_rate": 1.1032420951018755e-05,
11383
+ "loss": 0.2841,
11384
+ "step": 948000
11385
+ },
11386
+ {
11387
+ "epoch": 1.44,
11388
+ "learning_rate": 1.1012709937546722e-05,
11389
+ "loss": 0.2837,
11390
+ "step": 948500
11391
+ },
11392
+ {
11393
+ "epoch": 1.44,
11394
+ "learning_rate": 1.0993187532691458e-05,
11395
+ "loss": 0.2844,
11396
+ "step": 949000
11397
+ },
11398
+ {
11399
+ "epoch": 1.44,
11400
+ "learning_rate": 1.0973853789826454e-05,
11401
+ "loss": 0.2842,
11402
+ "step": 949500
11403
+ },
11404
+ {
11405
+ "epoch": 1.44,
11406
+ "learning_rate": 1.0954708761809438e-05,
11407
+ "loss": 0.2843,
11408
+ "step": 950000
11409
  }
11410
  ],
11411
  "max_steps": 1000000,
11412
  "num_train_epochs": 2,
11413
+ "total_flos": 6.422672857591212e+22,
11414
  "trial_name": null,
11415
  "trial_params": null
11416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e98104480ffe165c63b6085beb91814af4abc00786a31fbe9ca7364388e7fd7
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b1f53feeac1b0edb668de9a470df4f2aa602aafbbbab02b19fa387a049f810
3
  size 449450757