jflotz commited on
Commit
f16df8c
·
1 Parent(s): 0c30c6d

Training in progress, step 870000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33c2dd3be5413f957dd0bc28a15d74ed06d6494592da1e0b0b7a9b5cff98543f
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7de9e7a475185d5a4f5233f2eaa1e382e39bf61b0b26422c710d12b1c428f053
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a71f43fa4af460f6d355ec42e55c1a209c0d00e00e1e71a83db48fda6386bd08
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cc5f078f248ee95608611561bbd9aea19a49de30cb9f351661140392a113dcb
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e95df6a4d037778eea05d14b6a856440647a0d0d1cd96b14fe0089db3d2ca05
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1e6c47e318d84077f6a967794dd3042b980af9f2017841afbd1886d7db9904
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e461a2edfce20979aaf8643fc972588013ea8710a13005a233ed1e93cff795c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4623ab6fbb26e814bfa3dd96c774e4b841513bb02553b1e19ff1a3408dd2f2ba
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ce93d230c2b3e0029a85fd7329dfec40cd7b3ce89657776854f91684f3563c3
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84195dd425f6794c5806a9bed14c1dd56437e46af995a7b88fe65b75813426e
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e389a444854e3a8498410c2bcb75d082ab0efa10fdcbb6bf4f0273b2bb86e5d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a330644903d9cebc0a0cef1ede19ebdfcc46633ad2affcbf69e9f23412d356cb
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5700265db9033170469a10a97e864c396caa87c59262a13775cece6d18e13d0d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d23d54ddb4a3e42a55be419de46e8b2afb37f587b9bc4ebb416244cb1aa1460d
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0f1d070f7247d43e02284bd789b296fa89aaa5d91db9a24f1226ebc2a6cb000
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50c90440bc56cdec79071a507653e9cc50024a2dbba647fc3d934431503f32da
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b4a68e81a6718c76544b602cd99c9a2dc4ab2f4f768d0dfcb99f521ee1fd60e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41c8eaa88d8a5dc08e3c2e3def98b7a8b86cad84a79d3f83ed3a1a7c681beb0
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a84068892fd087eddda1aa0aa2f97caf64ae34a34d55297ff6867be4553c6f26
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1faf83b262a97abbbb6024851cdc5f7c1c92261d8aa4e9260f21590a330cc4e
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31e18eaa468ae4a1684f551cb018df5d6a9cb8eadcd4fdd17b09a3c12ad301d8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843b249180999e01b18cb497ce97e55f21bbe186ae977c32256fbedc40db0e39
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2590859447867921,
5
- "global_step": 860000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10326,11 +10326,131 @@
10326
  "learning_rate": 1.7368946912546556e-05,
10327
  "loss": 0.2878,
10328
  "step": 860000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10329
  }
10330
  ],
10331
  "max_steps": 1000000,
10332
  "num_train_epochs": 2,
10333
- "total_flos": 5.8141983412238066e+22,
10334
  "trial_name": null,
10335
  "trial_params": null
10336
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2790156328473148,
5
+ "global_step": 870000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10326
  "learning_rate": 1.7368946912546556e-05,
10327
  "loss": 0.2878,
10328
  "step": 860000
10329
+ },
10330
+ {
10331
+ "epoch": 1.26,
10332
+ "learning_rate": 1.7317340758783407e-05,
10333
+ "loss": 0.2875,
10334
+ "step": 860500
10335
+ },
10336
+ {
10337
+ "epoch": 1.26,
10338
+ "learning_rate": 1.726590597701708e-05,
10339
+ "loss": 0.2874,
10340
+ "step": 861000
10341
+ },
10342
+ {
10343
+ "epoch": 1.26,
10344
+ "learning_rate": 1.7214642707868325e-05,
10345
+ "loss": 0.2863,
10346
+ "step": 861500
10347
+ },
10348
+ {
10349
+ "epoch": 1.26,
10350
+ "learning_rate": 1.7163551091488952e-05,
10351
+ "loss": 0.2869,
10352
+ "step": 862000
10353
+ },
10354
+ {
10355
+ "epoch": 1.26,
10356
+ "learning_rate": 1.711263126756148e-05,
10357
+ "loss": 0.2867,
10358
+ "step": 862500
10359
+ },
10360
+ {
10361
+ "epoch": 1.27,
10362
+ "learning_rate": 1.7061883375298788e-05,
10363
+ "loss": 0.2874,
10364
+ "step": 863000
10365
+ },
10366
+ {
10367
+ "epoch": 1.27,
10368
+ "learning_rate": 1.7011307553443647e-05,
10369
+ "loss": 0.286,
10370
+ "step": 863500
10371
+ },
10372
+ {
10373
+ "epoch": 1.27,
10374
+ "learning_rate": 1.6960903940268456e-05,
10375
+ "loss": 0.2873,
10376
+ "step": 864000
10377
+ },
10378
+ {
10379
+ "epoch": 1.27,
10380
+ "learning_rate": 1.6910672673574746e-05,
10381
+ "loss": 0.2867,
10382
+ "step": 864500
10383
+ },
10384
+ {
10385
+ "epoch": 1.27,
10386
+ "learning_rate": 1.6860613890692876e-05,
10387
+ "loss": 0.2872,
10388
+ "step": 865000
10389
+ },
10390
+ {
10391
+ "epoch": 1.27,
10392
+ "learning_rate": 1.6810727728481673e-05,
10393
+ "loss": 0.2864,
10394
+ "step": 865500
10395
+ },
10396
+ {
10397
+ "epoch": 1.27,
10398
+ "learning_rate": 1.6761014323327962e-05,
10399
+ "loss": 0.2866,
10400
+ "step": 866000
10401
+ },
10402
+ {
10403
+ "epoch": 1.27,
10404
+ "learning_rate": 1.6711473811146333e-05,
10405
+ "loss": 0.2867,
10406
+ "step": 866500
10407
+ },
10408
+ {
10409
+ "epoch": 1.27,
10410
+ "learning_rate": 1.6662106327378645e-05,
10411
+ "loss": 0.2869,
10412
+ "step": 867000
10413
+ },
10414
+ {
10415
+ "epoch": 1.27,
10416
+ "learning_rate": 1.6612912006993688e-05,
10417
+ "loss": 0.2867,
10418
+ "step": 867500
10419
+ },
10420
+ {
10421
+ "epoch": 1.28,
10422
+ "learning_rate": 1.6563890984486884e-05,
10423
+ "loss": 0.2866,
10424
+ "step": 868000
10425
+ },
10426
+ {
10427
+ "epoch": 1.28,
10428
+ "learning_rate": 1.6515043393879825e-05,
10429
+ "loss": 0.287,
10430
+ "step": 868500
10431
+ },
10432
+ {
10433
+ "epoch": 1.28,
10434
+ "learning_rate": 1.6466369368719955e-05,
10435
+ "loss": 0.2868,
10436
+ "step": 869000
10437
+ },
10438
+ {
10439
+ "epoch": 1.28,
10440
+ "learning_rate": 1.641786904208022e-05,
10441
+ "loss": 0.2865,
10442
+ "step": 869500
10443
+ },
10444
+ {
10445
+ "epoch": 1.28,
10446
+ "learning_rate": 1.6369542546558626e-05,
10447
+ "loss": 0.2864,
10448
+ "step": 870000
10449
  }
10450
  ],
10451
  "max_steps": 1000000,
10452
  "num_train_epochs": 2,
10453
+ "total_flos": 5.881811674297794e+22,
10454
  "trial_name": null,
10455
  "trial_params": null
10456
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a71f43fa4af460f6d355ec42e55c1a209c0d00e00e1e71a83db48fda6386bd08
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cc5f078f248ee95608611561bbd9aea19a49de30cb9f351661140392a113dcb
3
  size 449450757