jflotz committed on
Commit
5bb91f4
·
1 Parent(s): 6a110e4

Training in progress, step 430000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c86f20ff2f16bc74cf4ee66c25e77d588ff8cbcd2b5cb9a79d80faea6a868a0
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf00e3d4f0141ee77d7fac9590f426eb5970750593815683fd979f14be837d4f
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e5e19ff37c6ce7607be01898171697733e36744c9c85c433e17ed4553954f2
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93aa30fc145e0d59395556dbcdda066166e18281fd9035e50e621fea7af14d91
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad8e71a98d36ad99a4f70d2c0839c4fb9061d716502b425596b1471b57d638f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21fa0c13fc0e3348f6228394f5e318945295debe26ba21ec91b2c06a47593869
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f427c751ea4b109969727e0c5f2ef9ef6fd7587de8192ab50fc2201ab4ba3ed9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.36830835117773,
5
- "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8406,11 +8406,211 @@
8406
  "eval_samples_per_second": 1103.391,
8407
  "eval_steps_per_second": 17.293,
8408
  "step": 420000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8409
  }
8410
  ],
8411
  "max_steps": 500000,
8412
  "num_train_epochs": 12,
8413
- "total_flos": 1.3418203740176262e+22,
8414
  "trial_name": null,
8415
  "trial_params": null
8416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.591363311920057,
5
+ "global_step": 430000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8406
  "eval_samples_per_second": 1103.391,
8407
  "eval_steps_per_second": 17.293,
8408
  "step": 420000
8409
+ },
8410
+ {
8411
+ "epoch": 9.38,
8412
+ "learning_rate": 2.9586456905128618e-05,
8413
+ "loss": 0.2562,
8414
+ "step": 420500
8415
+ },
8416
+ {
8417
+ "epoch": 9.39,
8418
+ "learning_rate": 2.9346475051519687e-05,
8419
+ "loss": 0.2583,
8420
+ "step": 421000
8421
+ },
8422
+ {
8423
+ "epoch": 9.39,
8424
+ "eval_loss": 0.2419823408126831,
8425
+ "eval_runtime": 2.1088,
8426
+ "eval_samples_per_second": 1089.249,
8427
+ "eval_steps_per_second": 17.071,
8428
+ "step": 421000
8429
+ },
8430
+ {
8431
+ "epoch": 9.4,
8432
+ "learning_rate": 2.910786732472815e-05,
8433
+ "loss": 0.257,
8434
+ "step": 421500
8435
+ },
8436
+ {
8437
+ "epoch": 9.41,
8438
+ "learning_rate": 2.887063633412981e-05,
8439
+ "loss": 0.2565,
8440
+ "step": 422000
8441
+ },
8442
+ {
8443
+ "epoch": 9.41,
8444
+ "eval_loss": 0.240878626704216,
8445
+ "eval_runtime": 2.008,
8446
+ "eval_samples_per_second": 1143.9,
8447
+ "eval_steps_per_second": 17.928,
8448
+ "step": 422000
8449
+ },
8450
+ {
8451
+ "epoch": 9.42,
8452
+ "learning_rate": 2.863478467404478e-05,
8453
+ "loss": 0.2563,
8454
+ "step": 422500
8455
+ },
8456
+ {
8457
+ "epoch": 9.44,
8458
+ "learning_rate": 2.8400314923709112e-05,
8459
+ "loss": 0.2562,
8460
+ "step": 423000
8461
+ },
8462
+ {
8463
+ "epoch": 9.44,
8464
+ "eval_loss": 0.2374911606311798,
8465
+ "eval_runtime": 2.1002,
8466
+ "eval_samples_per_second": 1093.715,
8467
+ "eval_steps_per_second": 17.141,
8468
+ "step": 423000
8469
+ },
8470
+ {
8471
+ "epoch": 9.45,
8472
+ "learning_rate": 2.816722964724636e-05,
8473
+ "loss": 0.256,
8474
+ "step": 423500
8475
+ },
8476
+ {
8477
+ "epoch": 9.46,
8478
+ "learning_rate": 2.793553139363981e-05,
8479
+ "loss": 0.2556,
8480
+ "step": 424000
8481
+ },
8482
+ {
8483
+ "epoch": 9.46,
8484
+ "eval_loss": 0.2397317737340927,
8485
+ "eval_runtime": 2.1055,
8486
+ "eval_samples_per_second": 1090.975,
8487
+ "eval_steps_per_second": 17.098,
8488
+ "step": 424000
8489
+ },
8490
+ {
8491
+ "epoch": 9.47,
8492
+ "learning_rate": 2.7705222696704366e-05,
8493
+ "loss": 0.256,
8494
+ "step": 424500
8495
+ },
8496
+ {
8497
+ "epoch": 9.48,
8498
+ "learning_rate": 2.7476306075059096e-05,
8499
+ "loss": 0.2562,
8500
+ "step": 425000
8501
+ },
8502
+ {
8503
+ "epoch": 9.48,
8504
+ "eval_loss": 0.23977774381637573,
8505
+ "eval_runtime": 2.06,
8506
+ "eval_samples_per_second": 1115.059,
8507
+ "eval_steps_per_second": 17.476,
8508
+ "step": 425000
8509
+ },
8510
+ {
8511
+ "epoch": 9.49,
8512
+ "learning_rate": 2.7248784032099478e-05,
8513
+ "loss": 0.2574,
8514
+ "step": 425500
8515
+ },
8516
+ {
8517
+ "epoch": 9.5,
8518
+ "learning_rate": 2.7022659055970144e-05,
8519
+ "loss": 0.2584,
8520
+ "step": 426000
8521
+ },
8522
+ {
8523
+ "epoch": 9.5,
8524
+ "eval_loss": 0.2388191670179367,
8525
+ "eval_runtime": 2.1089,
8526
+ "eval_samples_per_second": 1089.195,
8527
+ "eval_steps_per_second": 17.071,
8528
+ "step": 426000
8529
+ },
8530
+ {
8531
+ "epoch": 9.51,
8532
+ "learning_rate": 2.6797933619537604e-05,
8533
+ "loss": 0.2572,
8534
+ "step": 426500
8535
+ },
8536
+ {
8537
+ "epoch": 9.52,
8538
+ "learning_rate": 2.6574610180363166e-05,
8539
+ "loss": 0.2566,
8540
+ "step": 427000
8541
+ },
8542
+ {
8543
+ "epoch": 9.52,
8544
+ "eval_loss": 0.24121782183647156,
8545
+ "eval_runtime": 2.0738,
8546
+ "eval_samples_per_second": 1107.617,
8547
+ "eval_steps_per_second": 17.359,
8548
+ "step": 427000
8549
+ },
8550
+ {
8551
+ "epoch": 9.54,
8552
+ "learning_rate": 2.6352691180676286e-05,
8553
+ "loss": 0.2568,
8554
+ "step": 427500
8555
+ },
8556
+ {
8557
+ "epoch": 9.55,
8558
+ "learning_rate": 2.6132179047347505e-05,
8559
+ "loss": 0.256,
8560
+ "step": 428000
8561
+ },
8562
+ {
8563
+ "epoch": 9.55,
8564
+ "eval_loss": 0.24006181955337524,
8565
+ "eval_runtime": 1.9904,
8566
+ "eval_samples_per_second": 1154.041,
8567
+ "eval_steps_per_second": 18.087,
8568
+ "step": 428000
8569
+ },
8570
+ {
8571
+ "epoch": 9.56,
8572
+ "learning_rate": 2.5913076191862238e-05,
8573
+ "loss": 0.2564,
8574
+ "step": 428500
8575
+ },
8576
+ {
8577
+ "epoch": 9.57,
8578
+ "learning_rate": 2.5695385010294165e-05,
8579
+ "loss": 0.2564,
8580
+ "step": 429000
8581
+ },
8582
+ {
8583
+ "epoch": 9.57,
8584
+ "eval_loss": 0.24071797728538513,
8585
+ "eval_runtime": 1.9595,
8586
+ "eval_samples_per_second": 1172.235,
8587
+ "eval_steps_per_second": 18.372,
8588
+ "step": 429000
8589
+ },
8590
+ {
8591
+ "epoch": 9.58,
8592
+ "learning_rate": 2.5479107883279144e-05,
8593
+ "loss": 0.2564,
8594
+ "step": 429500
8595
+ },
8596
+ {
8597
+ "epoch": 9.59,
8598
+ "learning_rate": 2.5264247175989292e-05,
8599
+ "loss": 0.2564,
8600
+ "step": 430000
8601
+ },
8602
+ {
8603
+ "epoch": 9.59,
8604
+ "eval_loss": 0.23974178731441498,
8605
+ "eval_runtime": 2.0589,
8606
+ "eval_samples_per_second": 1115.617,
8607
+ "eval_steps_per_second": 17.485,
8608
+ "step": 430000
8609
  }
8610
  ],
8611
  "max_steps": 500000,
8612
  "num_train_epochs": 12,
8613
+ "total_flos": 1.3737690878950762e+22,
8614
  "trial_name": null,
8615
  "trial_params": null
8616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e5e19ff37c6ce7607be01898171697733e36744c9c85c433e17ed4553954f2
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93aa30fc145e0d59395556dbcdda066166e18281fd9035e50e621fea7af14d91
3
  size 102501541