jflotz commited on
Commit
cb0c9ac
·
1 Parent(s): c704192

Training in progress, step 630000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a097619eaa9ef720984e4ddd9dde8f3b697ba4e3a54ad0e09caff9a338f70f3
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017625820feef9696ace4a5cbefe218b931336a9991e2245257a1d1342a0f729
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6297a4aa090b90aa3635b3517c4b127894ad2c42e14fd6d228c6743ce17aee7d
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d36d94ab11c86f651fecbc7a217529f6f250ac924b506ffb7d29aa9de0ca5bc
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ef2eb325ca73aeb9167731426720e192b4dc476427d6c01affc7b3b2a3e583b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31d0ded159b5f3b1a8c1ce1b7b826e4fbdda0cc5ba59eaba62ee8809a462e8f
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ef8f093c13597a9033ad0961b449077de3ab17c6b5e598e7ffe900737a37b62
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:398f2318b4ded20f61f24fd00e4055ea625eaa86f27bce6d1e31ca0965b80a81
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e20445f0abf4410bec575adab612ba675a2a9e22a555ffb2b2fb85961556a332
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb6422a1fad93ed77bc18b18a2c8499aa6774c77ffada9e53f436cd9d13ca0c
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20bb77018a33c479ea4b69c28339611c22b8e8641554d8590d8198df56a8bc21
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce446a79d89ee06223f9e2ae5f2f4290f0fb0ec1cfcfeee86b4b4ba2420ef30e
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34450b80230704bfaa6dfc6f8672078f09277102e090e32e505412e8b0a06323
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbfbffabb738f483b0924b2fcbbbf4a31bdaab1f9760b5a622efffc7c59e8d2
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f073590871f68cf52e503ea996a2b041d76f54fc155c63b39985473c3a2c6e9
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6b1230026b3f360d114a9d0f5608343d3dbe5979744e0c2b45d14032617ff1
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb10ba8d357eb68fad272ff5bc8fb10ce9e5818ee9a6bd185f2331209c9c5eee
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c92afb5535b80215526b380f6cb7f75fa76f1d0853152e112df8d84246f00fed
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bdc5c8f1d9880eb7b2e1404af5eeae63b870215c24de51ca47db7d2d9d87809
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5fe487d9251494c826a0bd20a1c2515c3d527bc1906f192546685af4384e7fe
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e0be0a1dddd0483c31953c97f497ac534ef42fa519c13cb1ceaab964eeaafea
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5538e16e2cb8a022511fa1c4ff3a30d17572708626a194d4d5db3edb9bc5de72
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2394100408205695,
5
- "global_step": 620000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7446,11 +7446,131 @@
7446
  "learning_rate": 5.8368810393753684e-05,
7447
  "loss": 0.3021,
7448
  "step": 620000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7449
  }
7450
  ],
7451
  "max_steps": 1000000,
7452
  "num_train_epochs": 2,
7453
- "total_flos": 4.191638959928546e+22,
7454
  "trial_name": null,
7455
  "trial_params": null
7456
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2594005253499334,
5
+ "global_step": 630000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7446
  "learning_rate": 5.8368810393753684e-05,
7447
  "loss": 0.3021,
7448
  "step": 620000
7449
+ },
7450
+ {
7451
+ "epoch": 1.24,
7452
+ "learning_rate": 5.8258761986213015e-05,
7453
+ "loss": 0.3027,
7454
+ "step": 620500
7455
+ },
7456
+ {
7457
+ "epoch": 1.24,
7458
+ "learning_rate": 5.814877301838688e-05,
7459
+ "loss": 0.3021,
7460
+ "step": 621000
7461
+ },
7462
+ {
7463
+ "epoch": 1.24,
7464
+ "learning_rate": 5.803884379098094e-05,
7465
+ "loss": 0.3022,
7466
+ "step": 621500
7467
+ },
7468
+ {
7469
+ "epoch": 1.24,
7470
+ "learning_rate": 5.7928974604537494e-05,
7471
+ "loss": 0.3022,
7472
+ "step": 622000
7473
+ },
7474
+ {
7475
+ "epoch": 1.24,
7476
+ "learning_rate": 5.781916575943469e-05,
7477
+ "loss": 0.3022,
7478
+ "step": 622500
7479
+ },
7480
+ {
7481
+ "epoch": 1.25,
7482
+ "learning_rate": 5.770941755588573e-05,
7483
+ "loss": 0.3023,
7484
+ "step": 623000
7485
+ },
7486
+ {
7487
+ "epoch": 1.25,
7488
+ "learning_rate": 5.7599730293938e-05,
7489
+ "loss": 0.302,
7490
+ "step": 623500
7491
+ },
7492
+ {
7493
+ "epoch": 1.25,
7494
+ "learning_rate": 5.749010427347233e-05,
7495
+ "loss": 0.3021,
7496
+ "step": 624000
7497
+ },
7498
+ {
7499
+ "epoch": 1.25,
7500
+ "learning_rate": 5.738053979420199e-05,
7501
+ "loss": 0.3019,
7502
+ "step": 624500
7503
+ },
7504
+ {
7505
+ "epoch": 1.25,
7506
+ "learning_rate": 5.7271037155672156e-05,
7507
+ "loss": 0.3015,
7508
+ "step": 625000
7509
+ },
7510
+ {
7511
+ "epoch": 1.25,
7512
+ "learning_rate": 5.716159665725883e-05,
7513
+ "loss": 0.3016,
7514
+ "step": 625500
7515
+ },
7516
+ {
7517
+ "epoch": 1.25,
7518
+ "learning_rate": 5.7052218598168154e-05,
7519
+ "loss": 0.3017,
7520
+ "step": 626000
7521
+ },
7522
+ {
7523
+ "epoch": 1.25,
7524
+ "learning_rate": 5.69429032774356e-05,
7525
+ "loss": 0.3021,
7526
+ "step": 626500
7527
+ },
7528
+ {
7529
+ "epoch": 1.25,
7530
+ "learning_rate": 5.6833650993925016e-05,
7531
+ "loss": 0.3015,
7532
+ "step": 627000
7533
+ },
7534
+ {
7535
+ "epoch": 1.25,
7536
+ "learning_rate": 5.6724462046328025e-05,
7537
+ "loss": 0.3021,
7538
+ "step": 627500
7539
+ },
7540
+ {
7541
+ "epoch": 1.26,
7542
+ "learning_rate": 5.661533673316303e-05,
7543
+ "loss": 0.3026,
7544
+ "step": 628000
7545
+ },
7546
+ {
7547
+ "epoch": 1.26,
7548
+ "learning_rate": 5.6506275352774447e-05,
7549
+ "loss": 0.3009,
7550
+ "step": 628500
7551
+ },
7552
+ {
7553
+ "epoch": 1.26,
7554
+ "learning_rate": 5.639727820333198e-05,
7555
+ "loss": 0.3017,
7556
+ "step": 629000
7557
+ },
7558
+ {
7559
+ "epoch": 1.26,
7560
+ "learning_rate": 5.62883455828296e-05,
7561
+ "loss": 0.3016,
7562
+ "step": 629500
7563
+ },
7564
+ {
7565
+ "epoch": 1.26,
7566
+ "learning_rate": 5.617947778908498e-05,
7567
+ "loss": 0.3015,
7568
+ "step": 630000
7569
  }
7570
  ],
7571
  "max_steps": 1000000,
7572
  "num_train_epochs": 2,
7573
+ "total_flos": 4.2592477348671294e+22,
7574
  "trial_name": null,
7575
  "trial_params": null
7576
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6297a4aa090b90aa3635b3517c4b127894ad2c42e14fd6d228c6743ce17aee7d
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d36d94ab11c86f651fecbc7a217529f6f250ac924b506ffb7d29aa9de0ca5bc
3
  size 449450757