jflotz commited on
Commit
9f854bb
·
1 Parent(s): cb0c9ac

Training in progress, step 640000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:017625820feef9696ace4a5cbefe218b931336a9991e2245257a1d1342a0f729
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:570fd9971dd127676195908f1f0168c560d379e06053db1ec1c6889a24e76909
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d36d94ab11c86f651fecbc7a217529f6f250ac924b506ffb7d29aa9de0ca5bc
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407afab53633fc482bbe780f5224c6b1388fc7b7dd3f17aa73388222d02bc81c
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d31d0ded159b5f3b1a8c1ce1b7b826e4fbdda0cc5ba59eaba62ee8809a462e8f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069c7b8d28935c1bdaf707018f31232b5c5d0b17ca264ac835e0cab62f47f60b
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:398f2318b4ded20f61f24fd00e4055ea625eaa86f27bce6d1e31ca0965b80a81
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acfb6c5ca1e2a8aae6849b592c5e4c4b839246ca00f42f46fa8da24fee6f7051
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbb6422a1fad93ed77bc18b18a2c8499aa6774c77ffada9e53f436cd9d13ca0c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed15d29f5bdaa33109b0c66a9aa2dbc57339a469e3f71f40bec5ec342e0d6d49
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce446a79d89ee06223f9e2ae5f2f4290f0fb0ec1cfcfeee86b4b4ba2420ef30e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:962d2f5974f30660f555e22cdf0c12b334de1b8fc49a6a5192e63c3a6ee6eebe
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fbfbffabb738f483b0924b2fcbbbf4a31bdaab1f9760b5a622efffc7c59e8d2
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa2e736f0ce5f395a825ebebcf342c762745706534807a9b43b2a6a713704726
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a6b1230026b3f360d114a9d0f5608343d3dbe5979744e0c2b45d14032617ff1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bff10bd0517565104b7a365f7830fc50ca6a2c535ddf94460fc2737ad38c9a7
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c92afb5535b80215526b380f6cb7f75fa76f1d0853152e112df8d84246f00fed
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fffbd4923bc1f675f8117f531217edf35f82264eb436b97401dab9e4eeeaa0
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5fe487d9251494c826a0bd20a1c2515c3d527bc1906f192546685af4384e7fe
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d716524e67d0b69cb1b5ffc99aa56ed5a73b186c4b6bfbd6ff0ef38267147113
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5538e16e2cb8a022511fa1c4ff3a30d17572708626a194d4d5db3edb9bc5de72
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03a35091ba68234fa026466686321e8ce53cfe05ba57973184932ffc7464e369
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2594005253499334,
5
- "global_step": 630000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7566,11 +7566,131 @@
7566
  "learning_rate": 5.617947778908498e-05,
7567
  "loss": 0.3015,
7568
  "step": 630000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7569
  }
7570
  ],
7571
  "max_steps": 1000000,
7572
  "num_train_epochs": 2,
7573
- "total_flos": 4.2592477348671294e+22,
7574
  "trial_name": null,
7575
  "trial_params": null
7576
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2793910098792973,
5
+ "global_step": 640000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7566
  "learning_rate": 5.617947778908498e-05,
7567
  "loss": 0.3015,
7568
  "step": 630000
7569
+ },
7570
+ {
7571
+ "epoch": 1.26,
7572
+ "learning_rate": 5.60706751197385e-05,
7573
+ "loss": 0.3014,
7574
+ "step": 630500
7575
+ },
7576
+ {
7577
+ "epoch": 1.26,
7578
+ "learning_rate": 5.596193787225254e-05,
7579
+ "loss": 0.3008,
7580
+ "step": 631000
7581
+ },
7582
+ {
7583
+ "epoch": 1.26,
7584
+ "learning_rate": 5.585326634391049e-05,
7585
+ "loss": 0.3008,
7586
+ "step": 631500
7587
+ },
7588
+ {
7589
+ "epoch": 1.26,
7590
+ "learning_rate": 5.574466083181624e-05,
7591
+ "loss": 0.3014,
7592
+ "step": 632000
7593
+ },
7594
+ {
7595
+ "epoch": 1.26,
7596
+ "learning_rate": 5.563612163289308e-05,
7597
+ "loss": 0.3008,
7598
+ "step": 632500
7599
+ },
7600
+ {
7601
+ "epoch": 1.27,
7602
+ "learning_rate": 5.552764904388305e-05,
7603
+ "loss": 0.3016,
7604
+ "step": 633000
7605
+ },
7606
+ {
7607
+ "epoch": 1.27,
7608
+ "learning_rate": 5.541924336134609e-05,
7609
+ "loss": 0.3014,
7610
+ "step": 633500
7611
+ },
7612
+ {
7613
+ "epoch": 1.27,
7614
+ "learning_rate": 5.5310904881659116e-05,
7615
+ "loss": 0.301,
7616
+ "step": 634000
7617
+ },
7618
+ {
7619
+ "epoch": 1.27,
7620
+ "learning_rate": 5.5202633901015464e-05,
7621
+ "loss": 0.3008,
7622
+ "step": 634500
7623
+ },
7624
+ {
7625
+ "epoch": 1.27,
7626
+ "learning_rate": 5.5094430715423835e-05,
7627
+ "loss": 0.3017,
7628
+ "step": 635000
7629
+ },
7630
+ {
7631
+ "epoch": 1.27,
7632
+ "learning_rate": 5.4986295620707626e-05,
7633
+ "loss": 0.3005,
7634
+ "step": 635500
7635
+ },
7636
+ {
7637
+ "epoch": 1.27,
7638
+ "learning_rate": 5.487822891250406e-05,
7639
+ "loss": 0.3004,
7640
+ "step": 636000
7641
+ },
7642
+ {
7643
+ "epoch": 1.27,
7644
+ "learning_rate": 5.477023088626334e-05,
7645
+ "loss": 0.3008,
7646
+ "step": 636500
7647
+ },
7648
+ {
7649
+ "epoch": 1.27,
7650
+ "learning_rate": 5.4662301837247985e-05,
7651
+ "loss": 0.301,
7652
+ "step": 637000
7653
+ },
7654
+ {
7655
+ "epoch": 1.27,
7656
+ "learning_rate": 5.45544420605319e-05,
7657
+ "loss": 0.3008,
7658
+ "step": 637500
7659
+ },
7660
+ {
7661
+ "epoch": 1.28,
7662
+ "learning_rate": 5.4446651850999604e-05,
7663
+ "loss": 0.3012,
7664
+ "step": 638000
7665
+ },
7666
+ {
7667
+ "epoch": 1.28,
7668
+ "learning_rate": 5.433893150334538e-05,
7669
+ "loss": 0.3009,
7670
+ "step": 638500
7671
+ },
7672
+ {
7673
+ "epoch": 1.28,
7674
+ "learning_rate": 5.4231281312072544e-05,
7675
+ "loss": 0.301,
7676
+ "step": 639000
7677
+ },
7678
+ {
7679
+ "epoch": 1.28,
7680
+ "learning_rate": 5.4123701571492636e-05,
7681
+ "loss": 0.3009,
7682
+ "step": 639500
7683
+ },
7684
+ {
7685
+ "epoch": 1.28,
7686
+ "learning_rate": 5.401619257572453e-05,
7687
+ "loss": 0.3007,
7688
+ "step": 640000
7689
  }
7690
  ],
7691
  "max_steps": 1000000,
7692
  "num_train_epochs": 2,
7693
+ "total_flos": 4.326855661422541e+22,
7694
  "trial_name": null,
7695
  "trial_params": null
7696
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d36d94ab11c86f651fecbc7a217529f6f250ac924b506ffb7d29aa9de0ca5bc
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407afab53633fc482bbe780f5224c6b1388fc7b7dd3f17aa73388222d02bc81c
3
  size 449450757