jflotz commited on
Commit
800e724
·
1 Parent(s): b27adcc

Training in progress, step 970000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82bf941331b6147f0c38426d0407fa25ba10c0bd3b73ef74a1673cd375f5dea3
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51baa4ddc0d5650abf5371aac2f77196b05031ccca7029b3d99ba99af85e57f
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e039cd872f61b73cdde9f431db14aa3e4f6ae315b0cdef8e97e75cdb6be6fa4
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acf04012905f76240c2902acedd8866c3a784e83992a5f4e0dc380bf807380dc
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73634de375042b3baa7b5c117beb24655dd2f7f5f57009b1eef654c82b3b44b5
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a45db7e85e08c084e49c40cab0c2c6092d92f81b5fa24290a645085ef74f75b
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fb4c489f0f7eeedc1b3b1654e89c9a4aafbf4af00e935321e2351196b10ff6c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d647c17f4fed38d972bfade7f44a26e438ac9b6b775a7bbc225c5be1e112bd
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:060c7b3ac0997105e228b3a17b751784076ba7d3219bd9bc28aad1940ff45553
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9cbd943c7dcfb1555090abbcd45a86173e47d10be2fa2e7308539ca596dff0
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:612d68332bbb7f66da9a1c4eee686f9c7adc9fb542398fcfd0c492b56e914c02
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5917abb04845a366f52356ca50f06ea044267bd039a587ed19cc120ed161e748
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70baf209a4631065e5f1d839e29da7241e0065c3f0cd0e2e1c6f4c4e169d312e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:582d77403f5af050452c09ec279770dab4724f234e767ab55c84c502beea2905
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acc2e296643bbf328fc4ab16e724c938d6325131f9cb567ebc676b5d1a649c3a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c6bb99cfe9f0c710fe2cc6cec0d5d888a917b4fa016be56cafcfbbd47bac76
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bc74f55e3f6e86286f729f34589914d40a1b187feaf939dd73f214761d85e9c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f81952f7451d63a6ff6bf67269698e7e674adab210fce43113020157f4cf03d
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a192d4f815ba365d126dfc7fc40698d69e696351b09b7c12fff827e40276ec96
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4f92b2c66061ec16f42f6ff8db5a75108eff8cc62884e9d5c3c7875be42d2c
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6eea93722769fd2cdfccc4deac474dd6ab3e6b96299bf9d74b4a0082fc65937
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf98a74aa6df8eeab9552258d949bc73dcd837ff3b88682e5ebe82858a949936
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.458382825392017,
5
- "global_step": 960000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11526,11 +11526,131 @@
11526
  "learning_rate": 1.0611515147111736e-05,
11527
  "loss": 0.2832,
11528
  "step": 960000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11529
  }
11530
  ],
11531
  "max_steps": 1000000,
11532
  "num_train_epochs": 2,
11533
- "total_flos": 6.4902759073727495e+22,
11534
  "trial_name": null,
11535
  "trial_params": null
11536
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4783125134525394,
5
+ "global_step": 970000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11526
  "learning_rate": 1.0611515147111736e-05,
11527
  "loss": 0.2832,
11528
  "step": 960000
11529
+ },
11530
+ {
11531
+ "epoch": 1.46,
11532
+ "learning_rate": 1.0596344426086501e-05,
11533
+ "loss": 0.2831,
11534
+ "step": 960500
11535
+ },
11536
+ {
11537
+ "epoch": 1.46,
11538
+ "learning_rate": 1.0581363452005424e-05,
11539
+ "loss": 0.2836,
11540
+ "step": 961000
11541
+ },
11542
+ {
11543
+ "epoch": 1.46,
11544
+ "learning_rate": 1.0566572265825932e-05,
11545
+ "loss": 0.2835,
11546
+ "step": 961500
11547
+ },
11548
+ {
11549
+ "epoch": 1.46,
11550
+ "learning_rate": 1.0551970907986557e-05,
11551
+ "loss": 0.2835,
11552
+ "step": 962000
11553
+ },
11554
+ {
11555
+ "epoch": 1.46,
11556
+ "learning_rate": 1.0537559418406849e-05,
11557
+ "loss": 0.2838,
11558
+ "step": 962500
11559
+ },
11560
+ {
11561
+ "epoch": 1.46,
11562
+ "learning_rate": 1.0523337836487271e-05,
11563
+ "loss": 0.2829,
11564
+ "step": 963000
11565
+ },
11566
+ {
11567
+ "epoch": 1.47,
11568
+ "learning_rate": 1.0509306201109092e-05,
11569
+ "loss": 0.2835,
11570
+ "step": 963500
11571
+ },
11572
+ {
11573
+ "epoch": 1.47,
11574
+ "learning_rate": 1.0495464550634267e-05,
11575
+ "loss": 0.284,
11576
+ "step": 964000
11577
+ },
11578
+ {
11579
+ "epoch": 1.47,
11580
+ "learning_rate": 1.0481812922905339e-05,
11581
+ "loss": 0.2837,
11582
+ "step": 964500
11583
+ },
11584
+ {
11585
+ "epoch": 1.47,
11586
+ "learning_rate": 1.046835135524533e-05,
11587
+ "loss": 0.2834,
11588
+ "step": 965000
11589
+ },
11590
+ {
11591
+ "epoch": 1.47,
11592
+ "learning_rate": 1.0455079884457653e-05,
11593
+ "loss": 0.2832,
11594
+ "step": 965500
11595
+ },
11596
+ {
11597
+ "epoch": 1.47,
11598
+ "learning_rate": 1.044199854682601e-05,
11599
+ "loss": 0.2837,
11600
+ "step": 966000
11601
+ },
11602
+ {
11603
+ "epoch": 1.47,
11604
+ "learning_rate": 1.0429107378114277e-05,
11605
+ "loss": 0.2834,
11606
+ "step": 966500
11607
+ },
11608
+ {
11609
+ "epoch": 1.47,
11610
+ "learning_rate": 1.0416406413566414e-05,
11611
+ "loss": 0.2833,
11612
+ "step": 967000
11613
+ },
11614
+ {
11615
+ "epoch": 1.47,
11616
+ "learning_rate": 1.0403895687906366e-05,
11617
+ "loss": 0.2839,
11618
+ "step": 967500
11619
+ },
11620
+ {
11621
+ "epoch": 1.47,
11622
+ "learning_rate": 1.0391575235337991e-05,
11623
+ "loss": 0.2828,
11624
+ "step": 968000
11625
+ },
11626
+ {
11627
+ "epoch": 1.48,
11628
+ "learning_rate": 1.0379445089544929e-05,
11629
+ "loss": 0.2837,
11630
+ "step": 968500
11631
+ },
11632
+ {
11633
+ "epoch": 1.48,
11634
+ "learning_rate": 1.0367505283690547e-05,
11635
+ "loss": 0.2832,
11636
+ "step": 969000
11637
+ },
11638
+ {
11639
+ "epoch": 1.48,
11640
+ "learning_rate": 1.0355755850417803e-05,
11641
+ "loss": 0.283,
11642
+ "step": 969500
11643
+ },
11644
+ {
11645
+ "epoch": 1.48,
11646
+ "learning_rate": 1.0344196821849202e-05,
11647
+ "loss": 0.2821,
11648
+ "step": 970000
11649
  }
11650
  ],
11651
  "max_steps": 1000000,
11652
  "num_train_epochs": 2,
11653
+ "total_flos": 6.557885636027719e+22,
11654
  "trial_name": null,
11655
  "trial_params": null
11656
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e039cd872f61b73cdde9f431db14aa3e4f6ae315b0cdef8e97e75cdb6be6fa4
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acf04012905f76240c2902acedd8866c3a784e83992a5f4e0dc380bf807380dc
3
  size 449450757