TweedleDeepLearnings commited on
Commit
5a18595
·
verified ·
1 Parent(s): b41a499

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5933cadc048a0f98ce1186dda8e044600330c8323c3aeb7103f03016dda0f58f
3
  size 661507488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2286073354c955c2cea250cf8625b7a0c9312cbfaaf55547ee67472f9f04f82c
3
  size 661507488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2adae7c3227da6f9cff61afa72fc8e2acb185b9ee9233ea9f4606dd3ad8bf888
3
  size 1304683322
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:533faf319c29cb9bd6b4a0359ba723109dca179a7b3c53b9406447277fc5f5f0
3
  size 1304683322
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a69626effe2e0481176da89da85564e0a97bc6ef6619523c42ce818b527abcc
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678c52ded593de0bac8adbb98950814c0abc224b38c12bfcb75019cd9abc88e7
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eac5f39cff78dbcb4d12b97b9e8b1e16624c544af38c507e787f6f6a19013821
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:388b33af9ccc80fdb83f03a3a59d7426230bca392fb13bf15fe996173ee1addc
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.11470788717269897,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 4.465116279069767,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 9.739,
3624
  "eval_steps_per_second": 9.739,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 4.6370194800623616e+17,
3646
  "train_batch_size": 16,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.1146140992641449,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 4.604651162790698,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 9.739,
3624
  "eval_steps_per_second": 9.739,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 4.474418604651163,
3629
+ "grad_norm": 1.754858374595642,
3630
+ "learning_rate": 1.6303105993536925e-07,
3631
+ "loss": 0.0311,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 4.4837209302325585,
3636
+ "grad_norm": 2.2970986366271973,
3637
+ "learning_rate": 1.4634005624572287e-07,
3638
+ "loss": 0.0338,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 4.493023255813953,
3643
+ "grad_norm": 1.9835330247879028,
3644
+ "learning_rate": 1.3054737092143243e-07,
3645
+ "loss": 0.0302,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 4.502325581395349,
3650
+ "grad_norm": 2.7069849967956543,
3651
+ "learning_rate": 1.1565365313793708e-07,
3652
+ "loss": 0.0288,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 4.511627906976744,
3657
+ "grad_norm": 1.7393293380737305,
3658
+ "learning_rate": 1.0165951511763893e-07,
3659
+ "loss": 0.0309,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 4.52093023255814,
3664
+ "grad_norm": 1.5180749893188477,
3665
+ "learning_rate": 8.856553210474049e-08,
3666
+ "loss": 0.0288,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 4.530232558139534,
3671
+ "grad_norm": 1.831196904182434,
3672
+ "learning_rate": 7.637224234159889e-08,
3673
+ "loss": 0.0334,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 4.53953488372093,
3678
+ "grad_norm": 1.9779573678970337,
3679
+ "learning_rate": 6.508014704659936e-08,
3680
+ "loss": 0.0293,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 4.5488372093023255,
3685
+ "grad_norm": 2.4252068996429443,
3686
+ "learning_rate": 5.4689710393547974e-08,
3687
+ "loss": 0.0304,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 4.558139534883721,
3692
+ "grad_norm": 1.7382845878601074,
3693
+ "learning_rate": 4.520135949260062e-08,
3694
+ "loss": 0.0308,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 4.567441860465117,
3699
+ "grad_norm": 2.5499613285064697,
3700
+ "learning_rate": 3.661548437270157e-08,
3701
+ "loss": 0.0306,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 4.576744186046511,
3706
+ "grad_norm": 2.0584299564361572,
3707
+ "learning_rate": 2.8932437965543568e-08,
3708
+ "loss": 0.0285,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 4.586046511627907,
3713
+ "grad_norm": 2.0955538749694824,
3714
+ "learning_rate": 2.2152536091079032e-08,
3715
+ "loss": 0.0292,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 4.595348837209302,
3720
+ "grad_norm": 2.126533269882202,
3721
+ "learning_rate": 1.6276057444511328e-08,
3722
+ "loss": 0.0323,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 4.604651162790698,
3727
+ "grad_norm": 1.754727840423584,
3728
+ "learning_rate": 1.130324358486634e-08,
3729
+ "loss": 0.0335,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 4.604651162790698,
3734
+ "eval_loss": 0.1146140992641449,
3735
+ "eval_runtime": 52.1386,
3736
+ "eval_samples_per_second": 9.59,
3737
+ "eval_steps_per_second": 9.59,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 4.7810435876775936e+17,
3759
  "train_batch_size": 16,
3760
  "trial_name": null,
3761
  "trial_params": null