irishprancer committed on
Commit
066be44
·
verified ·
1 Parent(s): deed51f

Training in progress, step 4050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f298dd0a7b849bb55a6d5cb7290507c8064f9b006406a664a429001d19e0e48c
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b1674b110cd3152426bbe05cc5f0d45a53b6ea70c1f07b7ffe3a2e658d6c17d
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6668a3d3adb3a86f62c6ef423882ea9f2f598f32ad5045f34c14199b38fb689f
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e4c61f600a24769d3508958b5be7a636626fc7dd36b36dfe84519c7990f9fa3
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:488cbcf14a0f8a4794af20845693e7c92cc4e0193e27f71cfb8b870a8f1fae2d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c3bba0118a53d6f1dc342017f2ea922f8815570aacf4ca8fc91241ca3c4a16
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d162883f1a66ee54c1f60afa7c5bed405d62515f1f02173124202a7368b03a7d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1997849c2025c9ed68622bc6b79aa148345dac46cf13e358512de3e857a9129
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 169.56521739130434,
5
  "eval_steps": 150,
6
- "global_step": 3900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3777,6 +3777,151 @@
3777
  "EMA_steps_per_second": 24.093,
3778
  "epoch": 169.56521739130434,
3779
  "step": 3900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3780
  }
3781
  ],
3782
  "logging_steps": 10,
@@ -3796,7 +3941,7 @@
3796
  "attributes": {}
3797
  }
3798
  },
3799
- "total_flos": 1.0061040993472512e+17,
3800
  "train_batch_size": 4,
3801
  "trial_name": null,
3802
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 176.08695652173913,
5
  "eval_steps": 150,
6
+ "global_step": 4050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3777
  "EMA_steps_per_second": 24.093,
3778
  "epoch": 169.56521739130434,
3779
  "step": 3900
3780
+ },
3781
+ {
3782
+ "epoch": 170.0,
3783
+ "grad_norm": 3.2512736320495605,
3784
+ "learning_rate": 2.5136229028837813e-06,
3785
+ "loss": 0.2311,
3786
+ "step": 3910
3787
+ },
3788
+ {
3789
+ "epoch": 170.43478260869566,
3790
+ "grad_norm": 1.862411618232727,
3791
+ "learning_rate": 2.5136047193121285e-06,
3792
+ "loss": 0.2351,
3793
+ "step": 3920
3794
+ },
3795
+ {
3796
+ "epoch": 170.8695652173913,
3797
+ "grad_norm": 2.6634721755981445,
3798
+ "learning_rate": 2.513586037651761e-06,
3799
+ "loss": 0.2343,
3800
+ "step": 3930
3801
+ },
3802
+ {
3803
+ "epoch": 171.30434782608697,
3804
+ "grad_norm": 2.177884340286255,
3805
+ "learning_rate": 2.5135668579100817e-06,
3806
+ "loss": 0.2296,
3807
+ "step": 3940
3808
+ },
3809
+ {
3810
+ "epoch": 171.7391304347826,
3811
+ "grad_norm": 1.8351444005966187,
3812
+ "learning_rate": 2.5135471800946947e-06,
3813
+ "loss": 0.2501,
3814
+ "step": 3950
3815
+ },
3816
+ {
3817
+ "epoch": 172.17391304347825,
3818
+ "grad_norm": 2.0342533588409424,
3819
+ "learning_rate": 2.513527004213398e-06,
3820
+ "loss": 0.2407,
3821
+ "step": 3960
3822
+ },
3823
+ {
3824
+ "epoch": 172.6086956521739,
3825
+ "grad_norm": 2.0113251209259033,
3826
+ "learning_rate": 2.5135063302741893e-06,
3827
+ "loss": 0.234,
3828
+ "step": 3970
3829
+ },
3830
+ {
3831
+ "epoch": 173.04347826086956,
3832
+ "grad_norm": 1.9626580476760864,
3833
+ "learning_rate": 2.5134851582852637e-06,
3834
+ "loss": 0.2137,
3835
+ "step": 3980
3836
+ },
3837
+ {
3838
+ "epoch": 173.47826086956522,
3839
+ "grad_norm": 2.3283474445343018,
3840
+ "learning_rate": 2.5134634882550122e-06,
3841
+ "loss": 0.2388,
3842
+ "step": 3990
3843
+ },
3844
+ {
3845
+ "epoch": 173.91304347826087,
3846
+ "grad_norm": 2.082240104675293,
3847
+ "learning_rate": 2.5134413201920244e-06,
3848
+ "loss": 0.2468,
3849
+ "step": 4000
3850
+ },
3851
+ {
3852
+ "epoch": 174.34782608695653,
3853
+ "grad_norm": 2.389084815979004,
3854
+ "learning_rate": 2.513418654105087e-06,
3855
+ "loss": 0.2283,
3856
+ "step": 4010
3857
+ },
3858
+ {
3859
+ "epoch": 174.7826086956522,
3860
+ "grad_norm": 2.385908365249634,
3861
+ "learning_rate": 2.5133954900031847e-06,
3862
+ "loss": 0.2295,
3863
+ "step": 4020
3864
+ },
3865
+ {
3866
+ "epoch": 175.2173913043478,
3867
+ "grad_norm": 2.003931760787964,
3868
+ "learning_rate": 2.513371827895498e-06,
3869
+ "loss": 0.2329,
3870
+ "step": 4030
3871
+ },
3872
+ {
3873
+ "epoch": 175.65217391304347,
3874
+ "grad_norm": 2.265186071395874,
3875
+ "learning_rate": 2.5133476677914065e-06,
3876
+ "loss": 0.2145,
3877
+ "step": 4040
3878
+ },
3879
+ {
3880
+ "epoch": 176.08695652173913,
3881
+ "grad_norm": 1.8546191453933716,
3882
+ "learning_rate": 2.5133230097004866e-06,
3883
+ "loss": 0.2419,
3884
+ "step": 4050
3885
+ },
3886
+ {
3887
+ "epoch": 176.08695652173913,
3888
+ "eval_loss": 0.9715728759765625,
3889
+ "eval_runtime": 0.4124,
3890
+ "eval_samples_per_second": 24.249,
3891
+ "eval_steps_per_second": 24.249,
3892
+ "step": 4050
3893
+ },
3894
+ {
3895
+ "Start_State_loss": 0.861186683177948,
3896
+ "Start_State_runtime": 0.4406,
3897
+ "Start_State_samples_per_second": 22.694,
3898
+ "Start_State_steps_per_second": 22.694,
3899
+ "epoch": 176.08695652173913,
3900
+ "step": 4050
3901
+ },
3902
+ {
3903
+ "Raw_Model_loss": 0.9715728759765625,
3904
+ "Raw_Model_runtime": 0.5139,
3905
+ "Raw_Model_samples_per_second": 19.459,
3906
+ "Raw_Model_steps_per_second": 19.459,
3907
+ "epoch": 176.08695652173913,
3908
+ "step": 4050
3909
+ },
3910
+ {
3911
+ "SWA_loss": 0.800355076789856,
3912
+ "SWA_runtime": 0.4756,
3913
+ "SWA_samples_per_second": 21.028,
3914
+ "SWA_steps_per_second": 21.028,
3915
+ "epoch": 176.08695652173913,
3916
+ "step": 4050
3917
+ },
3918
+ {
3919
+ "EMA_loss": 0.8603526949882507,
3920
+ "EMA_runtime": 0.4374,
3921
+ "EMA_samples_per_second": 22.861,
3922
+ "EMA_steps_per_second": 22.861,
3923
+ "epoch": 176.08695652173913,
3924
+ "step": 4050
3925
  }
3926
  ],
3927
  "logging_steps": 10,
 
3941
  "attributes": {}
3942
  }
3943
  },
3944
+ "total_flos": 1.0440404596622131e+17,
3945
  "train_batch_size": 4,
3946
  "trial_name": null,
3947
  "trial_params": null