irishprancer committed on
Commit
5f28b16
·
verified ·
1 Parent(s): 3d5c6ff

Training in progress, step 4050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:460e8e8695016d7fb3fe1981d689ac748c31bfbaa2f5af7cb550b10ae4a2bdf6
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c703da2264e055a297790ac7d288cc98953e35f33bdc95a5685265382fb890e
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f6dd162a8f76ef782100cbd4cc1329e64448d85f627b9bf81ba66e937483b4c
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3222e360dfa7b35a1163b70615c3f1dede1049c9c6a85199690290550db33722
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:488cbcf14a0f8a4794af20845693e7c92cc4e0193e27f71cfb8b870a8f1fae2d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c3bba0118a53d6f1dc342017f2ea922f8815570aacf4ca8fc91241ca3c4a16
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e87b12dd89c08406ca001c2f9dadca3ff34969fd8915b971b04b981fc47351e8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e5c116adbf0f8029c8a52c2390c4234cb418c18a9d2241c9b1d212add8b091
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 169.56521739130434,
5
  "eval_steps": 150,
6
- "global_step": 3900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3777,6 +3777,151 @@
3777
  "EMA_steps_per_second": 25.739,
3778
  "epoch": 169.56521739130434,
3779
  "step": 3900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3780
  }
3781
  ],
3782
  "logging_steps": 10,
@@ -3796,7 +3941,7 @@
3796
  "attributes": {}
3797
  }
3798
  },
3799
- "total_flos": 1.0061040993472512e+17,
3800
  "train_batch_size": 4,
3801
  "trial_name": null,
3802
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 176.08695652173913,
5
  "eval_steps": 150,
6
+ "global_step": 4050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3777
  "EMA_steps_per_second": 25.739,
3778
  "epoch": 169.56521739130434,
3779
  "step": 3900
3780
+ },
3781
+ {
3782
+ "epoch": 170.0,
3783
+ "grad_norm": 3.183704137802124,
3784
+ "learning_rate": 2.5257092444735684e-06,
3785
+ "loss": 0.2311,
3786
+ "step": 3910
3787
+ },
3788
+ {
3789
+ "epoch": 170.43478260869566,
3790
+ "grad_norm": 1.870707392692566,
3791
+ "learning_rate": 2.5256909734692075e-06,
3792
+ "loss": 0.2348,
3793
+ "step": 3920
3794
+ },
3795
+ {
3796
+ "epoch": 170.8695652173913,
3797
+ "grad_norm": 2.794963836669922,
3798
+ "learning_rate": 2.5256722019811535e-06,
3799
+ "loss": 0.2342,
3800
+ "step": 3930
3801
+ },
3802
+ {
3803
+ "epoch": 171.30434782608697,
3804
+ "grad_norm": 2.1583147048950195,
3805
+ "learning_rate": 2.525652930016847e-06,
3806
+ "loss": 0.2293,
3807
+ "step": 3940
3808
+ },
3809
+ {
3810
+ "epoch": 171.7391304347826,
3811
+ "grad_norm": 1.784059762954712,
3812
+ "learning_rate": 2.525633157583926e-06,
3813
+ "loss": 0.2504,
3814
+ "step": 3950
3815
+ },
3816
+ {
3817
+ "epoch": 172.17391304347825,
3818
+ "grad_norm": 2.0553624629974365,
3819
+ "learning_rate": 2.5256128846902287e-06,
3820
+ "loss": 0.2408,
3821
+ "step": 3960
3822
+ },
3823
+ {
3824
+ "epoch": 172.6086956521739,
3825
+ "grad_norm": 2.012216567993164,
3826
+ "learning_rate": 2.5255921113437894e-06,
3827
+ "loss": 0.2336,
3828
+ "step": 3970
3829
+ },
3830
+ {
3831
+ "epoch": 173.04347826086956,
3832
+ "grad_norm": 1.940037727355957,
3833
+ "learning_rate": 2.5255708375528436e-06,
3834
+ "loss": 0.2135,
3835
+ "step": 3980
3836
+ },
3837
+ {
3838
+ "epoch": 173.47826086956522,
3839
+ "grad_norm": 2.3687658309936523,
3840
+ "learning_rate": 2.525549063325822e-06,
3841
+ "loss": 0.2385,
3842
+ "step": 3990
3843
+ },
3844
+ {
3845
+ "epoch": 173.91304347826087,
3846
+ "grad_norm": 2.059690237045288,
3847
+ "learning_rate": 2.5255267886713553e-06,
3848
+ "loss": 0.2467,
3849
+ "step": 4000
3850
+ },
3851
+ {
3852
+ "epoch": 174.34782608695653,
3853
+ "grad_norm": 2.3199281692504883,
3854
+ "learning_rate": 2.5255040135982736e-06,
3855
+ "loss": 0.228,
3856
+ "step": 4010
3857
+ },
3858
+ {
3859
+ "epoch": 174.7826086956522,
3860
+ "grad_norm": 2.3488564491271973,
3861
+ "learning_rate": 2.5254807381156035e-06,
3862
+ "loss": 0.2294,
3863
+ "step": 4020
3864
+ },
3865
+ {
3866
+ "epoch": 175.2173913043478,
3867
+ "grad_norm": 1.9849615097045898,
3868
+ "learning_rate": 2.5254569622325704e-06,
3869
+ "loss": 0.2328,
3870
+ "step": 4030
3871
+ },
3872
+ {
3873
+ "epoch": 175.65217391304347,
3874
+ "grad_norm": 2.2598798274993896,
3875
+ "learning_rate": 2.525432685958599e-06,
3876
+ "loss": 0.2147,
3877
+ "step": 4040
3878
+ },
3879
+ {
3880
+ "epoch": 176.08695652173913,
3881
+ "grad_norm": 1.856614351272583,
3882
+ "learning_rate": 2.5254079093033117e-06,
3883
+ "loss": 0.2414,
3884
+ "step": 4050
3885
+ },
3886
+ {
3887
+ "epoch": 176.08695652173913,
3888
+ "eval_loss": 0.9707199335098267,
3889
+ "eval_runtime": 0.472,
3890
+ "eval_samples_per_second": 21.186,
3891
+ "eval_steps_per_second": 21.186,
3892
+ "step": 4050
3893
+ },
3894
+ {
3895
+ "Start_State_loss": 0.8609819412231445,
3896
+ "Start_State_runtime": 0.4561,
3897
+ "Start_State_samples_per_second": 21.924,
3898
+ "Start_State_steps_per_second": 21.924,
3899
+ "epoch": 176.08695652173913,
3900
+ "step": 4050
3901
+ },
3902
+ {
3903
+ "Raw_Model_loss": 0.9707199335098267,
3904
+ "Raw_Model_runtime": 0.3939,
3905
+ "Raw_Model_samples_per_second": 25.385,
3906
+ "Raw_Model_steps_per_second": 25.385,
3907
+ "epoch": 176.08695652173913,
3908
+ "step": 4050
3909
+ },
3910
+ {
3911
+ "SWA_loss": 0.8006541132926941,
3912
+ "SWA_runtime": 0.3999,
3913
+ "SWA_samples_per_second": 25.007,
3914
+ "SWA_steps_per_second": 25.007,
3915
+ "epoch": 176.08695652173913,
3916
+ "step": 4050
3917
+ },
3918
+ {
3919
+ "EMA_loss": 0.8589029312133789,
3920
+ "EMA_runtime": 0.3915,
3921
+ "EMA_samples_per_second": 25.545,
3922
+ "EMA_steps_per_second": 25.545,
3923
+ "epoch": 176.08695652173913,
3924
+ "step": 4050
3925
  }
3926
  ],
3927
  "logging_steps": 10,
 
3941
  "attributes": {}
3942
  }
3943
  },
3944
+ "total_flos": 1.0440404596622131e+17,
3945
  "train_batch_size": 4,
3946
  "trial_name": null,
3947
  "trial_params": null