irishprancer commited on
Commit
e579eb9
·
verified ·
1 Parent(s): f704603

Training in progress, step 3900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f2359775ec058105a768a27e0aec2fd7b09c0fef450becc3fea6a2140d5551
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:460e8e8695016d7fb3fe1981d689ac748c31bfbaa2f5af7cb550b10ae4a2bdf6
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22b0e07d88b69f37af8463bb1ac2f6ff8e912db26c18c2ee123c3a1948596d38
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f6dd162a8f76ef782100cbd4cc1329e64448d85f627b9bf81ba66e937483b4c
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da0e93581e91c352d5ee493f505f8757c94a31fb5b16f71a9d85577535431525
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488cbcf14a0f8a4794af20845693e7c92cc4e0193e27f71cfb8b870a8f1fae2d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30b7c8be324c8b4289d82c59d6cbd2a46df58415895691106518590654dd09ba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87b12dd89c08406ca001c2f9dadca3ff34969fd8915b971b04b981fc47351e8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 163.04347826086956,
5
  "eval_steps": 150,
6
- "global_step": 3750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3632,6 +3632,151 @@
3632
  "EMA_steps_per_second": 25.904,
3633
  "epoch": 163.04347826086956,
3634
  "step": 3750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3635
  }
3636
  ],
3637
  "logging_steps": 10,
@@ -3651,7 +3796,7 @@
3651
  "attributes": {}
3652
  }
3653
  },
3654
- "total_flos": 9.668631592798618e+16,
3655
  "train_batch_size": 4,
3656
  "trial_name": null,
3657
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 169.56521739130434,
5
  "eval_steps": 150,
6
+ "global_step": 3900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3632
  "EMA_steps_per_second": 25.904,
3633
  "epoch": 163.04347826086956,
3634
  "step": 3750
3635
+ },
3636
+ {
3637
+ "epoch": 163.47826086956522,
3638
+ "grad_norm": 2.0019257068634033,
3639
+ "learning_rate": 2.5259232470433596e-06,
3640
+ "loss": 0.2364,
3641
+ "step": 3760
3642
+ },
3643
+ {
3644
+ "epoch": 163.91304347826087,
3645
+ "grad_norm": 2.179541826248169,
3646
+ "learning_rate": 2.525912484052346e-06,
3647
+ "loss": 0.2379,
3648
+ "step": 3770
3649
+ },
3650
+ {
3651
+ "epoch": 164.34782608695653,
3652
+ "grad_norm": 1.2639409303665161,
3653
+ "learning_rate": 2.5259012204898387e-06,
3654
+ "loss": 0.2234,
3655
+ "step": 3780
3656
+ },
3657
+ {
3658
+ "epoch": 164.7826086956522,
3659
+ "grad_norm": 2.0140252113342285,
3660
+ "learning_rate": 2.5258894563603038e-06,
3661
+ "loss": 0.2327,
3662
+ "step": 3790
3663
+ },
3664
+ {
3665
+ "epoch": 165.2173913043478,
3666
+ "grad_norm": 2.1268043518066406,
3667
+ "learning_rate": 2.525877191668403e-06,
3668
+ "loss": 0.2606,
3669
+ "step": 3800
3670
+ },
3671
+ {
3672
+ "epoch": 165.65217391304347,
3673
+ "grad_norm": 1.9044567346572876,
3674
+ "learning_rate": 2.525864426418998e-06,
3675
+ "loss": 0.2372,
3676
+ "step": 3810
3677
+ },
3678
+ {
3679
+ "epoch": 166.08695652173913,
3680
+ "grad_norm": 2.487705945968628,
3681
+ "learning_rate": 2.5258511606171487e-06,
3682
+ "loss": 0.1812,
3683
+ "step": 3820
3684
+ },
3685
+ {
3686
+ "epoch": 166.52173913043478,
3687
+ "grad_norm": 2.743502616882324,
3688
+ "learning_rate": 2.525837394268113e-06,
3689
+ "loss": 0.2569,
3690
+ "step": 3830
3691
+ },
3692
+ {
3693
+ "epoch": 166.95652173913044,
3694
+ "grad_norm": 1.6789222955703735,
3695
+ "learning_rate": 2.5258231273773474e-06,
3696
+ "loss": 0.2355,
3697
+ "step": 3840
3698
+ },
3699
+ {
3700
+ "epoch": 167.3913043478261,
3701
+ "grad_norm": 2.9776694774627686,
3702
+ "learning_rate": 2.525808359950507e-06,
3703
+ "loss": 0.2192,
3704
+ "step": 3850
3705
+ },
3706
+ {
3707
+ "epoch": 167.82608695652175,
3708
+ "grad_norm": 3.056367874145508,
3709
+ "learning_rate": 2.525793091993445e-06,
3710
+ "loss": 0.2178,
3711
+ "step": 3860
3712
+ },
3713
+ {
3714
+ "epoch": 168.2608695652174,
3715
+ "grad_norm": 2.032916307449341,
3716
+ "learning_rate": 2.5257773235122136e-06,
3717
+ "loss": 0.2873,
3718
+ "step": 3870
3719
+ },
3720
+ {
3721
+ "epoch": 168.69565217391303,
3722
+ "grad_norm": 1.8345348834991455,
3723
+ "learning_rate": 2.5257610545130627e-06,
3724
+ "loss": 0.2126,
3725
+ "step": 3880
3726
+ },
3727
+ {
3728
+ "epoch": 169.1304347826087,
3729
+ "grad_norm": 2.1053850650787354,
3730
+ "learning_rate": 2.52574428500244e-06,
3731
+ "loss": 0.1979,
3732
+ "step": 3890
3733
+ },
3734
+ {
3735
+ "epoch": 169.56521739130434,
3736
+ "grad_norm": 1.8270847797393799,
3737
+ "learning_rate": 2.525727014986994e-06,
3738
+ "loss": 0.245,
3739
+ "step": 3900
3740
+ },
3741
+ {
3742
+ "epoch": 169.56521739130434,
3743
+ "eval_loss": 0.9649088978767395,
3744
+ "eval_runtime": 0.4578,
3745
+ "eval_samples_per_second": 21.844,
3746
+ "eval_steps_per_second": 21.844,
3747
+ "step": 3900
3748
+ },
3749
+ {
3750
+ "Start_State_loss": 0.8609819412231445,
3751
+ "Start_State_runtime": 0.3852,
3752
+ "Start_State_samples_per_second": 25.962,
3753
+ "Start_State_steps_per_second": 25.962,
3754
+ "epoch": 169.56521739130434,
3755
+ "step": 3900
3756
+ },
3757
+ {
3758
+ "Raw_Model_loss": 0.9649088978767395,
3759
+ "Raw_Model_runtime": 0.3849,
3760
+ "Raw_Model_samples_per_second": 25.982,
3761
+ "Raw_Model_steps_per_second": 25.982,
3762
+ "epoch": 169.56521739130434,
3763
+ "step": 3900
3764
+ },
3765
+ {
3766
+ "SWA_loss": 0.7992776036262512,
3767
+ "SWA_runtime": 0.386,
3768
+ "SWA_samples_per_second": 25.91,
3769
+ "SWA_steps_per_second": 25.91,
3770
+ "epoch": 169.56521739130434,
3771
+ "step": 3900
3772
+ },
3773
+ {
3774
+ "EMA_loss": 0.8593554496765137,
3775
+ "EMA_runtime": 0.3885,
3776
+ "EMA_samples_per_second": 25.739,
3777
+ "EMA_steps_per_second": 25.739,
3778
+ "epoch": 169.56521739130434,
3779
+ "step": 3900
3780
  }
3781
  ],
3782
  "logging_steps": 10,
 
3796
  "attributes": {}
3797
  }
3798
  },
3799
+ "total_flos": 1.0061040993472512e+17,
3800
  "train_batch_size": 4,
3801
  "trial_name": null,
3802
  "trial_params": null