irishprancer commited on
Commit
2eb98e3
·
verified ·
1 Parent(s): 744fe63

Training in progress, step 3900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7586d4afd9fadcaa6083460f8f6841b5a702dba00cbd480cc156933cd79c41b2
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f298dd0a7b849bb55a6d5cb7290507c8064f9b006406a664a429001d19e0e48c
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:474241a6101d37df838210445f01853e23b11101c165e76bd69cf2cda41699a2
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6668a3d3adb3a86f62c6ef423882ea9f2f598f32ad5045f34c14199b38fb689f
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da0e93581e91c352d5ee493f505f8757c94a31fb5b16f71a9d85577535431525
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488cbcf14a0f8a4794af20845693e7c92cc4e0193e27f71cfb8b870a8f1fae2d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cea37f66b9478389c6f1b54e20d4b883ef028f78a1a1497fe4ee340f7d291f09
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d162883f1a66ee54c1f60afa7c5bed405d62515f1f02173124202a7368b03a7d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 163.04347826086956,
5
  "eval_steps": 150,
6
- "global_step": 3750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3632,6 +3632,151 @@
3632
  "EMA_steps_per_second": 23.833,
3633
  "epoch": 163.04347826086956,
3634
  "step": 3750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3635
  }
3636
  ],
3637
  "logging_steps": 10,
@@ -3651,7 +3796,7 @@
3651
  "attributes": {}
3652
  }
3653
  },
3654
- "total_flos": 9.668631592798618e+16,
3655
  "train_batch_size": 4,
3656
  "trial_name": null,
3657
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 169.56521739130434,
5
  "eval_steps": 150,
6
+ "global_step": 3900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3632
  "EMA_steps_per_second": 23.833,
3633
  "epoch": 163.04347826086956,
3634
  "step": 3750
3635
+ },
3636
+ {
3637
+ "epoch": 163.47826086956522,
3638
+ "grad_norm": 1.9908124208450317,
3639
+ "learning_rate": 2.5138358813815557e-06,
3640
+ "loss": 0.2365,
3641
+ "step": 3760
3642
+ },
3643
+ {
3644
+ "epoch": 163.91304347826087,
3645
+ "grad_norm": 2.154146194458008,
3646
+ "learning_rate": 2.5138251698949603e-06,
3647
+ "loss": 0.2379,
3648
+ "step": 3770
3649
+ },
3650
+ {
3651
+ "epoch": 164.34782608695653,
3652
+ "grad_norm": 1.2993087768554688,
3653
+ "learning_rate": 2.5138139602322698e-06,
3654
+ "loss": 0.2233,
3655
+ "step": 3780
3656
+ },
3657
+ {
3658
+ "epoch": 164.7826086956522,
3659
+ "grad_norm": 2.012446641921997,
3660
+ "learning_rate": 2.513802252397927e-06,
3661
+ "loss": 0.2327,
3662
+ "step": 3790
3663
+ },
3664
+ {
3665
+ "epoch": 165.2173913043478,
3666
+ "grad_norm": 2.131314277648926,
3667
+ "learning_rate": 2.513790046396573e-06,
3668
+ "loss": 0.261,
3669
+ "step": 3800
3670
+ },
3671
+ {
3672
+ "epoch": 165.65217391304347,
3673
+ "grad_norm": 1.9400966167449951,
3674
+ "learning_rate": 2.5137773422330448e-06,
3675
+ "loss": 0.2375,
3676
+ "step": 3810
3677
+ },
3678
+ {
3679
+ "epoch": 166.08695652173913,
3680
+ "grad_norm": 2.3741443157196045,
3681
+ "learning_rate": 2.5137641399123794e-06,
3682
+ "loss": 0.1815,
3683
+ "step": 3820
3684
+ },
3685
+ {
3686
+ "epoch": 166.52173913043478,
3687
+ "grad_norm": 2.0041890144348145,
3688
+ "learning_rate": 2.5137504394398086e-06,
3689
+ "loss": 0.2567,
3690
+ "step": 3830
3691
+ },
3692
+ {
3693
+ "epoch": 166.95652173913044,
3694
+ "grad_norm": 1.6475896835327148,
3695
+ "learning_rate": 2.5137362408207634e-06,
3696
+ "loss": 0.2357,
3697
+ "step": 3840
3698
+ },
3699
+ {
3700
+ "epoch": 167.3913043478261,
3701
+ "grad_norm": 2.971357583999634,
3702
+ "learning_rate": 2.5137215440608716e-06,
3703
+ "loss": 0.2192,
3704
+ "step": 3850
3705
+ },
3706
+ {
3707
+ "epoch": 167.82608695652175,
3708
+ "grad_norm": 3.0954818725585938,
3709
+ "learning_rate": 2.5137063491659585e-06,
3710
+ "loss": 0.218,
3711
+ "step": 3860
3712
+ },
3713
+ {
3714
+ "epoch": 168.2608695652174,
3715
+ "grad_norm": 2.0101890563964844,
3716
+ "learning_rate": 2.513690656142047e-06,
3717
+ "loss": 0.2871,
3718
+ "step": 3870
3719
+ },
3720
+ {
3721
+ "epoch": 168.69565217391303,
3722
+ "grad_norm": 1.8766013383865356,
3723
+ "learning_rate": 2.513674464995357e-06,
3724
+ "loss": 0.2125,
3725
+ "step": 3880
3726
+ },
3727
+ {
3728
+ "epoch": 169.1304347826087,
3729
+ "grad_norm": 2.223860025405884,
3730
+ "learning_rate": 2.5136577757323066e-06,
3731
+ "loss": 0.1979,
3732
+ "step": 3890
3733
+ },
3734
+ {
3735
+ "epoch": 169.56521739130434,
3736
+ "grad_norm": 1.864652395248413,
3737
+ "learning_rate": 2.5136405883595107e-06,
3738
+ "loss": 0.2454,
3739
+ "step": 3900
3740
+ },
3741
+ {
3742
+ "epoch": 169.56521739130434,
3743
+ "eval_loss": 0.967199444770813,
3744
+ "eval_runtime": 0.4027,
3745
+ "eval_samples_per_second": 24.832,
3746
+ "eval_steps_per_second": 24.832,
3747
+ "step": 3900
3748
+ },
3749
+ {
3750
+ "Start_State_loss": 0.861186683177948,
3751
+ "Start_State_runtime": 0.4115,
3752
+ "Start_State_samples_per_second": 24.298,
3753
+ "Start_State_steps_per_second": 24.298,
3754
+ "epoch": 169.56521739130434,
3755
+ "step": 3900
3756
+ },
3757
+ {
3758
+ "Raw_Model_loss": 0.967199444770813,
3759
+ "Raw_Model_runtime": 0.4276,
3760
+ "Raw_Model_samples_per_second": 23.385,
3761
+ "Raw_Model_steps_per_second": 23.385,
3762
+ "epoch": 169.56521739130434,
3763
+ "step": 3900
3764
+ },
3765
+ {
3766
+ "SWA_loss": 0.7983330488204956,
3767
+ "SWA_runtime": 0.4151,
3768
+ "SWA_samples_per_second": 24.09,
3769
+ "SWA_steps_per_second": 24.09,
3770
+ "epoch": 169.56521739130434,
3771
+ "step": 3900
3772
+ },
3773
+ {
3774
+ "EMA_loss": 0.8598569631576538,
3775
+ "EMA_runtime": 0.4151,
3776
+ "EMA_samples_per_second": 24.093,
3777
+ "EMA_steps_per_second": 24.093,
3778
+ "epoch": 169.56521739130434,
3779
+ "step": 3900
3780
  }
3781
  ],
3782
  "logging_steps": 10,
 
3796
  "attributes": {}
3797
  }
3798
  },
3799
+ "total_flos": 1.0061040993472512e+17,
3800
  "train_batch_size": 4,
3801
  "trial_name": null,
3802
  "trial_params": null