ToastyPigeon commited on
Commit
db5b858
·
verified ·
1 Parent(s): 99bf1af

Training in progress, step 550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1b3e41169f1e067e1d0fc5b433e28209599b88ec7a1f300f0d620bd5f1fea11
3
  size 1039483968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098e670d3e5545c295765450469b25f278e85309584bd5521b048fbfd0738e33
3
  size 1039483968
last-checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b38c5653577dce8d43daffaa1d997a170a0534000c5adda1ce07181b3ae0588
3
  size 2079349513
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8c189ebc2276dbc5a8793affc7b015267813ce7ff1c1cecdb692a76ff78107
3
  size 2079349513
last-checkpoint/pytorch_model_fsdp.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3084904f376c934f6e1074299150e1257b4859fec0f98e9e5fc02fd4fd0c9969
3
  size 1039629811
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a1777f717103649abb1d286d70ad5ded7dcba9897f65a6565e80193aecd999f
3
  size 1039629811
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:441648197c9c043681f2314411742b150ffff3ca10b3a89fc2e21d86709ea05c
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccea8299800024b78e5d1ec043b4c36fae734e757f1a43a7a2255a29b414dc4f
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c4728894dd7fa940d14c3112093f8d6d68941114d14e3f2945f3d934cb2228f
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42978a3ff9e161a48be56df954484177012b3d081b4eedca7e2148205852aca3
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43af7aa6b99f5741e0bd6f3de2d6923856f8afd0580427a79e4f0846abad72ae
3
  size 1529
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8993fe857a67f14a1c28a148c1bef4b188f7d1af8900ff5c015c57364dd79df
3
  size 1529
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.2025604551920341,
6
  "eval_steps": 88,
7
- "global_step": 528,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3760,6 +3760,160 @@
3760
  "eval_samples_per_second": 3.275,
3761
  "eval_steps_per_second": 0.819,
3762
  "step": 528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3763
  }
3764
  ],
3765
  "logging_steps": 1,
@@ -3779,7 +3933,7 @@
3779
  "attributes": {}
3780
  }
3781
  },
3782
- "total_flos": 1.4403370387041681e+19,
3783
  "train_batch_size": 2,
3784
  "trial_name": null,
3785
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.2526315789473683,
6
  "eval_steps": 88,
7
+ "global_step": 550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3760
  "eval_samples_per_second": 3.275,
3761
  "eval_steps_per_second": 0.819,
3762
  "step": 528
3763
+ },
3764
+ {
3765
+ "epoch": 1.2048364153627311,
3766
+ "grad_norm": 0.4168463349342346,
3767
+ "learning_rate": 8.964330256921927e-06,
3768
+ "loss": 0.4631,
3769
+ "step": 529
3770
+ },
3771
+ {
3772
+ "epoch": 1.207112375533428,
3773
+ "grad_norm": 0.8588109612464905,
3774
+ "learning_rate": 8.960000000000002e-06,
3775
+ "loss": 0.5207,
3776
+ "step": 530
3777
+ },
3778
+ {
3779
+ "epoch": 1.209388335704125,
3780
+ "grad_norm": 0.5915808081626892,
3781
+ "learning_rate": 8.955650212979206e-06,
3782
+ "loss": 0.5108,
3783
+ "step": 531
3784
+ },
3785
+ {
3786
+ "epoch": 1.2116642958748223,
3787
+ "grad_norm": 0.47360390424728394,
3788
+ "learning_rate": 8.951280763435461e-06,
3789
+ "loss": 0.5034,
3790
+ "step": 532
3791
+ },
3792
+ {
3793
+ "epoch": 1.2139402560455193,
3794
+ "grad_norm": 0.39106452465057373,
3795
+ "learning_rate": 8.946891517744779e-06,
3796
+ "loss": 0.5013,
3797
+ "step": 533
3798
+ },
3799
+ {
3800
+ "epoch": 1.2162162162162162,
3801
+ "grad_norm": 0.9861881136894226,
3802
+ "learning_rate": 8.942482341069628e-06,
3803
+ "loss": 0.4678,
3804
+ "step": 534
3805
+ },
3806
+ {
3807
+ "epoch": 1.2184921763869132,
3808
+ "grad_norm": 0.4086526036262512,
3809
+ "learning_rate": 8.938053097345134e-06,
3810
+ "loss": 0.4737,
3811
+ "step": 535
3812
+ },
3813
+ {
3814
+ "epoch": 1.2207681365576102,
3815
+ "grad_norm": 0.7135105729103088,
3816
+ "learning_rate": 8.93360364926508e-06,
3817
+ "loss": 0.4806,
3818
+ "step": 536
3819
+ },
3820
+ {
3821
+ "epoch": 1.2230440967283072,
3822
+ "grad_norm": 0.46862491965293884,
3823
+ "learning_rate": 8.929133858267717e-06,
3824
+ "loss": 0.5002,
3825
+ "step": 537
3826
+ },
3827
+ {
3828
+ "epoch": 1.2253200568990044,
3829
+ "grad_norm": 0.41207781434059143,
3830
+ "learning_rate": 8.924643584521386e-06,
3831
+ "loss": 0.4633,
3832
+ "step": 538
3833
+ },
3834
+ {
3835
+ "epoch": 1.2275960170697013,
3836
+ "grad_norm": 0.5379470586776733,
3837
+ "learning_rate": 8.920132686909926e-06,
3838
+ "loss": 0.5418,
3839
+ "step": 539
3840
+ },
3841
+ {
3842
+ "epoch": 1.2298719772403983,
3843
+ "grad_norm": 0.6593378782272339,
3844
+ "learning_rate": 8.915601023017903e-06,
3845
+ "loss": 0.5227,
3846
+ "step": 540
3847
+ },
3848
+ {
3849
+ "epoch": 1.2321479374110953,
3850
+ "grad_norm": 0.5798733234405518,
3851
+ "learning_rate": 8.911048449115613e-06,
3852
+ "loss": 0.4734,
3853
+ "step": 541
3854
+ },
3855
+ {
3856
+ "epoch": 1.2344238975817923,
3857
+ "grad_norm": 0.3299521803855896,
3858
+ "learning_rate": 8.906474820143887e-06,
3859
+ "loss": 0.4953,
3860
+ "step": 542
3861
+ },
3862
+ {
3863
+ "epoch": 1.2366998577524893,
3864
+ "grad_norm": 0.7939162850379944,
3865
+ "learning_rate": 8.901879989698689e-06,
3866
+ "loss": 0.5127,
3867
+ "step": 543
3868
+ },
3869
+ {
3870
+ "epoch": 1.2389758179231865,
3871
+ "grad_norm": 0.4086650311946869,
3872
+ "learning_rate": 8.897263810015489e-06,
3873
+ "loss": 0.5214,
3874
+ "step": 544
3875
+ },
3876
+ {
3877
+ "epoch": 1.2412517780938834,
3878
+ "grad_norm": 0.589647650718689,
3879
+ "learning_rate": 8.892626131953428e-06,
3880
+ "loss": 0.5008,
3881
+ "step": 545
3882
+ },
3883
+ {
3884
+ "epoch": 1.2435277382645804,
3885
+ "grad_norm": 0.3771437704563141,
3886
+ "learning_rate": 8.887966804979256e-06,
3887
+ "loss": 0.5281,
3888
+ "step": 546
3889
+ },
3890
+ {
3891
+ "epoch": 1.2458036984352774,
3892
+ "grad_norm": 1.033705472946167,
3893
+ "learning_rate": 8.883285677151027e-06,
3894
+ "loss": 0.5148,
3895
+ "step": 547
3896
+ },
3897
+ {
3898
+ "epoch": 1.2480796586059744,
3899
+ "grad_norm": 0.5085345506668091,
3900
+ "learning_rate": 8.878582595101617e-06,
3901
+ "loss": 0.5196,
3902
+ "step": 548
3903
+ },
3904
+ {
3905
+ "epoch": 1.2503556187766713,
3906
+ "grad_norm": 0.2777687609195709,
3907
+ "learning_rate": 8.873857404021936e-06,
3908
+ "loss": 0.509,
3909
+ "step": 549
3910
+ },
3911
+ {
3912
+ "epoch": 1.2526315789473683,
3913
+ "grad_norm": 0.5570908784866333,
3914
+ "learning_rate": 8.869109947643979e-06,
3915
+ "loss": 0.4756,
3916
+ "step": 550
3917
  }
3918
  ],
3919
  "logging_steps": 1,
 
3933
  "attributes": {}
3934
  }
3935
  },
3936
+ "total_flos": 1.5003226796869026e+19,
3937
  "train_batch_size": 2,
3938
  "trial_name": null,
3939
  "trial_params": null