mgh6 committed on
Commit
3305d70
·
verified ·
1 Parent(s): 4e2accc

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:843ffea3e47027a7327b46056614528e573a8eb208925c13ef01de733d872085
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eae45eb43651c4ce612c5b264270a3ccdfbc48e1be2784320e0059c614c3cab
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020e3cbb17c3204164f195677f3b07302a30bdd875a5e4274d98f682a414c00e
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36970303513d3e205403c36051106bf22e33ef86f3a1e71a2f1e2cba961b8110
3
  size 5365108834
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9972da412683217d3e7b5c8b7b27bb7cb54e37fcb06d0959653aa9cad5d36fc8
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f4312d4eb4a3834512b8e6a5f558f7335f936ed9768ab54b18216e62eb5a7d3
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64edb59ac4e53d4505685902ba836e67456c610161bcc738cae4fc6ba12a85d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ae4134e19f55d5a540bad8977ebfa7de23a5f70c51215224d0742bb2666b1a
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e05485df9c0772c57db6278171bd1d12be10e5f20dbf942e364c40f5fbd3287d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e287e6f80aed910a1d4cb01fb428361df3b7e62045921fccfd519aab7f20c2e
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdab421c47fae8409d29d61cb7a02864fe4a42719ec643482d144bf7b2ce3282
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006e670f373067b7e226643b8cade6148c320aff0b769e7d1532179c7f45b76a
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c4950c64cff23a8cf10836c8406c5d9f7e6c7ef15fb647d3bd7f359bce3314c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:214df0e2d0c96471516754f237b8e237791d4cac9a44207b49ae1586ecbb810a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4221377680185069,
5
  "eval_steps": 500,
6
- "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -777,6 +777,76 @@
777
  "learning_rate": 5.7783236106846794e-05,
778
  "loss": 0.9085,
779
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  }
781
  ],
782
  "logging_steps": 50,
@@ -796,7 +866,7 @@
796
  "attributes": {}
797
  }
798
  },
799
- "total_flos": 9.426997587409371e+18,
800
  "train_batch_size": 2,
801
  "trial_name": null,
802
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.46051392874746205,
5
  "eval_steps": 500,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
777
  "learning_rate": 5.7783236106846794e-05,
778
  "loss": 0.9085,
779
  "step": 5500
780
+ },
781
+ {
782
+ "epoch": 0.4259753840914024,
783
+ "grad_norm": 0.4861834645271301,
784
+ "learning_rate": 5.739944734418177e-05,
785
+ "loss": 0.9067,
786
+ "step": 5550
787
+ },
788
+ {
789
+ "epoch": 0.42981300016429796,
790
+ "grad_norm": 0.24226143956184387,
791
+ "learning_rate": 5.7015658581516737e-05,
792
+ "loss": 0.9066,
793
+ "step": 5600
794
+ },
795
+ {
796
+ "epoch": 0.4336506162371935,
797
+ "grad_norm": 0.2108086198568344,
798
+ "learning_rate": 5.6631869818851705e-05,
799
+ "loss": 0.9061,
800
+ "step": 5650
801
+ },
802
+ {
803
+ "epoch": 0.437488232310089,
804
+ "grad_norm": 0.7616965770721436,
805
+ "learning_rate": 5.624808105618667e-05,
806
+ "loss": 0.9041,
807
+ "step": 5700
808
+ },
809
+ {
810
+ "epoch": 0.4413258483829845,
811
+ "grad_norm": 0.3760414719581604,
812
+ "learning_rate": 5.586429229352165e-05,
813
+ "loss": 0.9035,
814
+ "step": 5750
815
+ },
816
+ {
817
+ "epoch": 0.44516346445588,
818
+ "grad_norm": 0.4564415216445923,
819
+ "learning_rate": 5.548050353085662e-05,
820
+ "loss": 0.902,
821
+ "step": 5800
822
+ },
823
+ {
824
+ "epoch": 0.4490010805287755,
825
+ "grad_norm": 0.803648054599762,
826
+ "learning_rate": 5.509671476819159e-05,
827
+ "loss": 0.9011,
828
+ "step": 5850
829
+ },
830
+ {
831
+ "epoch": 0.45283869660167103,
832
+ "grad_norm": 0.7869254350662231,
833
+ "learning_rate": 5.4712926005526565e-05,
834
+ "loss": 0.9007,
835
+ "step": 5900
836
+ },
837
+ {
838
+ "epoch": 0.45667631267456654,
839
+ "grad_norm": 0.8484482765197754,
840
+ "learning_rate": 5.4329137242861526e-05,
841
+ "loss": 0.902,
842
+ "step": 5950
843
+ },
844
+ {
845
+ "epoch": 0.46051392874746205,
846
+ "grad_norm": 0.4946975111961365,
847
+ "learning_rate": 5.39453484801965e-05,
848
+ "loss": 0.8968,
849
+ "step": 6000
850
  }
851
  ],
852
  "logging_steps": 50,
 
866
  "attributes": {}
867
  }
868
  },
869
+ "total_flos": 1.0284708509245243e+19,
870
  "train_batch_size": 2,
871
  "trial_name": null,
872
  "trial_params": null