FormlessAI commited on
Commit
007d450
·
verified ·
1 Parent(s): 0644ab3

Training in progress, epoch 8, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0434ba551e7d0b16b8b3b46f2ed6444d23030597c313804b6fd83ef06e7ab21a
3
  size 515926240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047ccf1116a27aef838b2cf5df06c84e4bb47355dadf04bbc7c769c648c695a1
3
  size 515926240
last-checkpoint/global_step3750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00fcb4ad728e6db2e89317543a69ff85e21e32f4dee2b06bd7b6a430f5d14f5
3
+ size 388949733
last-checkpoint/global_step3750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5169d2293afafcbaf979e1dd49480bfd6c4e0d6f20b335853e598bca09cd960c
3
+ size 388949797
last-checkpoint/global_step3750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecafa13a717e6ff9e3063143aa1ed4e5d14ace6573e8c9bb5789db122bcd5d64
3
+ size 388949797
last-checkpoint/global_step3750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442c8069b48d7cc14e8386947976f9cff945d8ea06ed46ad502dc307465626d3
3
+ size 388949797
last-checkpoint/global_step3750/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edc16391daa13a5806f041f469a67b9568f43248f9b6a0589ed7b8453b261f48
3
+ size 982830693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3700
 
1
+ global_step3750
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa8c5a54cb456ea3cfaf49d40f30c6908d8a3542dc9aeb1fb231f5c2103ea449
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:299c5961b9e338d8364f29ced7a7fb3e75f50e59821688a9e8a25b3194dbb538
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71e30baff3e5d30f9edf3a07ba4fcc69de82d3af1390d23884cedf77ba6a92df
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f719324819b1361b02477a54a0118529d0dd0ce09bbe595cfcd2887f984dce1b
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe660ad5a8aac3be2216d2e12da56a405374c8950e71edcf0dca08a39439b73c
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcbfd45a4fefb05cc24a437f3cd43b41e4d635d02a5f6cceac023d5107bddf30
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c1fd64b0dd77456239c504b6e3b5c60505cb8731cd9d5c428f69184a68fb76
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d63c70c685f3c487dd218b7cdb050acfe8f45710a539363f79ab3d74af9dbe6
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bac0fd0463c8aaf6856f39551c2a213499d2ff9cdcf42f61668f14da010a2d5
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d61bfc2f268866ccd39b04a4ca0145d716056cc446969bdbb0838734542c317
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.7134996056556702,
4
  "best_model_checkpoint": null,
5
- "epoch": 8.240534521158128,
6
  "eval_steps": 50,
7
- "global_step": 3700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5780,6 +5780,84 @@
5780
  "eval_samples_per_second": 102.703,
5781
  "eval_steps_per_second": 12.883,
5782
  "step": 3700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5783
  }
5784
  ],
5785
  "logging_steps": 5,
@@ -5808,7 +5886,7 @@
5808
  "attributes": {}
5809
  }
5810
  },
5811
- "total_flos": 1.6660173065069527e+18,
5812
  "train_batch_size": 2,
5813
  "trial_name": null,
5814
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.7094199061393738,
4
  "best_model_checkpoint": null,
5
+ "epoch": 8.351893095768375,
6
  "eval_steps": 50,
7
+ "global_step": 3750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5780
  "eval_samples_per_second": 102.703,
5781
  "eval_steps_per_second": 12.883,
5782
  "step": 3700
5783
+ },
5784
+ {
5785
+ "epoch": 8.251670378619155,
5786
+ "grad_norm": 1.1108819246292114,
5787
+ "learning_rate": 1.218548828243257e-05,
5788
+ "loss": 0.6861,
5789
+ "step": 3705
5790
+ },
5791
+ {
5792
+ "epoch": 8.262806236080179,
5793
+ "grad_norm": 1.2709901332855225,
5794
+ "learning_rate": 1.2173041072641407e-05,
5795
+ "loss": 0.6582,
5796
+ "step": 3710
5797
+ },
5798
+ {
5799
+ "epoch": 8.273942093541203,
5800
+ "grad_norm": 1.2328144311904907,
5801
+ "learning_rate": 1.2160584908401693e-05,
5802
+ "loss": 0.7973,
5803
+ "step": 3715
5804
+ },
5805
+ {
5806
+ "epoch": 8.285077951002227,
5807
+ "grad_norm": 1.4351321458816528,
5808
+ "learning_rate": 1.2148119821071803e-05,
5809
+ "loss": 0.7226,
5810
+ "step": 3720
5811
+ },
5812
+ {
5813
+ "epoch": 8.296213808463252,
5814
+ "grad_norm": 1.3934727907180786,
5815
+ "learning_rate": 1.2135645842032582e-05,
5816
+ "loss": 0.6999,
5817
+ "step": 3725
5818
+ },
5819
+ {
5820
+ "epoch": 8.307349665924276,
5821
+ "grad_norm": 1.5040556192398071,
5822
+ "learning_rate": 1.2123163002687258e-05,
5823
+ "loss": 0.7128,
5824
+ "step": 3730
5825
+ },
5826
+ {
5827
+ "epoch": 8.3184855233853,
5828
+ "grad_norm": 1.6388317346572876,
5829
+ "learning_rate": 1.2110671334461362e-05,
5830
+ "loss": 0.7352,
5831
+ "step": 3735
5832
+ },
5833
+ {
5834
+ "epoch": 8.329621380846325,
5835
+ "grad_norm": 1.1943713426589966,
5836
+ "learning_rate": 1.2098170868802653e-05,
5837
+ "loss": 0.7112,
5838
+ "step": 3740
5839
+ },
5840
+ {
5841
+ "epoch": 8.340757238307349,
5842
+ "grad_norm": 1.1306072473526,
5843
+ "learning_rate": 1.208566163718104e-05,
5844
+ "loss": 0.6745,
5845
+ "step": 3745
5846
+ },
5847
+ {
5848
+ "epoch": 8.351893095768375,
5849
+ "grad_norm": 1.227961540222168,
5850
+ "learning_rate": 1.2073143671088499e-05,
5851
+ "loss": 0.7776,
5852
+ "step": 3750
5853
+ },
5854
+ {
5855
+ "epoch": 8.351893095768375,
5856
+ "eval_loss": 0.7094199061393738,
5857
+ "eval_runtime": 8.4253,
5858
+ "eval_samples_per_second": 102.193,
5859
+ "eval_steps_per_second": 12.819,
5860
+ "step": 3750
5861
  }
5862
  ],
5863
  "logging_steps": 5,
 
5886
  "attributes": {}
5887
  }
5888
  },
5889
+ "total_flos": 1.688518153497215e+18,
5890
  "train_batch_size": 2,
5891
  "trial_name": null,
5892
  "trial_params": null