FormlessAI commited on
Commit
3b1e48a
·
verified ·
1 Parent(s): e0b72f3

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3d3618a6549bfada4acfab50184a00e8169ef527f9fd2d6299b8877447758cb
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a31cb8fd392f2131e4bf934e5cfc6c725cef9d5daaa800fec946c0c6bb095c87
3
  size 98088784
last-checkpoint/global_step3151/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19fed4d1e68e884c33108eb4a6d195216062c7261df3be3f1284ee1dfa7f661b
3
+ size 73939813
last-checkpoint/global_step3151/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe7c11b224504d4b7ce92dcd86eeac367b83fed77a084e0ca74be6e3f15c087c
3
+ size 73939813
last-checkpoint/global_step3151/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f041e6e4c2fcd5847cb26f89b7e393ed9654a02c6c3e1e64c356157c74568f
3
+ size 73939877
last-checkpoint/global_step3151/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:864c9d0a4a6a6df9e106685e32c07abf56966577d2f2ac54cfcdbefffe092ac4
3
+ size 73939877
last-checkpoint/global_step3151/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01fe20a677165e10008e664a981608bcd879f3dd7e16388edee6794e047fb6c8
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3101
 
1
+ global_step3151
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c9f2639515e9a4dbb73442f973fea9b0c88c20b795038a946c56e0e2493ca27
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6d22752bf6918604c195c68c726c5f986752b1c7745bc126a895fa5378f563
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecc1dea28e987710088b449c94c516228a22c7362dc59c8c30f086a46639bae7
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b418cf94798b0605942c9f00109a5edef50c6b4d7c3fa15f7ef19ea7a3218590
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5887804b4b795f7a573283c1b112d762af0778415a90f4dd264b13d8ce6b6e73
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea4486e4c7872a12462ac87ce32aa65944ed660174e541883f84fbec5d08986
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d19ad5422c32a302e30ca2b15abb40a89c5c32a5565cad1734400f2ab8b8c060
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f803476ab2d8569ffcfe385fec2e225f971e23d78b9f9bb3450d35d825e9ea35
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40dddd77817570d823fe2af41de3280b2532f2b28178ec1ba2cdd4222f46c420
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b27670a16b804d5788140685f115d1185aa03128bfff2551a4cdbedd7bc74b
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.620843768119812,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.039191671769749,
6
  "eval_steps": 50,
7
- "global_step": 3100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4844,6 +4844,84 @@
4844
  "eval_samples_per_second": 127.13,
4845
  "eval_steps_per_second": 15.899,
4846
  "step": 3100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4847
  }
4848
  ],
4849
  "logging_steps": 5,
@@ -4872,7 +4950,7 @@
4872
  "attributes": {}
4873
  }
4874
  },
4875
- "total_flos": 1.5987517748506788e+18,
4876
  "train_batch_size": 2,
4877
  "trial_name": null,
4878
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6195825934410095,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0881812614819353,
6
  "eval_steps": 50,
7
+ "global_step": 3150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4844
  "eval_samples_per_second": 127.13,
4845
  "eval_steps_per_second": 15.899,
4846
  "step": 3100
4847
+ },
4848
+ {
4849
+ "epoch": 3.0440906307409676,
4850
+ "grad_norm": 0.20413470268249512,
4851
+ "learning_rate": 3.6132616490418216e-05,
4852
+ "loss": 0.652,
4853
+ "step": 3105
4854
+ },
4855
+ {
4856
+ "epoch": 3.0489895897121864,
4857
+ "grad_norm": 0.23027652502059937,
4858
+ "learning_rate": 3.5966209323531355e-05,
4859
+ "loss": 0.6431,
4860
+ "step": 3110
4861
+ },
4862
+ {
4863
+ "epoch": 3.0538885486834046,
4864
+ "grad_norm": 0.24030309915542603,
4865
+ "learning_rate": 3.5800001854923e-05,
4866
+ "loss": 0.6301,
4867
+ "step": 3115
4868
+ },
4869
+ {
4870
+ "epoch": 3.0587875076546234,
4871
+ "grad_norm": 0.24044173955917358,
4872
+ "learning_rate": 3.563399579263203e-05,
4873
+ "loss": 0.6547,
4874
+ "step": 3120
4875
+ },
4876
+ {
4877
+ "epoch": 3.063686466625842,
4878
+ "grad_norm": 0.2295493483543396,
4879
+ "learning_rate": 3.546819284262758e-05,
4880
+ "loss": 0.6483,
4881
+ "step": 3125
4882
+ },
4883
+ {
4884
+ "epoch": 3.068585425597061,
4885
+ "grad_norm": 0.2551316022872925,
4886
+ "learning_rate": 3.53025947087915e-05,
4887
+ "loss": 0.6436,
4888
+ "step": 3130
4889
+ },
4890
+ {
4891
+ "epoch": 3.073484384568279,
4892
+ "grad_norm": 0.22237151861190796,
4893
+ "learning_rate": 3.513720309290082e-05,
4894
+ "loss": 0.638,
4895
+ "step": 3135
4896
+ },
4897
+ {
4898
+ "epoch": 3.078383343539498,
4899
+ "grad_norm": 0.20863863825798035,
4900
+ "learning_rate": 3.497201969461032e-05,
4901
+ "loss": 0.6661,
4902
+ "step": 3140
4903
+ },
4904
+ {
4905
+ "epoch": 3.0832823025107166,
4906
+ "grad_norm": 0.25886034965515137,
4907
+ "learning_rate": 3.480704621143496e-05,
4908
+ "loss": 0.6538,
4909
+ "step": 3145
4910
+ },
4911
+ {
4912
+ "epoch": 3.0881812614819353,
4913
+ "grad_norm": 0.24331150949001312,
4914
+ "learning_rate": 3.464228433873255e-05,
4915
+ "loss": 0.6569,
4916
+ "step": 3150
4917
+ },
4918
+ {
4919
+ "epoch": 3.0881812614819353,
4920
+ "eval_loss": 0.6195825934410095,
4921
+ "eval_runtime": 15.4247,
4922
+ "eval_samples_per_second": 127.004,
4923
+ "eval_steps_per_second": 15.884,
4924
+ "step": 3150
4925
  }
4926
  ],
4927
  "logging_steps": 5,
 
4950
  "attributes": {}
4951
  }
4952
  },
4953
+ "total_flos": 1.6246141644313723e+18,
4954
  "train_batch_size": 2,
4955
  "trial_name": null,
4956
  "trial_params": null