FormlessAI commited on
Commit
c5cf584
·
verified ·
1 Parent(s): 9e59d17

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0217b4b0c3c7f1987944f70686eb3cc84294e0febf0ed767a56782cb9017db42
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0a21dc5f4f9acf3af3f8980785056b6b5ada5cb15eef4540db5bca39c790390
3
  size 1037269336
last-checkpoint/global_step4500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d31ac560812e943c8c288747e5d2039a96a47afb0abcb9be2e38f7662b5aa10
3
+ size 781993445
last-checkpoint/global_step4500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da41387f3470e28461291b4767a43d23bd0fe5626bc769fdd43568a69ed5c8d
3
+ size 781993509
last-checkpoint/global_step4500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0c7d889d827a14f364818784bf4f3e8f06fc8163b92def019bbd3d708ff6ee
3
+ size 781993509
last-checkpoint/global_step4500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7da489d965d3264064e452ec2b0f1161d9926dfe1c6aab8ef3eb9c5e3f77e8c
3
+ size 781993509
last-checkpoint/global_step4500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5c5f490e1ca842d837adac5ce91ab10507f4511448a022fde5f577639115024
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step4400
 
1
+ global_step4500
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec6429d51b78e62a781ea28a18634f451844f66fee400b9be20b2072a6fac5e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3630f5c7f559df2743db6022b9a9e3a578f1caa3a824d427deb7eb53b5753113
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e3e04848cc38a3a002981db4be3e84294dc9e5c12327b6e3c23b02534523094
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a975041d5ccbda078ebb49cae6863f266b7176846aea763c1f5991e324beb6a
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a5a84f5b27ded1de3f5ceb77963092ac6c45b3bb6acfbc406627cbc633009a1
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e16d82cf7cd32b948d7f53723214355031cb0c2f352b62b817e45196b5c3bed
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b169b571920e7c4ea3cfadebde4b2c5412429683ec1e5c89095379be2aeec0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2faeda1cf20088c59a4c59ca63cd8875d237d2179a7055592aef1e315f61c7ea
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f6239c54a9e14ade75dd1dbb72d423d68c7c1273e9d5fb21d6effe590197848
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e70fa6096403ae68870c39096182c6dd70befee0d4111f312991f4b6364fbfa
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.9043115377426147,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6396278528855939,
6
  "eval_steps": 50,
7
- "global_step": 4400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6872,6 +6872,162 @@
6872
  "eval_samples_per_second": 174.461,
6873
  "eval_steps_per_second": 10.94,
6874
  "step": 4400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6875
  }
6876
  ],
6877
  "logging_steps": 5,
@@ -6900,7 +7056,7 @@
6900
  "attributes": {}
6901
  }
6902
  },
6903
- "total_flos": 1.1471040004625531e+18,
6904
  "train_batch_size": 4,
6905
  "trial_name": null,
6906
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.89194917678833,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6541648495420846,
6
  "eval_steps": 50,
7
+ "global_step": 4500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6872
  "eval_samples_per_second": 174.461,
6873
  "eval_steps_per_second": 10.94,
6874
  "step": 4400
6875
+ },
6876
+ {
6877
+ "epoch": 0.6403547027184183,
6878
+ "grad_norm": 2.5073940753936768,
6879
+ "learning_rate": 6.029659568988577e-05,
6880
+ "loss": 2.1659,
6881
+ "step": 4405
6882
+ },
6883
+ {
6884
+ "epoch": 0.6410815525512429,
6885
+ "grad_norm": 2.4761996269226074,
6886
+ "learning_rate": 6.021875803667634e-05,
6887
+ "loss": 1.9834,
6888
+ "step": 4410
6889
+ },
6890
+ {
6891
+ "epoch": 0.6418084023840674,
6892
+ "grad_norm": 2.326512336730957,
6893
+ "learning_rate": 6.014089490539603e-05,
6894
+ "loss": 2.0396,
6895
+ "step": 4415
6896
+ },
6897
+ {
6898
+ "epoch": 0.642535252216892,
6899
+ "grad_norm": 2.1320672035217285,
6900
+ "learning_rate": 6.0063006492065156e-05,
6901
+ "loss": 2.1046,
6902
+ "step": 4420
6903
+ },
6904
+ {
6905
+ "epoch": 0.6432621020497166,
6906
+ "grad_norm": 2.4910004138946533,
6907
+ "learning_rate": 5.998509299276773e-05,
6908
+ "loss": 2.0383,
6909
+ "step": 4425
6910
+ },
6911
+ {
6912
+ "epoch": 0.643988951882541,
6913
+ "grad_norm": 2.5594482421875,
6914
+ "learning_rate": 5.990715460365091e-05,
6915
+ "loss": 1.9068,
6916
+ "step": 4430
6917
+ },
6918
+ {
6919
+ "epoch": 0.6447158017153656,
6920
+ "grad_norm": 2.314884662628174,
6921
+ "learning_rate": 5.9829191520924444e-05,
6922
+ "loss": 1.9658,
6923
+ "step": 4435
6924
+ },
6925
+ {
6926
+ "epoch": 0.6454426515481901,
6927
+ "grad_norm": 2.5968551635742188,
6928
+ "learning_rate": 5.975120394086035e-05,
6929
+ "loss": 2.0312,
6930
+ "step": 4440
6931
+ },
6932
+ {
6933
+ "epoch": 0.6461695013810147,
6934
+ "grad_norm": 2.324385166168213,
6935
+ "learning_rate": 5.967319205979226e-05,
6936
+ "loss": 1.9554,
6937
+ "step": 4445
6938
+ },
6939
+ {
6940
+ "epoch": 0.6468963512138393,
6941
+ "grad_norm": 2.4644150733947754,
6942
+ "learning_rate": 5.9595156074114964e-05,
6943
+ "loss": 2.1902,
6944
+ "step": 4450
6945
+ },
6946
+ {
6947
+ "epoch": 0.6468963512138393,
6948
+ "eval_loss": 1.904667854309082,
6949
+ "eval_runtime": 21.5453,
6950
+ "eval_samples_per_second": 153.212,
6951
+ "eval_steps_per_second": 9.608,
6952
+ "step": 4450
6953
+ },
6954
+ {
6955
+ "epoch": 0.6476232010466637,
6956
+ "grad_norm": 2.2570154666900635,
6957
+ "learning_rate": 5.9517096180283985e-05,
6958
+ "loss": 2.1495,
6959
+ "step": 4455
6960
+ },
6961
+ {
6962
+ "epoch": 0.6483500508794883,
6963
+ "grad_norm": 2.676832914352417,
6964
+ "learning_rate": 5.9439012574815014e-05,
6965
+ "loss": 2.1315,
6966
+ "step": 4460
6967
+ },
6968
+ {
6969
+ "epoch": 0.6490769007123128,
6970
+ "grad_norm": 2.706221103668213,
6971
+ "learning_rate": 5.9360905454283424e-05,
6972
+ "loss": 2.0839,
6973
+ "step": 4465
6974
+ },
6975
+ {
6976
+ "epoch": 0.6498037505451374,
6977
+ "grad_norm": 2.5857553482055664,
6978
+ "learning_rate": 5.92827750153238e-05,
6979
+ "loss": 2.006,
6980
+ "step": 4470
6981
+ },
6982
+ {
6983
+ "epoch": 0.650530600377962,
6984
+ "grad_norm": 2.223796844482422,
6985
+ "learning_rate": 5.9204621454629433e-05,
6986
+ "loss": 2.0653,
6987
+ "step": 4475
6988
+ },
6989
+ {
6990
+ "epoch": 0.6512574502107864,
6991
+ "grad_norm": 2.7300755977630615,
6992
+ "learning_rate": 5.9126444968951824e-05,
6993
+ "loss": 2.0131,
6994
+ "step": 4480
6995
+ },
6996
+ {
6997
+ "epoch": 0.651984300043611,
6998
+ "grad_norm": 2.2716171741485596,
6999
+ "learning_rate": 5.904824575510018e-05,
7000
+ "loss": 2.0303,
7001
+ "step": 4485
7002
+ },
7003
+ {
7004
+ "epoch": 0.6527111498764355,
7005
+ "grad_norm": 2.462047815322876,
7006
+ "learning_rate": 5.8970024009940926e-05,
7007
+ "loss": 1.9954,
7008
+ "step": 4490
7009
+ },
7010
+ {
7011
+ "epoch": 0.6534379997092601,
7012
+ "grad_norm": 2.3554959297180176,
7013
+ "learning_rate": 5.88917799303972e-05,
7014
+ "loss": 1.9951,
7015
+ "step": 4495
7016
+ },
7017
+ {
7018
+ "epoch": 0.6541648495420846,
7019
+ "grad_norm": 2.245417833328247,
7020
+ "learning_rate": 5.881351371344841e-05,
7021
+ "loss": 1.9328,
7022
+ "step": 4500
7023
+ },
7024
+ {
7025
+ "epoch": 0.6541648495420846,
7026
+ "eval_loss": 1.89194917678833,
7027
+ "eval_runtime": 19.3578,
7028
+ "eval_samples_per_second": 170.525,
7029
+ "eval_steps_per_second": 10.693,
7030
+ "step": 4500
7031
  }
7032
  ],
7033
  "logging_steps": 5,
 
7056
  "attributes": {}
7057
  }
7058
  },
7059
+ "total_flos": 1.1728234225273405e+18,
7060
  "train_batch_size": 4,
7061
  "trial_name": null,
7062
  "trial_params": null