FormlessAI commited on
Commit
a81fbb8
·
verified ·
1 Parent(s): 80e8718

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8574164bb4d11eaf453dcb6ad3966428cd591430ae9c31f0937299ed1a487081
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5b523eb134094d0fe3ed4dfef81ffed7224784825c6f64b8661fe04d195a546
3
  size 1037269336
last-checkpoint/global_step5100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab4d9e4164ee16fa83dbfd1f018e2622dd032b0baa3a0dabc64f7e73cfab6fe8
3
+ size 781993445
last-checkpoint/global_step5100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d52195365c651c3e5364df3d7926ee4819beb46d64296fcbebb5ce8b0e7502
3
+ size 781993509
last-checkpoint/global_step5100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79943dd9bf44806e010c4fd6c1ac0a48f716e1b8fb74075f38c8a90fde71eea1
3
+ size 781993509
last-checkpoint/global_step5100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f3ec07625e370675911a8ad7ada8a9b7c40c5d92eb84e352baa294434990fe
3
+ size 781993509
last-checkpoint/global_step5100/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba5de234ff5bea5001e9e180ee326f5b49b985809bee0bce1e2e5aeab57e319a
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5000
 
1
+ global_step5100
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd00f37ba9aa2f280e60110d762d55bd77f2e19074544210642612fc0d0c6aed
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56865be07eff6f66c69791fc2b9b609f0e20d2a4499e1c484d2daf5499c42b5c
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da7f2a246e741148e024dc29f274d353214e019d5f548b483c4905c46044d9c6
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc331c1dd0e2fd6a26f5faf857be1fe7603138c25d38c533d290076fd5c63d2
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59fe33085db221039a6aa12c757a1cedc0cc5b1d3be922c202529c8eb1b8058a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68160d4ba6654984de0d46bc96a7fe87a66866d7126298837a820322efc5e287
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15166ad530c105df387795709025f21626f6ea307321c73af1fa12ffc3d040d0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e51d3128d1b9d77da6840ea0cc45f49e7d431d13998e4e4edcf5f6460d262d
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ccb65ec1efdeb7bb899bcfdbd59da40edf4d90e5de5df4ddf919745dfd59ebe
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd32ddbd680624dcd914b61c50d077bc8f0cb703973d6bb57f048563ab5de57
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.8494781255722046,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7268498328245384,
6
  "eval_steps": 50,
7
- "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7808,6 +7808,162 @@
7808
  "eval_samples_per_second": 173.245,
7809
  "eval_steps_per_second": 10.864,
7810
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7811
  }
7812
  ],
7813
  "logging_steps": 5,
@@ -7836,7 +7992,7 @@
7836
  "attributes": {}
7837
  }
7838
  },
7839
- "total_flos": 1.3038086059374674e+18,
7840
  "train_batch_size": 4,
7841
  "trial_name": null,
7842
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.8430671691894531,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7413868294810292,
6
  "eval_steps": 50,
7
+ "global_step": 5100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7808
  "eval_samples_per_second": 173.245,
7809
  "eval_steps_per_second": 10.864,
7810
  "step": 5000
7811
+ },
7812
+ {
7813
+ "epoch": 0.727576682657363,
7814
+ "grad_norm": 2.492719888687134,
7815
+ "learning_rate": 5.082964248443748e-05,
7816
+ "loss": 1.9047,
7817
+ "step": 5005
7818
+ },
7819
+ {
7820
+ "epoch": 0.7283035324901875,
7821
+ "grad_norm": 2.1797542572021484,
7822
+ "learning_rate": 5.0750160841935874e-05,
7823
+ "loss": 1.9196,
7824
+ "step": 5010
7825
+ },
7826
+ {
7827
+ "epoch": 0.7290303823230121,
7828
+ "grad_norm": 2.329383134841919,
7829
+ "learning_rate": 5.067067755854552e-05,
7830
+ "loss": 1.9559,
7831
+ "step": 5015
7832
+ },
7833
+ {
7834
+ "epoch": 0.7297572321558367,
7835
+ "grad_norm": 2.5835447311401367,
7836
+ "learning_rate": 5.059119283436551e-05,
7837
+ "loss": 2.0918,
7838
+ "step": 5020
7839
+ },
7840
+ {
7841
+ "epoch": 0.7304840819886611,
7842
+ "grad_norm": 2.5015668869018555,
7843
+ "learning_rate": 5.0511706869498554e-05,
7844
+ "loss": 1.837,
7845
+ "step": 5025
7846
+ },
7847
+ {
7848
+ "epoch": 0.7312109318214857,
7849
+ "grad_norm": 2.416215419769287,
7850
+ "learning_rate": 5.043221986405045e-05,
7851
+ "loss": 2.039,
7852
+ "step": 5030
7853
+ },
7854
+ {
7855
+ "epoch": 0.7319377816543102,
7856
+ "grad_norm": 2.211178779602051,
7857
+ "learning_rate": 5.035273201812967e-05,
7858
+ "loss": 2.0104,
7859
+ "step": 5035
7860
+ },
7861
+ {
7862
+ "epoch": 0.7326646314871348,
7863
+ "grad_norm": 1.9860055446624756,
7864
+ "learning_rate": 5.0273243531846745e-05,
7865
+ "loss": 1.8645,
7866
+ "step": 5040
7867
+ },
7868
+ {
7869
+ "epoch": 0.7333914813199593,
7870
+ "grad_norm": 2.3574624061584473,
7871
+ "learning_rate": 5.0193754605313855e-05,
7872
+ "loss": 2.1474,
7873
+ "step": 5045
7874
+ },
7875
+ {
7876
+ "epoch": 0.7341183311527838,
7877
+ "grad_norm": 2.370668888092041,
7878
+ "learning_rate": 5.01142654386443e-05,
7879
+ "loss": 1.97,
7880
+ "step": 5050
7881
+ },
7882
+ {
7883
+ "epoch": 0.7341183311527838,
7884
+ "eval_loss": 1.8463149070739746,
7885
+ "eval_runtime": 20.4628,
7886
+ "eval_samples_per_second": 161.317,
7887
+ "eval_steps_per_second": 10.116,
7888
+ "step": 5050
7889
+ },
7890
+ {
7891
+ "epoch": 0.7348451809856084,
7892
+ "grad_norm": 2.351590394973755,
7893
+ "learning_rate": 5.0034776231951914e-05,
7894
+ "loss": 1.9559,
7895
+ "step": 5055
7896
+ },
7897
+ {
7898
+ "epoch": 0.7355720308184329,
7899
+ "grad_norm": 2.62426495552063,
7900
+ "learning_rate": 4.995528718535072e-05,
7901
+ "loss": 2.0377,
7902
+ "step": 5060
7903
+ },
7904
+ {
7905
+ "epoch": 0.7362988806512575,
7906
+ "grad_norm": 2.2182393074035645,
7907
+ "learning_rate": 4.9875798498954274e-05,
7908
+ "loss": 2.0352,
7909
+ "step": 5065
7910
+ },
7911
+ {
7912
+ "epoch": 0.737025730484082,
7913
+ "grad_norm": 2.1070525646209717,
7914
+ "learning_rate": 4.979631037287528e-05,
7915
+ "loss": 1.9733,
7916
+ "step": 5070
7917
+ },
7918
+ {
7919
+ "epoch": 0.7377525803169065,
7920
+ "grad_norm": 2.4891433715820312,
7921
+ "learning_rate": 4.971682300722495e-05,
7922
+ "loss": 2.1022,
7923
+ "step": 5075
7924
+ },
7925
+ {
7926
+ "epoch": 0.738479430149731,
7927
+ "grad_norm": 2.638141632080078,
7928
+ "learning_rate": 4.9637336602112685e-05,
7929
+ "loss": 2.1233,
7930
+ "step": 5080
7931
+ },
7932
+ {
7933
+ "epoch": 0.7392062799825556,
7934
+ "grad_norm": 2.174260377883911,
7935
+ "learning_rate": 4.9557851357645386e-05,
7936
+ "loss": 2.0232,
7937
+ "step": 5085
7938
+ },
7939
+ {
7940
+ "epoch": 0.7399331298153802,
7941
+ "grad_norm": 2.112396717071533,
7942
+ "learning_rate": 4.947836747392708e-05,
7943
+ "loss": 1.8431,
7944
+ "step": 5090
7945
+ },
7946
+ {
7947
+ "epoch": 0.7406599796482047,
7948
+ "grad_norm": 2.135979413986206,
7949
+ "learning_rate": 4.939888515105832e-05,
7950
+ "loss": 1.8346,
7951
+ "step": 5095
7952
+ },
7953
+ {
7954
+ "epoch": 0.7413868294810292,
7955
+ "grad_norm": 2.140866756439209,
7956
+ "learning_rate": 4.931940458913579e-05,
7957
+ "loss": 1.9499,
7958
+ "step": 5100
7959
+ },
7960
+ {
7961
+ "epoch": 0.7413868294810292,
7962
+ "eval_loss": 1.8430671691894531,
7963
+ "eval_runtime": 19.1198,
7964
+ "eval_samples_per_second": 172.648,
7965
+ "eval_steps_per_second": 10.826,
7966
+ "step": 5100
7967
  }
7968
  ],
7969
  "logging_steps": 5,
 
7992
  "attributes": {}
7993
  }
7994
  },
7995
+ "total_flos": 1.3296709309594337e+18,
7996
  "train_batch_size": 4,
7997
  "trial_name": null,
7998
  "trial_params": null