FormlessAI commited on
Commit
67dea51
·
verified ·
1 Parent(s): 6571ac0

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c9cc357e04f459d6e728fe04ff6176e928e17e21e7c228bcc4bca607fbf757a
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c06c7c3813fddb5ac98100afaa4381d3baef788a23ae2c7f74869fe4672a6cf
3
  size 1037269336
last-checkpoint/global_step2550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331533aa0273d7ed844034657286226b3238718c45c184054676c41ff94315dd
3
+ size 781993445
last-checkpoint/global_step2550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a023f96ecf4f996521f9cfb06b8f8627eb5336444c379a4bce76a9ef3dcade
3
+ size 781993509
last-checkpoint/global_step2550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6200ae39dfd5a06d648527ccaf311a71c7b1d147fe5d7fc58fbb95ac931b0cad
3
+ size 781993509
last-checkpoint/global_step2550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:943ace536a9d78bb2de6e5316a56efdfd48708f6d372b403c76fc312c6969138
3
+ size 781993509
last-checkpoint/global_step2550/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dc68d7703a7eef8d86cc41de4437cc78da7c72c2af5952b0b228df8dee727a7
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2450
 
1
+ global_step2550
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e12c0961fe1a3b6ec66625b2224f2302a5c2d693cd735db7d68a016f2cb85bb7
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b0ee28e509aba9c3f4fd9aa443f72e6d6a57f938b3ceddd2bc7bbaf5cf585f
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:998e7d3fa5d395467fa8ba58740083e47b02ee8207a4533fbc08b237a76b32a3
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc20aa2b364f08ed1de312499fe9555a2f7695c5ffc4d37b3434fdc9e8e70c8
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3264de8822ea300f1e72128e882e4c804497201681a6db28fc352a4ced8e33b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:638e7b950d339cd9bb18ebffa7e43a1c200644eb1f4d72e3469233185fb09e21
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e369a49f4f14acec0bf41d3ef4a616c220de847d1f58b11471e2a844e9316393
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:166aa68437e50207c09f7d061179b22a36411d587c77f77e4583f632d4d5ebe2
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ad0002308e7a85d5ed9a363df790b3450d629316e7cdf2272144d0dd8ab391d
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726509c8d205ff37449ab18ba69ba9d65a16125d4029696d3a738278dbe2b999
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 2.0702576637268066,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.3561564180840238,
6
  "eval_steps": 50,
7
- "global_step": 2450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3830,6 +3830,162 @@
3830
  "eval_samples_per_second": 174.376,
3831
  "eval_steps_per_second": 10.935,
3832
  "step": 2450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3833
  }
3834
  ],
3835
  "logging_steps": 5,
@@ -3858,7 +4014,7 @@
3858
  "attributes": {}
3859
  }
3860
  },
3861
- "total_flos": 6.378124456451113e+17,
3862
  "train_batch_size": 4,
3863
  "trial_name": null,
3864
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 2.0672757625579834,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.3706934147405146,
6
  "eval_steps": 50,
7
+ "global_step": 2550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3830
  "eval_samples_per_second": 174.376,
3831
  "eval_steps_per_second": 10.935,
3832
  "step": 2450
3833
+ },
3834
+ {
3835
+ "epoch": 0.3568832679168484,
3836
+ "grad_norm": 2.373387098312378,
3837
+ "learning_rate": 8.68570360607412e-05,
3838
+ "loss": 2.087,
3839
+ "step": 2455
3840
+ },
3841
+ {
3842
+ "epoch": 0.35761011774967294,
3843
+ "grad_norm": 2.527256488800049,
3844
+ "learning_rate": 8.680298155139663e-05,
3845
+ "loss": 2.1579,
3846
+ "step": 2460
3847
+ },
3848
+ {
3849
+ "epoch": 0.35833696758249745,
3850
+ "grad_norm": 2.339224100112915,
3851
+ "learning_rate": 8.674883463823014e-05,
3852
+ "loss": 2.3154,
3853
+ "step": 2465
3854
+ },
3855
+ {
3856
+ "epoch": 0.359063817415322,
3857
+ "grad_norm": 2.3437659740448,
3858
+ "learning_rate": 8.669459545755653e-05,
3859
+ "loss": 2.1505,
3860
+ "step": 2470
3861
+ },
3862
+ {
3863
+ "epoch": 0.3597906672481465,
3864
+ "grad_norm": 2.31026291847229,
3865
+ "learning_rate": 8.664026414592286e-05,
3866
+ "loss": 2.4049,
3867
+ "step": 2475
3868
+ },
3869
+ {
3870
+ "epoch": 0.3605175170809711,
3871
+ "grad_norm": 2.880200147628784,
3872
+ "learning_rate": 8.658584084010815e-05,
3873
+ "loss": 2.3197,
3874
+ "step": 2480
3875
+ },
3876
+ {
3877
+ "epoch": 0.36124436691379563,
3878
+ "grad_norm": 2.4933598041534424,
3879
+ "learning_rate": 8.653132567712298e-05,
3880
+ "loss": 2.2212,
3881
+ "step": 2485
3882
+ },
3883
+ {
3884
+ "epoch": 0.36197121674662014,
3885
+ "grad_norm": 2.8316283226013184,
3886
+ "learning_rate": 8.647671879420927e-05,
3887
+ "loss": 2.302,
3888
+ "step": 2490
3889
+ },
3890
+ {
3891
+ "epoch": 0.3626980665794447,
3892
+ "grad_norm": 2.6056923866271973,
3893
+ "learning_rate": 8.64220203288397e-05,
3894
+ "loss": 2.3031,
3895
+ "step": 2495
3896
+ },
3897
+ {
3898
+ "epoch": 0.3634249164122692,
3899
+ "grad_norm": 2.3827829360961914,
3900
+ "learning_rate": 8.636723041871766e-05,
3901
+ "loss": 2.3328,
3902
+ "step": 2500
3903
+ },
3904
+ {
3905
+ "epoch": 0.3634249164122692,
3906
+ "eval_loss": 2.0754590034484863,
3907
+ "eval_runtime": 25.4986,
3908
+ "eval_samples_per_second": 129.458,
3909
+ "eval_steps_per_second": 8.118,
3910
+ "step": 2500
3911
+ },
3912
+ {
3913
+ "epoch": 0.36415176624509377,
3914
+ "grad_norm": 2.934666872024536,
3915
+ "learning_rate": 8.631234920177665e-05,
3916
+ "loss": 2.1663,
3917
+ "step": 2505
3918
+ },
3919
+ {
3920
+ "epoch": 0.3648786160779183,
3921
+ "grad_norm": 2.3056254386901855,
3922
+ "learning_rate": 8.625737681618008e-05,
3923
+ "loss": 2.1278,
3924
+ "step": 2510
3925
+ },
3926
+ {
3927
+ "epoch": 0.36560546591074283,
3928
+ "grad_norm": 2.4940974712371826,
3929
+ "learning_rate": 8.620231340032087e-05,
3930
+ "loss": 2.0522,
3931
+ "step": 2515
3932
+ },
3933
+ {
3934
+ "epoch": 0.3663323157435674,
3935
+ "grad_norm": 2.724717855453491,
3936
+ "learning_rate": 8.614715909282107e-05,
3937
+ "loss": 2.1553,
3938
+ "step": 2520
3939
+ },
3940
+ {
3941
+ "epoch": 0.3670591655763919,
3942
+ "grad_norm": 2.628826379776001,
3943
+ "learning_rate": 8.609191403253163e-05,
3944
+ "loss": 1.991,
3945
+ "step": 2525
3946
+ },
3947
+ {
3948
+ "epoch": 0.36778601540921646,
3949
+ "grad_norm": 2.2899041175842285,
3950
+ "learning_rate": 8.603657835853188e-05,
3951
+ "loss": 1.974,
3952
+ "step": 2530
3953
+ },
3954
+ {
3955
+ "epoch": 0.368512865242041,
3956
+ "grad_norm": 2.5030078887939453,
3957
+ "learning_rate": 8.598115221012935e-05,
3958
+ "loss": 2.3256,
3959
+ "step": 2535
3960
+ },
3961
+ {
3962
+ "epoch": 0.3692397150748655,
3963
+ "grad_norm": 2.282642364501953,
3964
+ "learning_rate": 8.592563572685929e-05,
3965
+ "loss": 2.1428,
3966
+ "step": 2540
3967
+ },
3968
+ {
3969
+ "epoch": 0.3699665649076901,
3970
+ "grad_norm": 2.9469528198242188,
3971
+ "learning_rate": 8.587002904848438e-05,
3972
+ "loss": 2.1632,
3973
+ "step": 2545
3974
+ },
3975
+ {
3976
+ "epoch": 0.3706934147405146,
3977
+ "grad_norm": 2.4431910514831543,
3978
+ "learning_rate": 8.581433231499436e-05,
3979
+ "loss": 2.2365,
3980
+ "step": 2550
3981
+ },
3982
+ {
3983
+ "epoch": 0.3706934147405146,
3984
+ "eval_loss": 2.0672757625579834,
3985
+ "eval_runtime": 19.0363,
3986
+ "eval_samples_per_second": 173.406,
3987
+ "eval_steps_per_second": 10.874,
3988
+ "step": 2550
3989
  }
3990
  ],
3991
  "logging_steps": 5,
 
4014
  "attributes": {}
4015
  }
4016
  },
4017
+ "total_flos": 6.639732802893906e+17,
4018
  "train_batch_size": 4,
4019
  "trial_name": null,
4020
  "trial_params": null