FormlessAI commited on
Commit
73b95dd
·
verified ·
1 Parent(s): a7e9e29

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16ae46475882ed1ae504b00be01101d18dae43a1c433764f84c84d69eac63252
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2adcc2abd2157fb60ae9fb647b506419ffcf749d96d577540797eb95581326fa
3
  size 98088784
last-checkpoint/global_step2500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442f4c30ec475090cd43f553a9f68a16138c3c60ff13adcc8dfb1a367ebc6600
3
+ size 73939813
last-checkpoint/global_step2500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951d8778340da268ef236cb8a9a23d9d6623d4fe1f188a61fbe08f9098254ed3
3
+ size 73939813
last-checkpoint/global_step2500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70a4143fcbe528553de8b626a9a55ab08a186726536821f8d0397e74143dcc38
3
+ size 73939877
last-checkpoint/global_step2500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba209f5c515db450266bb3a7413d4377e882192a8de72815283904b18e5edf8
3
+ size 73939877
last-checkpoint/global_step2500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f249fa57c54d60decf5afeb96a3baa12044d9c4ac4799cfb6858f33b1839eb
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2450
 
1
+ global_step2500
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:938b777ff3e116c00987a18e1c538cc468155465ad2b958ce3e59986f87ea0ab
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03622e737b0b8bc31e0961f16fb24ed1f36ca068fbf92172955016d36744ba8f
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e8cc35976a56bcd3464c4fbaea9853bffe1648d13afd49995ac47e0e617dffe
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8631d6a3a14a1d8ffe0ecd6bcdf565bac306a775dbc9ee116e9cb0ae92390769
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43e56364978e3306562b0ad7fc87783d26d5a5559524dec42a78f5fc2a0f1264
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6ff2489fd57ce8b4c3e4b6a15142a4393a7355a3116f159e4e45618f906165
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38a0a528ef5376d2cc03f230714813daf0eae1d4ead8b3c7e184eb5c7d3228fd
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929048071bdf5eb3856c325b4b779be5c26afd2cebae2821e0d54a31b74a2ad1
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95f0cf9f55a2343664c39bbf5f7927b992cc95f4e1e75fa5babbb6ea5b76262f
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9eaed5707858cfa6cbc4cad23f1229a605fc99dd72f9963e5671dd9966384a
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6362767219543457,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.4017146356399266,
6
  "eval_steps": 50,
7
- "global_step": 2450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3830,6 +3830,84 @@
3830
  "eval_samples_per_second": 126.802,
3831
  "eval_steps_per_second": 15.858,
3832
  "step": 2450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3833
  }
3834
  ],
3835
  "logging_steps": 5,
@@ -3858,7 +3936,7 @@
3858
  "attributes": {}
3859
  }
3860
  },
3861
- "total_flos": 1.262558980287234e+18,
3862
  "train_batch_size": 2,
3863
  "trial_name": null,
3864
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6351883411407471,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.4507042253521125,
6
  "eval_steps": 50,
7
+ "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3830
  "eval_samples_per_second": 126.802,
3831
  "eval_steps_per_second": 15.858,
3832
  "step": 2450
3833
+ },
3834
+ {
3835
+ "epoch": 2.4066135946111453,
3836
+ "grad_norm": 0.21273045241832733,
3837
+ "learning_rate": 5.8806248666594436e-05,
3838
+ "loss": 0.6671,
3839
+ "step": 2455
3840
+ },
3841
+ {
3842
+ "epoch": 2.4115125535823636,
3843
+ "grad_norm": 0.21046888828277588,
3844
+ "learning_rate": 5.862897595233799e-05,
3845
+ "loss": 0.6632,
3846
+ "step": 2460
3847
+ },
3848
+ {
3849
+ "epoch": 2.4164115125535823,
3850
+ "grad_norm": 0.2251223474740982,
3851
+ "learning_rate": 5.845167004136867e-05,
3852
+ "loss": 0.6548,
3853
+ "step": 2465
3854
+ },
3855
+ {
3856
+ "epoch": 2.421310471524801,
3857
+ "grad_norm": 0.2118569016456604,
3858
+ "learning_rate": 5.827433275577903e-05,
3859
+ "loss": 0.6709,
3860
+ "step": 2470
3861
+ },
3862
+ {
3863
+ "epoch": 2.42620943049602,
3864
+ "grad_norm": 0.20193223655223846,
3865
+ "learning_rate": 5.809696591798407e-05,
3866
+ "loss": 0.6616,
3867
+ "step": 2475
3868
+ },
3869
+ {
3870
+ "epoch": 2.431108389467238,
3871
+ "grad_norm": 0.24873106181621552,
3872
+ "learning_rate": 5.7919571350702466e-05,
3873
+ "loss": 0.6734,
3874
+ "step": 2480
3875
+ },
3876
+ {
3877
+ "epoch": 2.436007348438457,
3878
+ "grad_norm": 0.19868969917297363,
3879
+ "learning_rate": 5.774215087693786e-05,
3880
+ "loss": 0.6802,
3881
+ "step": 2485
3882
+ },
3883
+ {
3884
+ "epoch": 2.4409063074096755,
3885
+ "grad_norm": 0.22367221117019653,
3886
+ "learning_rate": 5.7564706319960134e-05,
3887
+ "loss": 0.664,
3888
+ "step": 2490
3889
+ },
3890
+ {
3891
+ "epoch": 2.4458052663808942,
3892
+ "grad_norm": 0.2064606100320816,
3893
+ "learning_rate": 5.7387239503286674e-05,
3894
+ "loss": 0.6595,
3895
+ "step": 2495
3896
+ },
3897
+ {
3898
+ "epoch": 2.4507042253521125,
3899
+ "grad_norm": 0.23037855327129364,
3900
+ "learning_rate": 5.7209752250663576e-05,
3901
+ "loss": 0.6669,
3902
+ "step": 2500
3903
+ },
3904
+ {
3905
+ "epoch": 2.4507042253521125,
3906
+ "eval_loss": 0.6351883411407471,
3907
+ "eval_runtime": 15.5444,
3908
+ "eval_samples_per_second": 126.026,
3909
+ "eval_steps_per_second": 15.761,
3910
+ "step": 2500
3911
  }
3912
  ],
3913
  "logging_steps": 5,
 
3936
  "attributes": {}
3937
  }
3938
  },
3939
+ "total_flos": 1.288170288764158e+18,
3940
  "train_batch_size": 2,
3941
  "trial_name": null,
3942
  "trial_params": null