FormlessAI commited on
Commit
302ba8e
·
verified ·
1 Parent(s): 9d4eee7

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7894fb59c0c858b4b78899234347218eafb756379f3feaa6c4791094c33f31d9
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0240b6e222ded106342ff50b761da38ad2b38fcb3808077be942ca362e7e7671
3
  size 1037269336
last-checkpoint/global_step3900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502a73599633b052195a6a26af0bedf02509a127fbc1570b8b556d21bdf5d271
3
+ size 781993445
last-checkpoint/global_step3900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:908ea792c39c911c85d9d5492e49693edb28c198dc634bde11de57ff5240ffae
3
+ size 781993509
last-checkpoint/global_step3900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5996472e52d43ed85c4ac34d5c558f5521176e0c8c6d5d172e3c29ef4c51ab1
3
+ size 781993509
last-checkpoint/global_step3900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9475970e91582e8a4622d6cb5268e5d8dfe47471a26d20b6489bde951f01d3
3
+ size 781993509
last-checkpoint/global_step3900/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa4e30b1691db220ebbbf7a914ce92bda79ed6b8493e61bfbe649f84608bb961
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3800
 
1
+ global_step3900
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d65b0cc0b56c6a307232088098dbab3d86f71cd764c1988d42f96c384dafbbc0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b12b40563b99c2baee008fe86357b2292b938122b66c4fd030619ed3a7e249c2
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ac58272e122edfb1a4c58c2b90ee5648645eaa16340e3b0a8b37cc453cc2f64
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c324bba1f61bf365a138212f43772e0143abdeacc0a0a8df262a19f5484c461
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9841fcb5dea347cc559ad1484633c35e94e89849517d837deebc376cd07c9636
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d131ba9a870afc277bffc705ecd17f99202d034a2e308e14148808e10f8866
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58ed277697e9f67dcd8ca8e4c5928bb43817b76ce52df52970c03e778e31281f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ad6bb7a439bb1c3f9f1f35e584026ae43dfcd4373e8b47d872d00c633752f2
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07b15ad691da352c69e0f16dcc959dd2ee78afb5ec13b6759fd2096d0d578e0c
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5fd6e854e3b09e0cbb5e0b9ed1447e26fda6e84966f68c365186f77f59549fc
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.9534403085708618,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5524058729466492,
6
  "eval_steps": 50,
7
- "global_step": 3800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -5936,6 +5936,162 @@
5936
  "eval_samples_per_second": 171.941,
5937
  "eval_steps_per_second": 10.782,
5938
  "step": 3800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5939
  }
5940
  ],
5941
  "logging_steps": 5,
@@ -5964,7 +6120,7 @@
5964
  "attributes": {}
5965
  }
5966
  },
5967
- "total_flos": 9.911726619733524e+17,
5968
  "train_batch_size": 4,
5969
  "trial_name": null,
5970
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.9406747817993164,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.56694286960314,
6
  "eval_steps": 50,
7
+ "global_step": 3900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
5936
  "eval_samples_per_second": 171.941,
5937
  "eval_steps_per_second": 10.782,
5938
  "step": 3800
5939
+ },
5940
+ {
5941
+ "epoch": 0.5531327227794738,
5942
+ "grad_norm": 3.0287039279937744,
5943
+ "learning_rate": 6.939495836072836e-05,
5944
+ "loss": 2.1146,
5945
+ "step": 3805
5946
+ },
5947
+ {
5948
+ "epoch": 0.5538595726122983,
5949
+ "grad_norm": 2.5071959495544434,
5950
+ "learning_rate": 6.932157795240215e-05,
5951
+ "loss": 2.0004,
5952
+ "step": 3810
5953
+ },
5954
+ {
5955
+ "epoch": 0.5545864224451228,
5956
+ "grad_norm": 2.4799954891204834,
5957
+ "learning_rate": 6.924814914966674e-05,
5958
+ "loss": 2.0815,
5959
+ "step": 3815
5960
+ },
5961
+ {
5962
+ "epoch": 0.5553132722779474,
5963
+ "grad_norm": 2.5911128520965576,
5964
+ "learning_rate": 6.917467213737908e-05,
5965
+ "loss": 2.1649,
5966
+ "step": 3820
5967
+ },
5968
+ {
5969
+ "epoch": 0.5560401221107719,
5970
+ "grad_norm": 2.4524548053741455,
5971
+ "learning_rate": 6.910114710051744e-05,
5972
+ "loss": 2.0344,
5973
+ "step": 3825
5974
+ },
5975
+ {
5976
+ "epoch": 0.5567669719435965,
5977
+ "grad_norm": 2.5558533668518066,
5978
+ "learning_rate": 6.902757422418104e-05,
5979
+ "loss": 2.2114,
5980
+ "step": 3830
5981
+ },
5982
+ {
5983
+ "epoch": 0.557493821776421,
5984
+ "grad_norm": 2.460690498352051,
5985
+ "learning_rate": 6.895395369358949e-05,
5986
+ "loss": 2.0785,
5987
+ "step": 3835
5988
+ },
5989
+ {
5990
+ "epoch": 0.5582206716092455,
5991
+ "grad_norm": 2.2994587421417236,
5992
+ "learning_rate": 6.888028569408238e-05,
5993
+ "loss": 2.0985,
5994
+ "step": 3840
5995
+ },
5996
+ {
5997
+ "epoch": 0.55894752144207,
5998
+ "grad_norm": 2.4622437953948975,
5999
+ "learning_rate": 6.880657041111886e-05,
6000
+ "loss": 2.1873,
6001
+ "step": 3845
6002
+ },
6003
+ {
6004
+ "epoch": 0.5596743712748946,
6005
+ "grad_norm": 2.566040515899658,
6006
+ "learning_rate": 6.873280803027698e-05,
6007
+ "loss": 2.0761,
6008
+ "step": 3850
6009
+ },
6010
+ {
6011
+ "epoch": 0.5596743712748946,
6012
+ "eval_loss": 1.9471417665481567,
6013
+ "eval_runtime": 22.8564,
6014
+ "eval_samples_per_second": 144.423,
6015
+ "eval_steps_per_second": 9.057,
6016
+ "step": 3850
6017
+ },
6018
+ {
6019
+ "epoch": 0.5604012211077192,
6020
+ "grad_norm": 2.9277586936950684,
6021
+ "learning_rate": 6.865899873725354e-05,
6022
+ "loss": 2.1336,
6023
+ "step": 3855
6024
+ },
6025
+ {
6026
+ "epoch": 0.5611280709405437,
6027
+ "grad_norm": 2.224175214767456,
6028
+ "learning_rate": 6.858514271786328e-05,
6029
+ "loss": 1.9701,
6030
+ "step": 3860
6031
+ },
6032
+ {
6033
+ "epoch": 0.5618549207733682,
6034
+ "grad_norm": 2.4121415615081787,
6035
+ "learning_rate": 6.851124015803867e-05,
6036
+ "loss": 2.0505,
6037
+ "step": 3865
6038
+ },
6039
+ {
6040
+ "epoch": 0.5625817706061927,
6041
+ "grad_norm": 2.3348071575164795,
6042
+ "learning_rate": 6.843729124382931e-05,
6043
+ "loss": 2.0927,
6044
+ "step": 3870
6045
+ },
6046
+ {
6047
+ "epoch": 0.5633086204390173,
6048
+ "grad_norm": 2.6254260540008545,
6049
+ "learning_rate": 6.836329616140152e-05,
6050
+ "loss": 2.0223,
6051
+ "step": 3875
6052
+ },
6053
+ {
6054
+ "epoch": 0.5640354702718419,
6055
+ "grad_norm": 2.551982879638672,
6056
+ "learning_rate": 6.82892550970378e-05,
6057
+ "loss": 2.1343,
6058
+ "step": 3880
6059
+ },
6060
+ {
6061
+ "epoch": 0.5647623201046664,
6062
+ "grad_norm": 2.6784307956695557,
6063
+ "learning_rate": 6.821516823713646e-05,
6064
+ "loss": 2.1829,
6065
+ "step": 3885
6066
+ },
6067
+ {
6068
+ "epoch": 0.5654891699374909,
6069
+ "grad_norm": 2.5187387466430664,
6070
+ "learning_rate": 6.81410357682111e-05,
6071
+ "loss": 2.1628,
6072
+ "step": 3890
6073
+ },
6074
+ {
6075
+ "epoch": 0.5662160197703154,
6076
+ "grad_norm": 2.4125635623931885,
6077
+ "learning_rate": 6.806685787689007e-05,
6078
+ "loss": 2.1097,
6079
+ "step": 3895
6080
+ },
6081
+ {
6082
+ "epoch": 0.56694286960314,
6083
+ "grad_norm": 2.3090174198150635,
6084
+ "learning_rate": 6.799263474991618e-05,
6085
+ "loss": 1.9982,
6086
+ "step": 3900
6087
+ },
6088
+ {
6089
+ "epoch": 0.56694286960314,
6090
+ "eval_loss": 1.9406747817993164,
6091
+ "eval_runtime": 19.0434,
6092
+ "eval_samples_per_second": 173.341,
6093
+ "eval_steps_per_second": 10.87,
6094
+ "step": 3900
6095
  }
6096
  ],
6097
  "logging_steps": 5,
 
6120
  "attributes": {}
6121
  }
6122
  },
6123
+ "total_flos": 1.0171604246022062e+18,
6124
  "train_batch_size": 4,
6125
  "trial_name": null,
6126
  "trial_params": null