FormlessAI commited on
Commit
194f8d2
·
verified ·
1 Parent(s): d0dd918

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0240b6e222ded106342ff50b761da38ad2b38fcb3808077be942ca362e7e7671
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771f7595253335d0f7b3e5d9548620ff920977b25d1013493890387e97d73a3d
3
  size 1037269336
last-checkpoint/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9db33c2934109344e9790a6a24923827069a5cbff5be4dfeed5abed210416129
3
+ size 781993445
last-checkpoint/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:364e2e4fe8bf27c48c8f22149e5bd025ff98fd03141a0e70f24ca3970e7aaa3c
3
+ size 781993509
last-checkpoint/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a2042c1c983f42df9ed5fb3f3fafa7a0335dbe50cfa52e7369122eaabfc304
3
+ size 781993509
last-checkpoint/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28737b93e49b1723caea50a45e6cadf6ea49a3cc984e46af27919d5adfe9bb18
3
+ size 781993509
last-checkpoint/global_step4000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e0eca13b582c26cfb2623ba0e865f3d272dfc90cc1e2db92804e4596e915c3
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3900
 
1
+ global_step4000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b12b40563b99c2baee008fe86357b2292b938122b66c4fd030619ed3a7e249c2
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7738b79cde91732aa1ae36546c20e2adfb138db06ede459f3546964f4c72f003
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c324bba1f61bf365a138212f43772e0143abdeacc0a0a8df262a19f5484c461
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c87bb0bbd4a5d934e9e0ee64426668f65a3c0671e53f80788bd09202aaa80ce
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d131ba9a870afc277bffc705ecd17f99202d034a2e308e14148808e10f8866
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3438bbb08774094f199cd5833a18b6fec0ce5cda0f318f97029e7d59620cafc6
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52ad6bb7a439bb1c3f9f1f35e584026ae43dfcd4373e8b47d872d00c633752f2
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4893134b5c11d042dab70821374bd20a7f7800fefcc8fad1ea78520c80bfcce6
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5fd6e854e3b09e0cbb5e0b9ed1447e26fda6e84966f68c365186f77f59549fc
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7278ee28e675006b1a18eabb528c5e753ec5c79a4c5c843c134b5fc72246eac3
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.9406747817993164,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.56694286960314,
6
  "eval_steps": 50,
7
- "global_step": 3900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6092,6 +6092,162 @@
6092
  "eval_samples_per_second": 173.341,
6093
  "eval_steps_per_second": 10.87,
6094
  "step": 3900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6095
  }
6096
  ],
6097
  "logging_steps": 5,
@@ -6120,7 +6276,7 @@
6120
  "attributes": {}
6121
  }
6122
  },
6123
- "total_flos": 1.0171604246022062e+18,
6124
  "train_batch_size": 4,
6125
  "trial_name": null,
6126
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.9395991563796997,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5814798662596308,
6
  "eval_steps": 50,
7
+ "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6092
  "eval_samples_per_second": 173.341,
6093
  "eval_steps_per_second": 10.87,
6094
  "step": 3900
6095
+ },
6096
+ {
6097
+ "epoch": 0.5676697194359646,
6098
+ "grad_norm": 2.6036624908447266,
6099
+ "learning_rate": 6.791836657414602e-05,
6100
+ "loss": 2.1123,
6101
+ "step": 3905
6102
+ },
6103
+ {
6104
+ "epoch": 0.5683965692687891,
6105
+ "grad_norm": 2.3896546363830566,
6106
+ "learning_rate": 6.784405353654967e-05,
6107
+ "loss": 1.9911,
6108
+ "step": 3910
6109
+ },
6110
+ {
6111
+ "epoch": 0.5691234191016136,
6112
+ "grad_norm": 2.328312635421753,
6113
+ "learning_rate": 6.776969582421008e-05,
6114
+ "loss": 2.15,
6115
+ "step": 3915
6116
+ },
6117
+ {
6118
+ "epoch": 0.5698502689344381,
6119
+ "grad_norm": 2.710876941680908,
6120
+ "learning_rate": 6.769529362432273e-05,
6121
+ "loss": 1.9971,
6122
+ "step": 3920
6123
+ },
6124
+ {
6125
+ "epoch": 0.5705771187672627,
6126
+ "grad_norm": 2.569784164428711,
6127
+ "learning_rate": 6.762084712419506e-05,
6128
+ "loss": 2.0124,
6129
+ "step": 3925
6130
+ },
6131
+ {
6132
+ "epoch": 0.5713039686000873,
6133
+ "grad_norm": 2.488879919052124,
6134
+ "learning_rate": 6.754635651124603e-05,
6135
+ "loss": 2.0063,
6136
+ "step": 3930
6137
+ },
6138
+ {
6139
+ "epoch": 0.5720308184329118,
6140
+ "grad_norm": 2.3385536670684814,
6141
+ "learning_rate": 6.747182197300568e-05,
6142
+ "loss": 1.9629,
6143
+ "step": 3935
6144
+ },
6145
+ {
6146
+ "epoch": 0.5727576682657363,
6147
+ "grad_norm": 2.078852415084839,
6148
+ "learning_rate": 6.739724369711464e-05,
6149
+ "loss": 1.8292,
6150
+ "step": 3940
6151
+ },
6152
+ {
6153
+ "epoch": 0.5734845180985608,
6154
+ "grad_norm": 2.723219156265259,
6155
+ "learning_rate": 6.732262187132362e-05,
6156
+ "loss": 1.9587,
6157
+ "step": 3945
6158
+ },
6159
+ {
6160
+ "epoch": 0.5742113679313854,
6161
+ "grad_norm": 2.4456677436828613,
6162
+ "learning_rate": 6.724795668349295e-05,
6163
+ "loss": 2.1195,
6164
+ "step": 3950
6165
+ },
6166
+ {
6167
+ "epoch": 0.5742113679313854,
6168
+ "eval_loss": 1.9503754377365112,
6169
+ "eval_runtime": 21.006,
6170
+ "eval_samples_per_second": 157.145,
6171
+ "eval_steps_per_second": 9.854,
6172
+ "step": 3950
6173
+ },
6174
+ {
6175
+ "epoch": 0.57493821776421,
6176
+ "grad_norm": 2.2807230949401855,
6177
+ "learning_rate": 6.71732483215922e-05,
6178
+ "loss": 2.0122,
6179
+ "step": 3955
6180
+ },
6181
+ {
6182
+ "epoch": 0.5756650675970345,
6183
+ "grad_norm": 2.6762518882751465,
6184
+ "learning_rate": 6.709849697369953e-05,
6185
+ "loss": 2.1176,
6186
+ "step": 3960
6187
+ },
6188
+ {
6189
+ "epoch": 0.576391917429859,
6190
+ "grad_norm": 2.549398899078369,
6191
+ "learning_rate": 6.70237028280014e-05,
6192
+ "loss": 2.1504,
6193
+ "step": 3965
6194
+ },
6195
+ {
6196
+ "epoch": 0.5771187672626835,
6197
+ "grad_norm": 2.400339365005493,
6198
+ "learning_rate": 6.6948866072792e-05,
6199
+ "loss": 2.1282,
6200
+ "step": 3970
6201
+ },
6202
+ {
6203
+ "epoch": 0.5778456170955081,
6204
+ "grad_norm": 2.5607948303222656,
6205
+ "learning_rate": 6.687398689647273e-05,
6206
+ "loss": 2.0596,
6207
+ "step": 3975
6208
+ },
6209
+ {
6210
+ "epoch": 0.5785724669283326,
6211
+ "grad_norm": 2.790510892868042,
6212
+ "learning_rate": 6.679906548755185e-05,
6213
+ "loss": 2.0354,
6214
+ "step": 3980
6215
+ },
6216
+ {
6217
+ "epoch": 0.5792993167611571,
6218
+ "grad_norm": 2.543358325958252,
6219
+ "learning_rate": 6.672410203464392e-05,
6220
+ "loss": 2.2136,
6221
+ "step": 3985
6222
+ },
6223
+ {
6224
+ "epoch": 0.5800261665939817,
6225
+ "grad_norm": 2.59621524810791,
6226
+ "learning_rate": 6.664909672646934e-05,
6227
+ "loss": 2.1201,
6228
+ "step": 3990
6229
+ },
6230
+ {
6231
+ "epoch": 0.5807530164268062,
6232
+ "grad_norm": 2.42059063911438,
6233
+ "learning_rate": 6.657404975185387e-05,
6234
+ "loss": 2.0,
6235
+ "step": 3995
6236
+ },
6237
+ {
6238
+ "epoch": 0.5814798662596308,
6239
+ "grad_norm": 2.4144132137298584,
6240
+ "learning_rate": 6.64989612997282e-05,
6241
+ "loss": 2.146,
6242
+ "step": 4000
6243
+ },
6244
+ {
6245
+ "epoch": 0.5814798662596308,
6246
+ "eval_loss": 1.9395991563796997,
6247
+ "eval_runtime": 19.1182,
6248
+ "eval_samples_per_second": 172.663,
6249
+ "eval_steps_per_second": 10.827,
6250
+ "step": 4000
6251
  }
6252
  ],
6253
  "logging_steps": 5,
 
6276
  "attributes": {}
6277
  }
6278
  },
6279
+ "total_flos": 1.0434609863437844e+18,
6280
  "train_batch_size": 4,
6281
  "trial_name": null,
6282
  "trial_params": null