shorecode commited on
Commit
19bf5fc
·
verified ·
1 Parent(s): 27a34b3

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f75126c2a34822f1bd2758385a8fd1ca1669f4dff9de1317e4f5b25887d0a0d
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec5e19390f915af7d82e4e4d4ab59cbb91dfd4e2451e13a4fd5b2fcb2756dcd2
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b94ba484a1c3a45fc8e9c69eeda0cff023c2452e3377a4ee8a9ae36f58df97f
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08646ac1adb0510b1dd9481036497e5ccbd6dc461543f2ea2ce32d8d76f3f42
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b7133fcea647449b2caaa65223ebbc0c180189bbd59dbe842634047836d81e2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0ec4d2188868fd24263efa2856258953fca7ad21aed2b50e22b491f1d8939f
3
  size 14645
checkpoint-latest/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e26664050c6d90fb565d76db26661576aa404ce53418da0b68344264e2ee7e47
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67
3
  size 1383
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac8a8c1f0ca136b116df977b16704d013fad49cab357d7231675ec3945e85ad2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a902ca58dea28be10847ac21293e6d27c44fc74bd49d763b881d90cbd1e58f0a
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5278901988386415,
6
  "eval_steps": 500,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -224,6 +224,78 @@
224
  "eval_samples_per_second": 166.428,
225
  "eval_steps_per_second": 6.404,
226
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  }
228
  ],
229
  "logging_steps": 125,
@@ -243,7 +315,7 @@
243
  "attributes": {}
244
  }
245
  },
246
- "total_flos": 1760138625024000.0,
247
  "train_batch_size": 26,
248
  "trial_name": null,
249
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7038535984515221,
6
  "eval_steps": 500,
7
+ "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
224
  "eval_samples_per_second": 166.428,
225
  "eval_steps_per_second": 6.404,
226
  "step": 3000
227
+ },
228
+ {
229
+ "epoch": 0.5498856237902516,
230
+ "grad_norm": 0.0,
231
+ "learning_rate": 0.0002450290339609361,
232
+ "loss": 4.0045,
233
+ "step": 3125
234
+ },
235
+ {
236
+ "epoch": 0.5718810487418617,
237
+ "grad_norm": NaN,
238
+ "learning_rate": 0.00024282949146577507,
239
+ "loss": 4.0077,
240
+ "step": 3250
241
+ },
242
+ {
243
+ "epoch": 0.5938764736934717,
244
+ "grad_norm": NaN,
245
+ "learning_rate": 0.0002406299489706141,
246
+ "loss": 3.5634,
247
+ "step": 3375
248
+ },
249
+ {
250
+ "epoch": 0.6158718986450818,
251
+ "grad_norm": NaN,
252
+ "learning_rate": 0.0002384304064754531,
253
+ "loss": 0.0,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 0.6158718986450818,
258
+ "eval_loss": NaN,
259
+ "eval_runtime": 58.2581,
260
+ "eval_samples_per_second": 169.075,
261
+ "eval_steps_per_second": 6.506,
262
+ "step": 3500
263
+ },
264
+ {
265
+ "epoch": 0.6378673235966918,
266
+ "grad_norm": NaN,
267
+ "learning_rate": 0.00023623086398029207,
268
+ "loss": 0.0,
269
+ "step": 3625
270
+ },
271
+ {
272
+ "epoch": 0.659862748548302,
273
+ "grad_norm": NaN,
274
+ "learning_rate": 0.00023403132148513107,
275
+ "loss": 0.0,
276
+ "step": 3750
277
+ },
278
+ {
279
+ "epoch": 0.6818581734999121,
280
+ "grad_norm": NaN,
281
+ "learning_rate": 0.00023183177898997007,
282
+ "loss": 0.0,
283
+ "step": 3875
284
+ },
285
+ {
286
+ "epoch": 0.7038535984515221,
287
+ "grad_norm": NaN,
288
+ "learning_rate": 0.00022963223649480905,
289
+ "loss": 0.0,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 0.7038535984515221,
294
+ "eval_loss": NaN,
295
+ "eval_runtime": 58.3487,
296
+ "eval_samples_per_second": 168.813,
297
+ "eval_steps_per_second": 6.495,
298
+ "step": 4000
299
  }
300
  ],
301
  "logging_steps": 125,
 
315
  "attributes": {}
316
  }
317
  },
318
+ "total_flos": 2346851500032000.0,
319
  "train_batch_size": 26,
320
  "trial_name": null,
321
  "trial_params": null