ljcamargo commited on
Commit
fd4e669
·
verified ·
1 Parent(s): 691f012

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:389139e8293cf0265bd0dd8a65a2482d21cc51a5fa821642143b7daf82f108b0
3
  size 2558403928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b6b0c15c719b8eaea77ad30f2872552d8062d2ef3ddf2596e34108954f23ab
3
  size 2558403928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7507fef582f26fa6dd26593efcff1fe00c92de5de40a9e14d4bcfae3d4660cf5
3
- size 1313044361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50806c2d1cb1d5eaefc1cbc0fb07371b9ab885876b2f6b143d28741862abf943
3
+ size 1313638993
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6bfeac7f2161ffd59c7dec4e192c42f83cdf28d8494d04d02013314243750b1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d59e617532ca7477c4cbf7e762ce90bb19c54009f36c062b2a5b94fe25c293
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30af866df24edce708e1eb20700878b402fa05707fa9bc5f332496baf440dbbb
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7fde5111803012042c93a73aa191336bb6e10b3ad44f6bd1d94fc7008a22b6
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e7f847589ff5ebc49461e971738b280435583eb06a67d9e67099912165ff9ed
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:824648ef06ea0ce4b952c142f576a70b350abd12eb9776a23ec3817669fa8ff9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.12,
6
  "eval_steps": 500,
7
- "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -225,6 +225,216 @@
225
  "learning_rate": 4.873647561199115e-05,
226
  "loss": 4.1776,
227
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  }
229
  ],
230
  "logging_steps": 10,
@@ -244,7 +454,7 @@
244
  "attributes": {}
245
  }
246
  },
247
- "total_flos": 1.41041809391616e+19,
248
  "train_batch_size": 8,
249
  "trial_name": null,
250
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.24,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
225
  "learning_rate": 4.873647561199115e-05,
226
  "loss": 4.1776,
227
  "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.124,
231
+ "grad_norm": 15.050427436828613,
232
+ "learning_rate": 4.863390271098922e-05,
233
+ "loss": 3.5808,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.128,
238
+ "grad_norm": 8.734102249145508,
239
+ "learning_rate": 4.852744386123061e-05,
240
+ "loss": 3.9796,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.132,
245
+ "grad_norm": 8.711186408996582,
246
+ "learning_rate": 4.84171165669608e-05,
247
+ "loss": 4.2317,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.136,
252
+ "grad_norm": 6.751059055328369,
253
+ "learning_rate": 4.8302938968485144e-05,
254
+ "loss": 3.7145,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 0.14,
259
+ "grad_norm": 10.623860359191895,
260
+ "learning_rate": 4.8184929839186196e-05,
261
+ "loss": 3.9616,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 0.144,
266
+ "grad_norm": 7.5071330070495605,
267
+ "learning_rate": 4.806310858243694e-05,
268
+ "loss": 4.0164,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 0.148,
273
+ "grad_norm": 8.607765197753906,
274
+ "learning_rate": 4.793749522841042e-05,
275
+ "loss": 4.4924,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 0.152,
280
+ "grad_norm": 8.406026840209961,
281
+ "learning_rate": 4.780811043078636e-05,
282
+ "loss": 3.4254,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 0.156,
287
+ "grad_norm": 9.387131690979004,
288
+ "learning_rate": 4.767497546335519e-05,
289
+ "loss": 3.9158,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 0.16,
294
+ "grad_norm": 7.5071258544921875,
295
+ "learning_rate": 4.753811221652017e-05,
296
+ "loss": 4.1042,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 0.164,
301
+ "grad_norm": 6.716228008270264,
302
+ "learning_rate": 4.739754319369814e-05,
303
+ "loss": 3.8632,
304
+ "step": 410
305
+ },
306
+ {
307
+ "epoch": 0.168,
308
+ "grad_norm": 9.47385311126709,
309
+ "learning_rate": 4.7253291507619404e-05,
310
+ "loss": 3.7837,
311
+ "step": 420
312
+ },
313
+ {
314
+ "epoch": 0.172,
315
+ "grad_norm": 7.547070026397705,
316
+ "learning_rate": 4.710538087652748e-05,
317
+ "loss": 4.0398,
318
+ "step": 430
319
+ },
320
+ {
321
+ "epoch": 0.176,
322
+ "grad_norm": 13.61339282989502,
323
+ "learning_rate": 4.695383562027933e-05,
324
+ "loss": 3.7789,
325
+ "step": 440
326
+ },
327
+ {
328
+ "epoch": 0.18,
329
+ "grad_norm": 6.434921741485596,
330
+ "learning_rate": 4.679868065634656e-05,
331
+ "loss": 3.9506,
332
+ "step": 450
333
+ },
334
+ {
335
+ "epoch": 0.184,
336
+ "grad_norm": 7.865591049194336,
337
+ "learning_rate": 4.663994149571849e-05,
338
+ "loss": 3.4036,
339
+ "step": 460
340
+ },
341
+ {
342
+ "epoch": 0.188,
343
+ "grad_norm": 9.311790466308594,
344
+ "learning_rate": 4.647764423870751e-05,
345
+ "loss": 4.1299,
346
+ "step": 470
347
+ },
348
+ {
349
+ "epoch": 0.192,
350
+ "grad_norm": 8.277907371520996,
351
+ "learning_rate": 4.631181557065761e-05,
352
+ "loss": 4.0614,
353
+ "step": 480
354
+ },
355
+ {
356
+ "epoch": 0.196,
357
+ "grad_norm": 12.398967742919922,
358
+ "learning_rate": 4.614248275755676e-05,
359
+ "loss": 3.7492,
360
+ "step": 490
361
+ },
362
+ {
363
+ "epoch": 0.2,
364
+ "grad_norm": 7.308017730712891,
365
+ "learning_rate": 4.5969673641553685e-05,
366
+ "loss": 4.1606,
367
+ "step": 500
368
+ },
369
+ {
370
+ "epoch": 0.204,
371
+ "grad_norm": 6.510436058044434,
372
+ "learning_rate": 4.579341663638004e-05,
373
+ "loss": 3.5708,
374
+ "step": 510
375
+ },
376
+ {
377
+ "epoch": 0.208,
378
+ "grad_norm": 10.914970397949219,
379
+ "learning_rate": 4.5613740722678525e-05,
380
+ "loss": 3.4741,
381
+ "step": 520
382
+ },
383
+ {
384
+ "epoch": 0.212,
385
+ "grad_norm": 8.786978721618652,
386
+ "learning_rate": 4.5430675443237817e-05,
387
+ "loss": 3.6204,
388
+ "step": 530
389
+ },
390
+ {
391
+ "epoch": 0.216,
392
+ "grad_norm": 10.566540718078613,
393
+ "learning_rate": 4.524425089813507e-05,
394
+ "loss": 3.9298,
395
+ "step": 540
396
+ },
397
+ {
398
+ "epoch": 0.22,
399
+ "grad_norm": 8.051084518432617,
400
+ "learning_rate": 4.505449773978677e-05,
401
+ "loss": 3.7783,
402
+ "step": 550
403
+ },
404
+ {
405
+ "epoch": 0.224,
406
+ "grad_norm": 11.182727813720703,
407
+ "learning_rate": 4.4861447167908824e-05,
408
+ "loss": 3.8174,
409
+ "step": 560
410
+ },
411
+ {
412
+ "epoch": 0.228,
413
+ "grad_norm": 11.375614166259766,
414
+ "learning_rate": 4.466513092438653e-05,
415
+ "loss": 4.0511,
416
+ "step": 570
417
+ },
418
+ {
419
+ "epoch": 0.232,
420
+ "grad_norm": 8.66441535949707,
421
+ "learning_rate": 4.446558128805561e-05,
422
+ "loss": 3.7058,
423
+ "step": 580
424
+ },
425
+ {
426
+ "epoch": 0.236,
427
+ "grad_norm": 5.743879795074463,
428
+ "learning_rate": 4.426283106939474e-05,
429
+ "loss": 3.817,
430
+ "step": 590
431
+ },
432
+ {
433
+ "epoch": 0.24,
434
+ "grad_norm": 10.231585502624512,
435
+ "learning_rate": 4.4056913605130804e-05,
436
+ "loss": 3.9779,
437
+ "step": 600
438
  }
439
  ],
440
  "logging_steps": 10,
 
454
  "attributes": {}
455
  }
456
  },
457
+ "total_flos": 2.82083618783232e+19,
458
  "train_batch_size": 8,
459
  "trial_name": null,
460
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96555ac3c0979f5dce122c8f700ea90690bfa57f9771c6c52bfcefa74d7b8abe
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7aa5aa52e27a2537453961ad4bf444627e0fda1a2dc872db4046ff91151bfac
3
  size 5841