mgh6 commited on
Commit
dec2ef1
·
verified ·
1 Parent(s): c72fe44

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18fbd19ed0f067cccc4ceafafda68adefb55ef97f98832eadcfde47b3a2daf58
3
  size 2708729576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fcf860fc9c15233fdc3a04320c65f940c567bc7bcf7a9fcbd99bf407b520f99
3
  size 2708729576
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51187af1a65ecfea4a3ff1628941e84133eca5a8394cb465996a300d69d54302
3
  size 52499200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c01749687975865b82527a4cee8b9d8ce1f2bc9bc740d2d45a7e075f6d8fba8
3
  size 52499200
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d15855cc66c2961c5cc037b181eb659abb2ac26d92d4c28480e27683e47e34e
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a66bbd279eb4ab0a90bcfec226c4b910506d73ea2bdc06576ad77b7202ecee
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e8b7bfccb9c07860bceffabef3a52060cc451a19d3abd90d284c09f7120dba6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541d8b16c337fd96e05a077a2be76d2de8216d931de0afb127b2cab79028732a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 10,
6
- "global_step": 258,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -357,6 +357,188 @@
357
  "eval_samples_per_second": 21.815,
358
  "eval_steps_per_second": 5.454,
359
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  }
361
  ],
362
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 10,
6
+ "global_step": 387,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
357
  "eval_samples_per_second": 21.815,
358
  "eval_steps_per_second": 5.454,
359
  "step": 250
360
+ },
361
+ {
362
+ "epoch": 2.015617374328941,
363
+ "grad_norm": 502681.75,
364
+ "learning_rate": 7.96875e-05,
365
+ "loss": 3382.1129,
366
+ "step": 260
367
+ },
368
+ {
369
+ "epoch": 2.015617374328941,
370
+ "eval_runtime": 19.6846,
371
+ "eval_samples_per_second": 21.946,
372
+ "eval_steps_per_second": 5.487,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 2.0937042459736457,
377
+ "grad_norm": 191029.828125,
378
+ "learning_rate": 7.890625000000001e-05,
379
+ "loss": 2202.6236,
380
+ "step": 270
381
+ },
382
+ {
383
+ "epoch": 2.0937042459736457,
384
+ "eval_runtime": 19.7242,
385
+ "eval_samples_per_second": 21.902,
386
+ "eval_steps_per_second": 5.476,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 2.1717911176183504,
391
+ "grad_norm": 262116.109375,
392
+ "learning_rate": 7.8125e-05,
393
+ "loss": 1782.3984,
394
+ "step": 280
395
+ },
396
+ {
397
+ "epoch": 2.1717911176183504,
398
+ "eval_runtime": 19.7531,
399
+ "eval_samples_per_second": 21.87,
400
+ "eval_steps_per_second": 5.467,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 2.249877989263055,
405
+ "grad_norm": 389979.4375,
406
+ "learning_rate": 7.734375e-05,
407
+ "loss": 1784.0506,
408
+ "step": 290
409
+ },
410
+ {
411
+ "epoch": 2.249877989263055,
412
+ "eval_runtime": 19.7431,
413
+ "eval_samples_per_second": 21.881,
414
+ "eval_steps_per_second": 5.47,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 2.32796486090776,
419
+ "grad_norm": 392025.96875,
420
+ "learning_rate": 7.65625e-05,
421
+ "loss": 1743.6416,
422
+ "step": 300
423
+ },
424
+ {
425
+ "epoch": 2.32796486090776,
426
+ "eval_runtime": 19.7493,
427
+ "eval_samples_per_second": 21.874,
428
+ "eval_steps_per_second": 5.469,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 2.4060517325524646,
433
+ "grad_norm": 424892.03125,
434
+ "learning_rate": 7.578125e-05,
435
+ "loss": 1824.1576,
436
+ "step": 310
437
+ },
438
+ {
439
+ "epoch": 2.4060517325524646,
440
+ "eval_runtime": 19.7454,
441
+ "eval_samples_per_second": 21.879,
442
+ "eval_steps_per_second": 5.47,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 2.4841386041971694,
447
+ "grad_norm": 319042.1875,
448
+ "learning_rate": 7.500000000000001e-05,
449
+ "loss": 1743.5293,
450
+ "step": 320
451
+ },
452
+ {
453
+ "epoch": 2.4841386041971694,
454
+ "eval_runtime": 19.756,
455
+ "eval_samples_per_second": 21.867,
456
+ "eval_steps_per_second": 5.467,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 2.562225475841874,
461
+ "grad_norm": 519256.40625,
462
+ "learning_rate": 7.421875e-05,
463
+ "loss": 1764.0594,
464
+ "step": 330
465
+ },
466
+ {
467
+ "epoch": 2.562225475841874,
468
+ "eval_runtime": 19.7543,
469
+ "eval_samples_per_second": 21.869,
470
+ "eval_steps_per_second": 5.467,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 2.640312347486579,
475
+ "grad_norm": 210543.75,
476
+ "learning_rate": 7.34375e-05,
477
+ "loss": 1172.6759,
478
+ "step": 340
479
+ },
480
+ {
481
+ "epoch": 2.640312347486579,
482
+ "eval_runtime": 19.7374,
483
+ "eval_samples_per_second": 21.887,
484
+ "eval_steps_per_second": 5.472,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 2.7183992191312836,
489
+ "grad_norm": 259726.9375,
490
+ "learning_rate": 7.265625000000001e-05,
491
+ "loss": 1670.5373,
492
+ "step": 350
493
+ },
494
+ {
495
+ "epoch": 2.7183992191312836,
496
+ "eval_runtime": 19.7387,
497
+ "eval_samples_per_second": 21.886,
498
+ "eval_steps_per_second": 5.471,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 2.7964860907759883,
503
+ "grad_norm": 575974.4375,
504
+ "learning_rate": 7.1875e-05,
505
+ "loss": 2305.6137,
506
+ "step": 360
507
+ },
508
+ {
509
+ "epoch": 2.7964860907759883,
510
+ "eval_runtime": 19.7355,
511
+ "eval_samples_per_second": 21.889,
512
+ "eval_steps_per_second": 5.472,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 2.874572962420693,
517
+ "grad_norm": 529809.125,
518
+ "learning_rate": 7.109375e-05,
519
+ "loss": 2236.2371,
520
+ "step": 370
521
+ },
522
+ {
523
+ "epoch": 2.874572962420693,
524
+ "eval_runtime": 19.7607,
525
+ "eval_samples_per_second": 21.862,
526
+ "eval_steps_per_second": 5.465,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 2.952659834065398,
531
+ "grad_norm": 649306.875,
532
+ "learning_rate": 7.031250000000001e-05,
533
+ "loss": 1477.9074,
534
+ "step": 380
535
+ },
536
+ {
537
+ "epoch": 2.952659834065398,
538
+ "eval_runtime": 19.7461,
539
+ "eval_samples_per_second": 21.878,
540
+ "eval_steps_per_second": 5.469,
541
+ "step": 380
542
  }
543
  ],
544
  "logging_steps": 10,