mgh6 commited on
Commit
370d8e3
·
verified ·
1 Parent(s): 2eb1e3e

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fcf860fc9c15233fdc3a04320c65f940c567bc7bcf7a9fcbd99bf407b520f99
3
  size 2708729576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45a0f32b96311d18145a36a2b7ac7077d7c147fa2552926df5bcbb94fa477dcc
3
  size 2708729576
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c01749687975865b82527a4cee8b9d8ce1f2bc9bc740d2d45a7e075f6d8fba8
3
  size 52499200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e0cbd5a6d8a5ccf63de302ffe56531268f9153e400c4a49dbb62f54a65e3c0c
3
  size 52499200
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a66bbd279eb4ab0a90bcfec226c4b910506d73ea2bdc06576ad77b7202ecee
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a87652f696aae07a6616b1088616ece95dadb9b70d2197d96fbef272ac3457
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:541d8b16c337fd96e05a077a2be76d2de8216d931de0afb127b2cab79028732a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0267fbad859e57b7ff33d6dbd4fbd9fdf3cbf25f82f07754a3b6e19cff3ef2f5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 10,
6
- "global_step": 387,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -539,6 +539,188 @@
539
  "eval_samples_per_second": 21.878,
540
  "eval_steps_per_second": 5.469,
541
  "step": 380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  }
543
  ],
544
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 10,
6
+ "global_step": 516,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
539
  "eval_samples_per_second": 21.878,
540
  "eval_steps_per_second": 5.469,
541
  "step": 380
542
+ },
543
+ {
544
+ "epoch": 3.0234260614934114,
545
+ "grad_norm": 468387.1875,
546
+ "learning_rate": 6.953125e-05,
547
+ "loss": 1111.9008,
548
+ "step": 390
549
+ },
550
+ {
551
+ "epoch": 3.0234260614934114,
552
+ "eval_runtime": 19.6555,
553
+ "eval_samples_per_second": 21.979,
554
+ "eval_steps_per_second": 5.495,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 3.101512933138116,
559
+ "grad_norm": 396892.71875,
560
+ "learning_rate": 6.875e-05,
561
+ "loss": 1161.8062,
562
+ "step": 400
563
+ },
564
+ {
565
+ "epoch": 3.101512933138116,
566
+ "eval_runtime": 19.6988,
567
+ "eval_samples_per_second": 21.93,
568
+ "eval_steps_per_second": 5.483,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 3.179599804782821,
573
+ "grad_norm": 267550.6875,
574
+ "learning_rate": 6.796875000000001e-05,
575
+ "loss": 1332.8658,
576
+ "step": 410
577
+ },
578
+ {
579
+ "epoch": 3.179599804782821,
580
+ "eval_runtime": 19.7594,
581
+ "eval_samples_per_second": 21.863,
582
+ "eval_steps_per_second": 5.466,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 3.2576866764275256,
587
+ "grad_norm": 383417.75,
588
+ "learning_rate": 6.71875e-05,
589
+ "loss": 1149.9299,
590
+ "step": 420
591
+ },
592
+ {
593
+ "epoch": 3.2576866764275256,
594
+ "eval_runtime": 19.7802,
595
+ "eval_samples_per_second": 21.84,
596
+ "eval_steps_per_second": 5.46,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 3.3357735480722304,
601
+ "grad_norm": 197298.359375,
602
+ "learning_rate": 6.640625e-05,
603
+ "loss": 1427.1258,
604
+ "step": 430
605
+ },
606
+ {
607
+ "epoch": 3.3357735480722304,
608
+ "eval_runtime": 19.7807,
609
+ "eval_samples_per_second": 21.84,
610
+ "eval_steps_per_second": 5.46,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 3.413860419716935,
615
+ "grad_norm": 196738.484375,
616
+ "learning_rate": 6.562500000000001e-05,
617
+ "loss": 1029.203,
618
+ "step": 440
619
+ },
620
+ {
621
+ "epoch": 3.413860419716935,
622
+ "eval_runtime": 19.7502,
623
+ "eval_samples_per_second": 21.873,
624
+ "eval_steps_per_second": 5.468,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 3.49194729136164,
629
+ "grad_norm": 415209.96875,
630
+ "learning_rate": 6.484375e-05,
631
+ "loss": 1040.8042,
632
+ "step": 450
633
+ },
634
+ {
635
+ "epoch": 3.49194729136164,
636
+ "eval_runtime": 19.7611,
637
+ "eval_samples_per_second": 21.861,
638
+ "eval_steps_per_second": 5.465,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 3.5700341630063446,
643
+ "grad_norm": 328070.78125,
644
+ "learning_rate": 6.40625e-05,
645
+ "loss": 977.6191,
646
+ "step": 460
647
+ },
648
+ {
649
+ "epoch": 3.5700341630063446,
650
+ "eval_runtime": 19.7543,
651
+ "eval_samples_per_second": 21.869,
652
+ "eval_steps_per_second": 5.467,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 3.6481210346510493,
657
+ "grad_norm": 327954.34375,
658
+ "learning_rate": 6.328125e-05,
659
+ "loss": 893.5069,
660
+ "step": 470
661
+ },
662
+ {
663
+ "epoch": 3.6481210346510493,
664
+ "eval_runtime": 19.7609,
665
+ "eval_samples_per_second": 21.861,
666
+ "eval_steps_per_second": 5.465,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 3.726207906295754,
671
+ "grad_norm": 429292.5625,
672
+ "learning_rate": 6.25e-05,
673
+ "loss": 982.0832,
674
+ "step": 480
675
+ },
676
+ {
677
+ "epoch": 3.726207906295754,
678
+ "eval_runtime": 19.7484,
679
+ "eval_samples_per_second": 21.875,
680
+ "eval_steps_per_second": 5.469,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 3.804294777940459,
685
+ "grad_norm": 388092.28125,
686
+ "learning_rate": 6.171875e-05,
687
+ "loss": 849.4025,
688
+ "step": 490
689
+ },
690
+ {
691
+ "epoch": 3.804294777940459,
692
+ "eval_runtime": 19.7469,
693
+ "eval_samples_per_second": 21.877,
694
+ "eval_steps_per_second": 5.469,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 3.8823816495851635,
699
+ "grad_norm": 240416.734375,
700
+ "learning_rate": 6.0937500000000004e-05,
701
+ "loss": 1007.4567,
702
+ "step": 500
703
+ },
704
+ {
705
+ "epoch": 3.8823816495851635,
706
+ "eval_runtime": 19.7422,
707
+ "eval_samples_per_second": 21.882,
708
+ "eval_steps_per_second": 5.471,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 3.9604685212298683,
713
+ "grad_norm": 309579.21875,
714
+ "learning_rate": 6.015625e-05,
715
+ "loss": 960.0591,
716
+ "step": 510
717
+ },
718
+ {
719
+ "epoch": 3.9604685212298683,
720
+ "eval_runtime": 19.7662,
721
+ "eval_samples_per_second": 21.855,
722
+ "eval_steps_per_second": 5.464,
723
+ "step": 510
724
  }
725
  ],
726
  "logging_steps": 10,