mgh6 commited on
Commit
cea3a32
·
verified ·
1 Parent(s): 18f7c11

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88647830ecd553d3bbcce815c85cc295f4bf39af9e61197684a6bbf2ad0d22cd
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ecb58c1fcba639498eb4bb8f9fd11485e8f410da635bffd7990f7d24a9ad84
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6a4f68f91aa0a169d492df4d096b1d4770de24a063b76c7cc1a09f608822ee7
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc51d878f8242e2efc63c0c0a3e6c6b8ebb1c5eedd276b9fd4ca5863d4b4c44c
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21dfc6c263d5ad0f8ba77e03600244b9f2781e61ae66cba4cff3c2ce6c58574f
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11f1252e969592bce36bc2e2fc4eed6af06892f0a3f45eb582be003ac5046ad5
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86d123b176365e851d79aa73f522c50da61f447efcfc0bcc767ae1a1949443a3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37acfbdf3414734e537adb979fbdbc4d04a389a43d3107d724270efe19fa191f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
  "eval_steps": 50,
6
- "global_step": 2184,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -652,6 +652,96 @@
652
  "eval_samples_per_second": 41.607,
653
  "eval_steps_per_second": 20.803,
654
  "step": 2150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  }
656
  ],
657
  "logging_steps": 50,
@@ -671,7 +761,7 @@
671
  "attributes": {}
672
  }
673
  },
674
- "total_flos": 5.614783810576056e+17,
675
  "train_batch_size": 2,
676
  "trial_name": null,
677
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.0,
5
  "eval_steps": 50,
6
+ "global_step": 2457,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
652
  "eval_samples_per_second": 41.607,
653
  "eval_steps_per_second": 20.803,
654
  "step": 2150
655
+ },
656
+ {
657
+ "epoch": 8.058816771970132,
658
+ "grad_norm": 80.5433349609375,
659
+ "learning_rate": 1.9117647058823528e-05,
660
+ "loss": 0.8227,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 8.058816771970132,
665
+ "eval_loss": 1.2389429807662964,
666
+ "eval_runtime": 117.23,
667
+ "eval_samples_per_second": 41.167,
668
+ "eval_steps_per_second": 20.583,
669
+ "step": 2200
670
+ },
671
+ {
672
+ "epoch": 8.242619184376794,
673
+ "grad_norm": 45.97893142700195,
674
+ "learning_rate": 1.7279411764705884e-05,
675
+ "loss": 0.8316,
676
+ "step": 2250
677
+ },
678
+ {
679
+ "epoch": 8.242619184376794,
680
+ "eval_loss": 1.2351105213165283,
681
+ "eval_runtime": 116.2345,
682
+ "eval_samples_per_second": 41.52,
683
+ "eval_steps_per_second": 20.76,
684
+ "step": 2250
685
+ },
686
+ {
687
+ "epoch": 8.426421596783458,
688
+ "grad_norm": 68.8030014038086,
689
+ "learning_rate": 1.5441176470588237e-05,
690
+ "loss": 0.8299,
691
+ "step": 2300
692
+ },
693
+ {
694
+ "epoch": 8.426421596783458,
695
+ "eval_loss": 1.2383313179016113,
696
+ "eval_runtime": 115.9203,
697
+ "eval_samples_per_second": 41.632,
698
+ "eval_steps_per_second": 20.816,
699
+ "step": 2300
700
+ },
701
+ {
702
+ "epoch": 8.610224009190121,
703
+ "grad_norm": 48.16875076293945,
704
+ "learning_rate": 1.3602941176470587e-05,
705
+ "loss": 0.822,
706
+ "step": 2350
707
+ },
708
+ {
709
+ "epoch": 8.610224009190121,
710
+ "eval_loss": 1.2370705604553223,
711
+ "eval_runtime": 116.2999,
712
+ "eval_samples_per_second": 41.496,
713
+ "eval_steps_per_second": 20.748,
714
+ "step": 2350
715
+ },
716
+ {
717
+ "epoch": 8.794026421596783,
718
+ "grad_norm": 49.53213119506836,
719
+ "learning_rate": 1.1764705882352942e-05,
720
+ "loss": 0.8251,
721
+ "step": 2400
722
+ },
723
+ {
724
+ "epoch": 8.794026421596783,
725
+ "eval_loss": 1.2367668151855469,
726
+ "eval_runtime": 116.3102,
727
+ "eval_samples_per_second": 41.493,
728
+ "eval_steps_per_second": 20.746,
729
+ "step": 2400
730
+ },
731
+ {
732
+ "epoch": 8.977828834003446,
733
+ "grad_norm": 42.136714935302734,
734
+ "learning_rate": 9.926470588235293e-06,
735
+ "loss": 0.8225,
736
+ "step": 2450
737
+ },
738
+ {
739
+ "epoch": 8.977828834003446,
740
+ "eval_loss": 1.2319527864456177,
741
+ "eval_runtime": 116.086,
742
+ "eval_samples_per_second": 41.573,
743
+ "eval_steps_per_second": 20.786,
744
+ "step": 2450
745
  }
746
  ],
747
  "logging_steps": 50,
 
761
  "attributes": {}
762
  }
763
  },
764
+ "total_flos": 6.316931282433475e+17,
765
  "train_batch_size": 2,
766
  "trial_name": null,
767
  "trial_params": null