mgh6 commited on
Commit
9eb47f7
·
verified ·
1 Parent(s): 83f26d2

Training in progress, epoch 8, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a62b0ae2bece445149d95dac8be318a1e3cf663dc984da1c6c1763a330dde2ae
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba163da7ac5c05dca7b23ba9ae84e43000a41894b62d4b0bd6c156263a09d667
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db3b73b9e39e57f14acc8da4a5a2ee7f1b952789fcd7ddf666c5f54ad92c0646
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228620246956b9c20bcf1b4373421a28779315491724ca8b3e1ccf5acdfefaac
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21dfc6c263d5ad0f8ba77e03600244b9f2781e61ae66cba4cff3c2ce6c58574f
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2d83df56a301a7a031ee1a2baca538bacda8ebe202ee2460e255209acd4857
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6359143556c6c392e65dff6e223cd73982950d740ede67b2be0a429a6648f6d5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5bdf0f87d77b7de3138456a39226e05b8a47b0bad9a0f4dd8d726aafd08598b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.999080987937967,
5
  "eval_steps": 50,
6
- "global_step": 2176,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -652,6 +652,81 @@
652
  "eval_samples_per_second": 41.646,
653
  "eval_steps_per_second": 20.823,
654
  "step": 2150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  }
656
  ],
657
  "logging_steps": 50,
@@ -671,7 +746,7 @@
671
  "attributes": {}
672
  }
673
  },
674
- "total_flos": 5.614783797825372e+17,
675
  "train_batch_size": 2,
676
  "trial_name": null,
677
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.998966111430212,
5
  "eval_steps": 50,
6
+ "global_step": 2448,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
652
  "eval_samples_per_second": 41.646,
653
  "eval_steps_per_second": 20.823,
654
  "step": 2150
655
+ },
656
+ {
657
+ "epoch": 8.087306145893164,
658
+ "grad_norm": 45.93680191040039,
659
+ "learning_rate": 1.9117647058823528e-05,
660
+ "loss": 0.8363,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 8.087306145893164,
665
+ "eval_loss": 1.2366794347763062,
666
+ "eval_runtime": 115.7882,
667
+ "eval_samples_per_second": 41.68,
668
+ "eval_steps_per_second": 20.84,
669
+ "step": 2200
670
+ },
671
+ {
672
+ "epoch": 8.271108558299828,
673
+ "grad_norm": 73.48126983642578,
674
+ "learning_rate": 1.7279411764705884e-05,
675
+ "loss": 0.8333,
676
+ "step": 2250
677
+ },
678
+ {
679
+ "epoch": 8.271108558299828,
680
+ "eval_loss": 1.2330245971679688,
681
+ "eval_runtime": 115.848,
682
+ "eval_samples_per_second": 41.658,
683
+ "eval_steps_per_second": 20.829,
684
+ "step": 2250
685
+ },
686
+ {
687
+ "epoch": 8.454910970706491,
688
+ "grad_norm": 76.98050689697266,
689
+ "learning_rate": 1.5441176470588237e-05,
690
+ "loss": 0.8267,
691
+ "step": 2300
692
+ },
693
+ {
694
+ "epoch": 8.454910970706491,
695
+ "eval_loss": 1.2376160621643066,
696
+ "eval_runtime": 115.9174,
697
+ "eval_samples_per_second": 41.633,
698
+ "eval_steps_per_second": 20.817,
699
+ "step": 2300
700
+ },
701
+ {
702
+ "epoch": 8.638713383113153,
703
+ "grad_norm": 128.97714233398438,
704
+ "learning_rate": 1.3602941176470587e-05,
705
+ "loss": 0.8217,
706
+ "step": 2350
707
+ },
708
+ {
709
+ "epoch": 8.638713383113153,
710
+ "eval_loss": 1.2340155839920044,
711
+ "eval_runtime": 115.9099,
712
+ "eval_samples_per_second": 41.636,
713
+ "eval_steps_per_second": 20.818,
714
+ "step": 2350
715
+ },
716
+ {
717
+ "epoch": 8.822515795519816,
718
+ "grad_norm": 55.4945182800293,
719
+ "learning_rate": 1.1764705882352942e-05,
720
+ "loss": 0.8221,
721
+ "step": 2400
722
+ },
723
+ {
724
+ "epoch": 8.822515795519816,
725
+ "eval_loss": 1.2404063940048218,
726
+ "eval_runtime": 116.0058,
727
+ "eval_samples_per_second": 41.601,
728
+ "eval_steps_per_second": 20.801,
729
+ "step": 2400
730
  }
731
  ],
732
  "logging_steps": 50,
 
746
  "attributes": {}
747
  }
748
  },
749
+ "total_flos": 6.316931268340613e+17,
750
  "train_batch_size": 2,
751
  "trial_name": null,
752
  "trial_params": null