mgh6 commited on
Commit
86fb13d
·
verified ·
1 Parent(s): fa65722

Training in progress, epoch 9, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f818eed3b71ae86bf60325ca842c29db729092a3a00f068d37dc3f8bc0e71f7b
3
  size 2610104820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d593a166093506cff6ad983870b1972ed87c3ca52d1a9177fec94514c522d6
3
  size 2610104820
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c7f95b813a9cd3ec4c03febae19edca2984f716b88224e7f0858d8e07ac8181
3
  size 5210004271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2ccc17548892e36c8966cf50ffec82ca4c0d9aa652a15f51e1c05b63b216bf0
3
  size 5210004271
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef182b3769d944b189f876a5cac490559ff2ea07b4cbd3762299fdab1ce127f7
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:032043679e88a4670001d7081f04bfea8360f4b5cce74f2dc24d2448e791dd14
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c384e9de8d22769c457fde7ec327a8b66d9e5e7803cfc2f5ad081bf4e28105c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e79c42299ae7a8c91c22377dcb5abd815ce8e10be1fead95ab281382c55da75
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.997206530510894,
5
  "eval_steps": 50,
6
- "global_step": 2259,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -682,6 +682,81 @@
682
  "eval_samples_per_second": 31.356,
683
  "eval_steps_per_second": 15.678,
684
  "step": 2250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
  }
686
  ],
687
  "logging_steps": 50,
@@ -696,7 +771,7 @@
696
  "should_evaluate": false,
697
  "should_log": false,
698
  "should_save": true,
699
- "should_training_stop": false
700
  },
701
  "attributes": {}
702
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.997206530510894,
5
  "eval_steps": 50,
6
+ "global_step": 2510,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
682
  "eval_samples_per_second": 31.356,
683
  "eval_steps_per_second": 15.678,
684
  "step": 2250
685
+ },
686
+ {
687
+ "epoch": 9.162890309764729,
688
+ "grad_norm": 0.03132237493991852,
689
+ "learning_rate": 8.366533864541832e-06,
690
+ "loss": 0.0286,
691
+ "step": 2300
692
+ },
693
+ {
694
+ "epoch": 9.162890309764729,
695
+ "eval_loss": 0.07365094125270844,
696
+ "eval_runtime": 27.0251,
697
+ "eval_samples_per_second": 31.378,
698
+ "eval_steps_per_second": 15.689,
699
+ "step": 2300
700
+ },
701
+ {
702
+ "epoch": 9.361537028990005,
703
+ "grad_norm": 0.024464119225740433,
704
+ "learning_rate": 6.374501992031872e-06,
705
+ "loss": 0.0271,
706
+ "step": 2350
707
+ },
708
+ {
709
+ "epoch": 9.361537028990005,
710
+ "eval_loss": 0.07335445284843445,
711
+ "eval_runtime": 27.016,
712
+ "eval_samples_per_second": 31.389,
713
+ "eval_steps_per_second": 15.694,
714
+ "step": 2350
715
+ },
716
+ {
717
+ "epoch": 9.560183748215284,
718
+ "grad_norm": 0.02511492557823658,
719
+ "learning_rate": 4.382470119521913e-06,
720
+ "loss": 0.0278,
721
+ "step": 2400
722
+ },
723
+ {
724
+ "epoch": 9.560183748215284,
725
+ "eval_loss": 0.07323840260505676,
726
+ "eval_runtime": 27.0119,
727
+ "eval_samples_per_second": 31.394,
728
+ "eval_steps_per_second": 15.697,
729
+ "step": 2400
730
+ },
731
+ {
732
+ "epoch": 9.75883046744056,
733
+ "grad_norm": 0.0219442006200552,
734
+ "learning_rate": 2.3904382470119524e-06,
735
+ "loss": 0.0268,
736
+ "step": 2450
737
+ },
738
+ {
739
+ "epoch": 9.75883046744056,
740
+ "eval_loss": 0.07320257276296616,
741
+ "eval_runtime": 26.9808,
742
+ "eval_samples_per_second": 31.43,
743
+ "eval_steps_per_second": 15.715,
744
+ "step": 2450
745
+ },
746
+ {
747
+ "epoch": 9.95747718666584,
748
+ "grad_norm": 0.030443966388702393,
749
+ "learning_rate": 3.98406374501992e-07,
750
+ "loss": 0.027,
751
+ "step": 2500
752
+ },
753
+ {
754
+ "epoch": 9.95747718666584,
755
+ "eval_loss": 0.07312251627445221,
756
+ "eval_runtime": 27.0283,
757
+ "eval_samples_per_second": 31.375,
758
+ "eval_steps_per_second": 15.687,
759
+ "step": 2500
760
  }
761
  ],
762
  "logging_steps": 50,
 
771
  "should_evaluate": false,
772
  "should_log": false,
773
  "should_save": true,
774
+ "should_training_stop": true
775
  },
776
  "attributes": {}
777
  }