mgh6 commited on
Commit
f31c2b1
·
verified ·
1 Parent(s): 0d436a6

Training in progress, epoch 8, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:940ece6053481e3946e69ad55df159ac113b6630529ebef37438d34f4468b28e
3
  size 2610104820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f818eed3b71ae86bf60325ca842c29db729092a3a00f068d37dc3f8bc0e71f7b
3
  size 2610104820
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1be54ea9e7155d62de181add57ec85fc6b766b6bb74cf5509aa566a8f10d54df
3
  size 5210004271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c7f95b813a9cd3ec4c03febae19edca2984f716b88224e7f0858d8e07ac8181
3
  size 5210004271
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e2f5546fc83506789b1d0adb53d6b6a28f107b064c7f808a5d419eeca20b8cf
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef182b3769d944b189f876a5cac490559ff2ea07b4cbd3762299fdab1ce127f7
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68f472c819beed817ac9f811e783cb3b4e6768b75b8d75dac6c446449f746786
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c384e9de8d22769c457fde7ec327a8b66d9e5e7803cfc2f5ad081bf4e28105c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.997206530510894,
5
  "eval_steps": 50,
6
- "global_step": 2008,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,81 @@
607
  "eval_samples_per_second": 31.365,
608
  "eval_steps_per_second": 15.683,
609
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 50,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.997206530510894,
5
  "eval_steps": 50,
6
+ "global_step": 2259,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 31.365,
608
  "eval_steps_per_second": 15.683,
609
  "step": 2000
610
+ },
611
+ {
612
+ "epoch": 8.166863244149233,
613
+ "grad_norm": 0.03973577171564102,
614
+ "learning_rate": 1.8326693227091633e-05,
615
+ "loss": 0.0298,
616
+ "step": 2050
617
+ },
618
+ {
619
+ "epoch": 8.166863244149233,
620
+ "eval_loss": 0.07551723718643188,
621
+ "eval_runtime": 27.0162,
622
+ "eval_samples_per_second": 31.389,
623
+ "eval_steps_per_second": 15.694,
624
+ "step": 2050
625
+ },
626
+ {
627
+ "epoch": 8.36550996337451,
628
+ "grad_norm": 0.042930684983730316,
629
+ "learning_rate": 1.6334661354581674e-05,
630
+ "loss": 0.0293,
631
+ "step": 2100
632
+ },
633
+ {
634
+ "epoch": 8.36550996337451,
635
+ "eval_loss": 0.0750298798084259,
636
+ "eval_runtime": 27.0096,
637
+ "eval_samples_per_second": 31.396,
638
+ "eval_steps_per_second": 15.698,
639
+ "step": 2100
640
+ },
641
+ {
642
+ "epoch": 8.564156682599789,
643
+ "grad_norm": 0.04831754416227341,
644
+ "learning_rate": 1.4342629482071715e-05,
645
+ "loss": 0.0293,
646
+ "step": 2150
647
+ },
648
+ {
649
+ "epoch": 8.564156682599789,
650
+ "eval_loss": 0.07498627156019211,
651
+ "eval_runtime": 27.0181,
652
+ "eval_samples_per_second": 31.386,
653
+ "eval_steps_per_second": 15.693,
654
+ "step": 2150
655
+ },
656
+ {
657
+ "epoch": 8.762803401825067,
658
+ "grad_norm": 0.040915608406066895,
659
+ "learning_rate": 1.2350597609561753e-05,
660
+ "loss": 0.0291,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 8.762803401825067,
665
+ "eval_loss": 0.07433921098709106,
666
+ "eval_runtime": 27.0115,
667
+ "eval_samples_per_second": 31.394,
668
+ "eval_steps_per_second": 15.697,
669
+ "step": 2200
670
+ },
671
+ {
672
+ "epoch": 8.961450121050344,
673
+ "grad_norm": 0.049351248890161514,
674
+ "learning_rate": 1.0358565737051794e-05,
675
+ "loss": 0.0289,
676
+ "step": 2250
677
+ },
678
+ {
679
+ "epoch": 8.961450121050344,
680
+ "eval_loss": 0.07407635450363159,
681
+ "eval_runtime": 27.0446,
682
+ "eval_samples_per_second": 31.356,
683
+ "eval_steps_per_second": 15.678,
684
+ "step": 2250
685
  }
686
  ],
687
  "logging_steps": 50,