Kudod commited on
Commit
c3caa2a
·
verified ·
1 Parent(s): 6505e22

Training in progress, step 50000, checkpoint

Browse files
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:214a1072eb6e2e4cc13be55f0e76f496dbf8421eeae52bb353439962f9793e2b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0abfd4b4cd810dbecfbb741346885f586009f5de461a8977bf4d899f17d11548
3
  size 14645
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.3261267254282387,
6
  "eval_steps": 10000,
7
- "global_step": 40000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -600,6 +600,154 @@
600
  "eval_samples_per_second": 139.771,
601
  "eval_steps_per_second": 4.368,
602
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
  }
604
  ],
605
  "logging_steps": 500,
@@ -619,7 +767,7 @@
619
  "attributes": {}
620
  }
621
  },
622
- "total_flos": 4.483885876471327e+17,
623
  "train_batch_size": 32,
624
  "trial_name": null,
625
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.157658406785298,
6
  "eval_steps": 10000,
7
+ "global_step": 50000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
600
  "eval_samples_per_second": 139.771,
601
  "eval_steps_per_second": 4.368,
602
  "step": 40000
603
+ },
604
+ {
605
+ "epoch": 3.367703309496092,
606
+ "grad_norm": NaN,
607
+ "learning_rate": 0.0,
608
+ "loss": 0.0,
609
+ "step": 40500
610
+ },
611
+ {
612
+ "epoch": 3.409279893563945,
613
+ "grad_norm": NaN,
614
+ "learning_rate": 0.0,
615
+ "loss": 0.0,
616
+ "step": 41000
617
+ },
618
+ {
619
+ "epoch": 3.4508564776317976,
620
+ "grad_norm": NaN,
621
+ "learning_rate": 0.0,
622
+ "loss": 0.0,
623
+ "step": 41500
624
+ },
625
+ {
626
+ "epoch": 3.492433061699651,
627
+ "grad_norm": NaN,
628
+ "learning_rate": 0.0,
629
+ "loss": 0.0,
630
+ "step": 42000
631
+ },
632
+ {
633
+ "epoch": 3.5340096457675036,
634
+ "grad_norm": NaN,
635
+ "learning_rate": 0.0,
636
+ "loss": 0.0,
637
+ "step": 42500
638
+ },
639
+ {
640
+ "epoch": 3.5755862298353565,
641
+ "grad_norm": NaN,
642
+ "learning_rate": 0.0,
643
+ "loss": 0.0,
644
+ "step": 43000
645
+ },
646
+ {
647
+ "epoch": 3.6171628139032097,
648
+ "grad_norm": NaN,
649
+ "learning_rate": 0.0,
650
+ "loss": 0.0,
651
+ "step": 43500
652
+ },
653
+ {
654
+ "epoch": 3.658739397971063,
655
+ "grad_norm": NaN,
656
+ "learning_rate": 0.0,
657
+ "loss": 0.0,
658
+ "step": 44000
659
+ },
660
+ {
661
+ "epoch": 3.7003159820389158,
662
+ "grad_norm": NaN,
663
+ "learning_rate": 0.0,
664
+ "loss": 0.0,
665
+ "step": 44500
666
+ },
667
+ {
668
+ "epoch": 3.7418925661067686,
669
+ "grad_norm": NaN,
670
+ "learning_rate": 0.0,
671
+ "loss": 0.0,
672
+ "step": 45000
673
+ },
674
+ {
675
+ "epoch": 3.7834691501746214,
676
+ "grad_norm": NaN,
677
+ "learning_rate": 0.0,
678
+ "loss": 0.0,
679
+ "step": 45500
680
+ },
681
+ {
682
+ "epoch": 3.8250457342424746,
683
+ "grad_norm": NaN,
684
+ "learning_rate": 0.0,
685
+ "loss": 0.0,
686
+ "step": 46000
687
+ },
688
+ {
689
+ "epoch": 3.866622318310328,
690
+ "grad_norm": NaN,
691
+ "learning_rate": 0.0,
692
+ "loss": 0.0,
693
+ "step": 46500
694
+ },
695
+ {
696
+ "epoch": 3.9081989023781807,
697
+ "grad_norm": NaN,
698
+ "learning_rate": 0.0,
699
+ "loss": 0.0,
700
+ "step": 47000
701
+ },
702
+ {
703
+ "epoch": 3.9497754864460335,
704
+ "grad_norm": NaN,
705
+ "learning_rate": 0.0,
706
+ "loss": 0.0,
707
+ "step": 47500
708
+ },
709
+ {
710
+ "epoch": 3.9913520705138867,
711
+ "grad_norm": NaN,
712
+ "learning_rate": 0.0,
713
+ "loss": 0.0,
714
+ "step": 48000
715
+ },
716
+ {
717
+ "epoch": 4.03292865458174,
718
+ "grad_norm": NaN,
719
+ "learning_rate": 0.0,
720
+ "loss": 0.0,
721
+ "step": 48500
722
+ },
723
+ {
724
+ "epoch": 4.074505238649593,
725
+ "grad_norm": NaN,
726
+ "learning_rate": 0.0,
727
+ "loss": 0.0,
728
+ "step": 49000
729
+ },
730
+ {
731
+ "epoch": 4.116081822717446,
732
+ "grad_norm": NaN,
733
+ "learning_rate": 0.0,
734
+ "loss": 0.0,
735
+ "step": 49500
736
+ },
737
+ {
738
+ "epoch": 4.157658406785298,
739
+ "grad_norm": NaN,
740
+ "learning_rate": 0.0,
741
+ "loss": 0.0,
742
+ "step": 50000
743
+ },
744
+ {
745
+ "epoch": 4.157658406785298,
746
+ "eval_loss": NaN,
747
+ "eval_runtime": 2750.6919,
748
+ "eval_samples_per_second": 139.902,
749
+ "eval_steps_per_second": 4.372,
750
+ "step": 50000
751
  }
752
  ],
753
  "logging_steps": 500,
 
767
  "attributes": {}
768
  }
769
  },
770
+ "total_flos": 5.605184286005299e+17,
771
  "train_batch_size": 32,
772
  "trial_name": null,
773
  "trial_params": null