ErrorAI commited on
Commit
588876b
·
verified ·
1 Parent(s): 4d338fc

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3896dc7e3dfc728c24eaa63c18e8781b696d643a43167cf61002d15f7d96c342
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ce1d3865491490218207bac75bccedb8c2e28ef95b0cf9a0f748b479a6adc57
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57887673c056b1c8087dce5b980c4db8dc84eed1b075794a285c54d497c534b1
3
  size 43122580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8287c39800d6ba02c8dfe28b1ae66f1a0eed7e1907cdd7f1aa84e95e9369848e
3
  size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f5daff26b89d78b10080e8caa8f194190056243cab03647cb95fa7bc83c4aee
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03991d31508c600c9181221b8bb07dd43258c023851238fbb98928b343012b1a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2347f838bf86167a02aaac5d32873f1e6b1d790c95b57022527bb2bcb488890
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2409fb1c3fd1c2de2c63b5459c49a97abc9a50084af4337cc5090c00ef975a6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7515657620041754,
5
  "eval_steps": 500,
6
- "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -637,6 +637,224 @@
637
  "learning_rate": 1.5872342839067306e-05,
638
  "loss": 2.7213,
639
  "step": 90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
  }
641
  ],
642
  "logging_steps": 1,
@@ -651,12 +869,12 @@
651
  "should_evaluate": false,
652
  "should_log": false,
653
  "should_save": true,
654
- "should_training_stop": false
655
  },
656
  "attributes": {}
657
  }
658
  },
659
- "total_flos": 6.309618477170688e+16,
660
  "train_batch_size": 4,
661
  "trial_name": null,
662
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0020876826722338,
5
  "eval_steps": 500,
6
+ "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
637
  "learning_rate": 1.5872342839067306e-05,
638
  "loss": 2.7213,
639
  "step": 90
640
+ },
641
+ {
642
+ "epoch": 0.7599164926931107,
643
+ "grad_norm": 39.1318244934082,
644
+ "learning_rate": 1.4886944624647647e-05,
645
+ "loss": 2.5212,
646
+ "step": 91
647
+ },
648
+ {
649
+ "epoch": 0.7682672233820459,
650
+ "grad_norm": 37.528324127197266,
651
+ "learning_rate": 1.3927749088052217e-05,
652
+ "loss": 3.2661,
653
+ "step": 92
654
+ },
655
+ {
656
+ "epoch": 0.7766179540709812,
657
+ "grad_norm": 33.13786315917969,
658
+ "learning_rate": 1.2995472016937404e-05,
659
+ "loss": 4.3563,
660
+ "step": 93
661
+ },
662
+ {
663
+ "epoch": 0.7849686847599165,
664
+ "grad_norm": 28.756526947021484,
665
+ "learning_rate": 1.209080911139187e-05,
666
+ "loss": 3.6397,
667
+ "step": 94
668
+ },
669
+ {
670
+ "epoch": 0.7933194154488518,
671
+ "grad_norm": 25.469545364379883,
672
+ "learning_rate": 1.1214435464779006e-05,
673
+ "loss": 4.4051,
674
+ "step": 95
675
+ },
676
+ {
677
+ "epoch": 0.8016701461377871,
678
+ "grad_norm": 27.566421508789062,
679
+ "learning_rate": 1.0367005059957096e-05,
680
+ "loss": 5.1261,
681
+ "step": 96
682
+ },
683
+ {
684
+ "epoch": 0.8100208768267223,
685
+ "grad_norm": 18.103004455566406,
686
+ "learning_rate": 9.549150281252633e-06,
687
+ "loss": 3.8833,
688
+ "step": 97
689
+ },
690
+ {
691
+ "epoch": 0.8183716075156576,
692
+ "grad_norm": 13.448304176330566,
693
+ "learning_rate": 8.761481442551573e-06,
694
+ "loss": 5.2468,
695
+ "step": 98
696
+ },
697
+ {
698
+ "epoch": 0.826722338204593,
699
+ "grad_norm": 11.937213897705078,
700
+ "learning_rate": 8.004586331860175e-06,
701
+ "loss": 4.6798,
702
+ "step": 99
703
+ },
704
+ {
705
+ "epoch": 0.8350730688935282,
706
+ "grad_norm": 8.884134292602539,
707
+ "learning_rate": 7.2790297726755716e-06,
708
+ "loss": 4.9037,
709
+ "step": 100
710
+ },
711
+ {
712
+ "epoch": 0.8434237995824635,
713
+ "grad_norm": 9.988716125488281,
714
+ "learning_rate": 6.585353202493322e-06,
715
+ "loss": 5.0198,
716
+ "step": 101
717
+ },
718
+ {
719
+ "epoch": 0.8517745302713987,
720
+ "grad_norm": 9.69389820098877,
721
+ "learning_rate": 5.924074268766422e-06,
722
+ "loss": 3.5639,
723
+ "step": 102
724
+ },
725
+ {
726
+ "epoch": 0.860125260960334,
727
+ "grad_norm": 12.106159210205078,
728
+ "learning_rate": 5.295686442617443e-06,
729
+ "loss": 4.083,
730
+ "step": 103
731
+ },
732
+ {
733
+ "epoch": 0.8684759916492694,
734
+ "grad_norm": 10.521677017211914,
735
+ "learning_rate": 4.700658650591827e-06,
736
+ "loss": 4.8845,
737
+ "step": 104
738
+ },
739
+ {
740
+ "epoch": 0.8768267223382046,
741
+ "grad_norm": 12.082547187805176,
742
+ "learning_rate": 4.139434924727359e-06,
743
+ "loss": 4.4946,
744
+ "step": 105
745
+ },
746
+ {
747
+ "epoch": 0.8851774530271399,
748
+ "grad_norm": 10.821547508239746,
749
+ "learning_rate": 3.612434071200771e-06,
750
+ "loss": 4.7307,
751
+ "step": 106
752
+ },
753
+ {
754
+ "epoch": 0.8935281837160751,
755
+ "grad_norm": 10.070833206176758,
756
+ "learning_rate": 3.1200493577989875e-06,
757
+ "loss": 5.0714,
758
+ "step": 107
759
+ },
760
+ {
761
+ "epoch": 0.9018789144050104,
762
+ "grad_norm": 19.112159729003906,
763
+ "learning_rate": 2.662648220447811e-06,
764
+ "loss": 4.6327,
765
+ "step": 108
766
+ },
767
+ {
768
+ "epoch": 0.9102296450939458,
769
+ "grad_norm": 12.466245651245117,
770
+ "learning_rate": 2.240571989017598e-06,
771
+ "loss": 5.2114,
772
+ "step": 109
773
+ },
774
+ {
775
+ "epoch": 0.918580375782881,
776
+ "grad_norm": 17.981706619262695,
777
+ "learning_rate": 1.8541356326100433e-06,
778
+ "loss": 5.5056,
779
+ "step": 110
780
+ },
781
+ {
782
+ "epoch": 0.9269311064718163,
783
+ "grad_norm": 20.902307510375977,
784
+ "learning_rate": 1.5036275245164377e-06,
785
+ "loss": 4.974,
786
+ "step": 111
787
+ },
788
+ {
789
+ "epoch": 0.9352818371607515,
790
+ "grad_norm": 17.289682388305664,
791
+ "learning_rate": 1.1893092270227724e-06,
792
+ "loss": 4.9496,
793
+ "step": 112
794
+ },
795
+ {
796
+ "epoch": 0.9436325678496869,
797
+ "grad_norm": 17.364238739013672,
798
+ "learning_rate": 9.114152962220735e-07,
799
+ "loss": 6.3092,
800
+ "step": 113
801
+ },
802
+ {
803
+ "epoch": 0.9519832985386222,
804
+ "grad_norm": 30.94085121154785,
805
+ "learning_rate": 6.701531069799038e-07,
806
+ "loss": 5.5584,
807
+ "step": 114
808
+ },
809
+ {
810
+ "epoch": 0.9603340292275574,
811
+ "grad_norm": 27.87859535217285,
812
+ "learning_rate": 4.6570269818346224e-07,
813
+ "loss": 6.069,
814
+ "step": 115
815
+ },
816
+ {
817
+ "epoch": 0.9686847599164927,
818
+ "grad_norm": 30.613059997558594,
819
+ "learning_rate": 2.9821663838981993e-07,
820
+ "loss": 6.3244,
821
+ "step": 116
822
+ },
823
+ {
824
+ "epoch": 0.9770354906054279,
825
+ "grad_norm": 8.08796215057373,
826
+ "learning_rate": 1.6781991197352133e-07,
827
+ "loss": 2.1168,
828
+ "step": 117
829
+ },
830
+ {
831
+ "epoch": 0.9853862212943633,
832
+ "grad_norm": 7.885867595672607,
833
+ "learning_rate": 7.460982585860144e-08,
834
+ "loss": 4.3936,
835
+ "step": 118
836
+ },
837
+ {
838
+ "epoch": 0.9937369519832986,
839
+ "grad_norm": 13.48105525970459,
840
+ "learning_rate": 1.8655936904465875e-08,
841
+ "loss": 5.1137,
842
+ "step": 119
843
+ },
844
+ {
845
+ "epoch": 0.9937369519832986,
846
+ "eval_loss": 0.985297679901123,
847
+ "eval_runtime": 5.2963,
848
+ "eval_samples_per_second": 19.07,
849
+ "eval_steps_per_second": 4.909,
850
+ "step": 119
851
+ },
852
+ {
853
+ "epoch": 1.0020876826722338,
854
+ "grad_norm": 15.896007537841797,
855
+ "learning_rate": 0.0,
856
+ "loss": 4.0913,
857
+ "step": 120
858
  }
859
  ],
860
  "logging_steps": 1,
 
869
  "should_evaluate": false,
870
  "should_log": false,
871
  "should_save": true,
872
+ "should_training_stop": true
873
  },
874
  "attributes": {}
875
  }
876
  },
877
+ "total_flos": 8.408442956729549e+16,
878
  "train_batch_size": 4,
879
  "trial_name": null,
880
  "trial_params": null