igriv committed on
Commit
bfcfb48
·
verified ·
1 Parent(s): 1a8e989

Training in progress, step 594, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:324b2ab791b80076dbf4c8d3f97f114006208a69a2652ad5188a7e7e501a6879
3
  size 2147605960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ec759cdb5f06b9e582baac1d961ceb3d11fcd7f8b6bcd68bb72968ad4fc853
3
  size 2147605960
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e52928f7d960a14ccf21a39ad4b199d3b7b559526f1790f2d09fe590dfa86ee
3
  size 1091573733
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81b8e84d0a507cddcee392d3cd6e9ef9a76ed6e9f3e1b078a9c884d9902f9fd
3
  size 1091573733
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:353dcb334997a0dcc8538fc5cb1c6093fe5eb610323591c3e82b29a32637f485
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04541f08bf69ac7fe480c21fd1822bf1bcd1879399508271123652fdf993af0e
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bb81aea6c6946ec583b15472a291bc37e3ab21ba03b6112dd3b6ea20bd2a7b8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d74f2392731f9bfe3e3fcd2fa4bb9529cf683336ccaf940189be2143efabe210
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8421052631578947,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -708,6 +708,132 @@
708
  "learning_rate": 1.3126041392116772e-05,
709
  "loss": 0.5162,
710
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  }
712
  ],
713
  "logging_steps": 5,
@@ -722,12 +848,12 @@
722
  "should_evaluate": false,
723
  "should_log": false,
724
  "should_save": true,
725
- "should_training_stop": false
726
  },
727
  "attributes": {}
728
  }
729
  },
730
- "total_flos": 3.184190182619474e+18,
731
  "train_batch_size": 4,
732
  "trial_name": null,
733
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 594,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
708
  "learning_rate": 1.3126041392116772e-05,
709
  "loss": 0.5162,
710
  "step": 500
711
+ },
712
+ {
713
+ "epoch": 0.8505263157894737,
714
+ "grad_norm": 0.11415216326713562,
715
+ "learning_rate": 1.1807873565164506e-05,
716
+ "loss": 0.5486,
717
+ "step": 505
718
+ },
719
+ {
720
+ "epoch": 0.8589473684210527,
721
+ "grad_norm": 0.12088897824287415,
722
+ "learning_rate": 1.0555289705749483e-05,
723
+ "loss": 0.5531,
724
+ "step": 510
725
+ },
726
+ {
727
+ "epoch": 0.8673684210526316,
728
+ "grad_norm": 0.1071295514702797,
729
+ "learning_rate": 9.369221296335006e-06,
730
+ "loss": 0.5187,
731
+ "step": 515
732
+ },
733
+ {
734
+ "epoch": 0.8757894736842106,
735
+ "grad_norm": 0.11686161160469055,
736
+ "learning_rate": 8.250550355250875e-06,
737
+ "loss": 0.5589,
738
+ "step": 520
739
+ },
740
+ {
741
+ "epoch": 0.8842105263157894,
742
+ "grad_norm": 0.11512535065412521,
743
+ "learning_rate": 7.200108780781556e-06,
744
+ "loss": 0.5505,
745
+ "step": 525
746
+ },
747
+ {
748
+ "epoch": 0.8926315789473684,
749
+ "grad_norm": 0.10691066831350327,
750
+ "learning_rate": 6.218677732526035e-06,
751
+ "loss": 0.538,
752
+ "step": 530
753
+ },
754
+ {
755
+ "epoch": 0.9010526315789473,
756
+ "grad_norm": 0.10703324526548386,
757
+ "learning_rate": 5.306987050489442e-06,
758
+ "loss": 0.5487,
759
+ "step": 535
760
+ },
761
+ {
762
+ "epoch": 0.9094736842105263,
763
+ "grad_norm": 0.11932916939258575,
764
+ "learning_rate": 4.465714712338398e-06,
765
+ "loss": 0.5522,
766
+ "step": 540
767
+ },
768
+ {
769
+ "epoch": 0.9178947368421052,
770
+ "grad_norm": 0.11946967244148254,
771
+ "learning_rate": 3.6954863292237297e-06,
772
+ "loss": 0.5326,
773
+ "step": 545
774
+ },
775
+ {
776
+ "epoch": 0.9263157894736842,
777
+ "grad_norm": 0.11607641726732254,
778
+ "learning_rate": 2.996874680545603e-06,
779
+ "loss": 0.5393,
780
+ "step": 550
781
+ },
782
+ {
783
+ "epoch": 0.9347368421052632,
784
+ "grad_norm": 0.11775229871273041,
785
+ "learning_rate": 2.3703992880066638e-06,
786
+ "loss": 0.5182,
787
+ "step": 555
788
+ },
789
+ {
790
+ "epoch": 0.9431578947368421,
791
+ "grad_norm": 0.1001369059085846,
792
+ "learning_rate": 1.8165260292704711e-06,
793
+ "loss": 0.5487,
794
+ "step": 560
795
+ },
796
+ {
797
+ "epoch": 0.9515789473684211,
798
+ "grad_norm": 0.12595337629318237,
799
+ "learning_rate": 1.3356667915121025e-06,
800
+ "loss": 0.5291,
801
+ "step": 565
802
+ },
803
+ {
804
+ "epoch": 0.96,
805
+ "grad_norm": 0.09997394680976868,
806
+ "learning_rate": 9.281791651187366e-07,
807
+ "loss": 0.5309,
808
+ "step": 570
809
+ },
810
+ {
811
+ "epoch": 0.968421052631579,
812
+ "grad_norm": 0.12577302753925323,
813
+ "learning_rate": 5.943661777680354e-07,
814
+ "loss": 0.5805,
815
+ "step": 575
816
+ },
817
+ {
818
+ "epoch": 0.9768421052631578,
819
+ "grad_norm": 0.11574060469865799,
820
+ "learning_rate": 3.3447606908196817e-07,
821
+ "loss": 0.5498,
822
+ "step": 580
823
+ },
824
+ {
825
+ "epoch": 0.9852631578947368,
826
+ "grad_norm": 0.1203237846493721,
827
+ "learning_rate": 1.487021060236904e-07,
828
+ "loss": 0.5443,
829
+ "step": 585
830
+ },
831
+ {
832
+ "epoch": 0.9936842105263158,
833
+ "grad_norm": 0.11638718843460083,
834
+ "learning_rate": 3.7182439174832106e-08,
835
+ "loss": 0.5372,
836
+ "step": 590
837
  }
838
  ],
839
  "logging_steps": 5,
 
848
  "should_evaluate": false,
849
  "should_log": false,
850
  "should_save": true,
851
+ "should_training_stop": true
852
  },
853
  "attributes": {}
854
  }
855
  },
856
+ "total_flos": 3.781344564135076e+18,
857
  "train_batch_size": 4,
858
  "trial_name": null,
859
  "trial_params": null