irudachirath commited on
Commit
818f37a
·
verified ·
1 Parent(s): ed36980

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9eff5ee207bd67a332529e94c498f20dca4ade4b14ddb6e802baca50011cd67
3
  size 5517243408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f6dbe798832e342a67190d44dceef80bd78777804e88c221e3afe22c3917a08
3
  size 5517243408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:165a108047674c268721b9f66f363eeb5e7a212dd9c8fa135cf606f83c14ae9d
3
  size 8984377658
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7284ca750912e2bdb6f4c871990e72df084d5c36a94af05687833beb962f143
3
  size 8984377658
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:903a92009048b799b57a07de5b0b1e00ddd8f3ce27313ee0f905a7b0c8a563fb
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f1a4433fcead82ba39559404307b112f4eb74f11934440fc8d2e4a1f5d92376
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24d93be1fd2153b773b930d26cadd2c1619498d1049cd40be5d0e37a4e1e8017
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c3f157259c65d7b9df5f7d522deadeb86b0679070a3309e3b18e333bffea53
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1500,
3
  "best_metric": 0.02954169362783432,
4
  "best_model_checkpoint": "/content/mbart-model/checkpoint-1500",
5
- "epoch": 4.139072847682119,
6
  "eval_steps": 500,
7
- "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -758,6 +758,156 @@
758
  "eval_samples_per_second": 25.34,
759
  "eval_steps_per_second": 1.588,
760
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
  }
762
  ],
763
  "logging_steps": 25,
@@ -772,7 +922,7 @@
772
  "early_stopping_threshold": 0.0
773
  },
774
  "attributes": {
775
- "early_stopping_patience_counter": 2
776
  }
777
  },
778
  "TrainerControl": {
@@ -781,12 +931,12 @@
781
  "should_evaluate": false,
782
  "should_log": false,
783
  "should_save": true,
784
- "should_training_stop": false
785
  },
786
  "attributes": {}
787
  }
788
  },
789
- "total_flos": 3.739772456534016e+16,
790
  "train_batch_size": 32,
791
  "trial_name": null,
792
  "trial_params": null
 
2
  "best_global_step": 1500,
3
  "best_metric": 0.02954169362783432,
4
  "best_model_checkpoint": "/content/mbart-model/checkpoint-1500",
5
+ "epoch": 4.966887417218543,
6
  "eval_steps": 500,
7
+ "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
758
  "eval_samples_per_second": 25.34,
759
  "eval_steps_per_second": 1.588,
760
  "step": 2500
761
+ },
762
+ {
763
+ "epoch": 4.180463576158941,
764
+ "grad_norm": 0.10778629779815674,
765
+ "learning_rate": 9.841269841269842e-06,
766
+ "loss": 0.007042064070701599,
767
+ "step": 2525
768
+ },
769
+ {
770
+ "epoch": 4.2218543046357615,
771
+ "grad_norm": 0.108833447098732,
772
+ "learning_rate": 9.345238095238096e-06,
773
+ "loss": 0.00790070116519928,
774
+ "step": 2550
775
+ },
776
+ {
777
+ "epoch": 4.263245033112582,
778
+ "grad_norm": 0.18365369737148285,
779
+ "learning_rate": 8.84920634920635e-06,
780
+ "loss": 0.006773302555084229,
781
+ "step": 2575
782
+ },
783
+ {
784
+ "epoch": 4.304635761589404,
785
+ "grad_norm": 0.1245948076248169,
786
+ "learning_rate": 8.353174603174603e-06,
787
+ "loss": 0.007275177240371704,
788
+ "step": 2600
789
+ },
790
+ {
791
+ "epoch": 4.346026490066225,
792
+ "grad_norm": 0.14145947992801666,
793
+ "learning_rate": 7.857142857142858e-06,
794
+ "loss": 0.0075223612785339355,
795
+ "step": 2625
796
+ },
797
+ {
798
+ "epoch": 4.387417218543046,
799
+ "grad_norm": 0.1652764230966568,
800
+ "learning_rate": 7.361111111111112e-06,
801
+ "loss": 0.006786306500434876,
802
+ "step": 2650
803
+ },
804
+ {
805
+ "epoch": 4.428807947019868,
806
+ "grad_norm": 0.09350095689296722,
807
+ "learning_rate": 6.865079365079366e-06,
808
+ "loss": 0.006964877843856811,
809
+ "step": 2675
810
+ },
811
+ {
812
+ "epoch": 4.470198675496689,
813
+ "grad_norm": 0.11477820575237274,
814
+ "learning_rate": 6.369047619047619e-06,
815
+ "loss": 0.006648789644241333,
816
+ "step": 2700
817
+ },
818
+ {
819
+ "epoch": 4.51158940397351,
820
+ "grad_norm": 0.16006991267204285,
821
+ "learning_rate": 5.873015873015873e-06,
822
+ "loss": 0.007510648965835571,
823
+ "step": 2725
824
+ },
825
+ {
826
+ "epoch": 4.552980132450331,
827
+ "grad_norm": 0.08736822754144669,
828
+ "learning_rate": 5.3769841269841275e-06,
829
+ "loss": 0.00650223195552826,
830
+ "step": 2750
831
+ },
832
+ {
833
+ "epoch": 4.594370860927152,
834
+ "grad_norm": 0.11176948249340057,
835
+ "learning_rate": 4.880952380952381e-06,
836
+ "loss": 0.007162246108055115,
837
+ "step": 2775
838
+ },
839
+ {
840
+ "epoch": 4.635761589403973,
841
+ "grad_norm": 0.1488288938999176,
842
+ "learning_rate": 4.3849206349206344e-06,
843
+ "loss": 0.006841970086097718,
844
+ "step": 2800
845
+ },
846
+ {
847
+ "epoch": 4.677152317880795,
848
+ "grad_norm": 0.11193964630365372,
849
+ "learning_rate": 3.888888888888889e-06,
850
+ "loss": 0.006939524412155151,
851
+ "step": 2825
852
+ },
853
+ {
854
+ "epoch": 4.718543046357616,
855
+ "grad_norm": 0.12787118554115295,
856
+ "learning_rate": 3.3928571428571426e-06,
857
+ "loss": 0.006942141056060791,
858
+ "step": 2850
859
+ },
860
+ {
861
+ "epoch": 4.759933774834437,
862
+ "grad_norm": 0.17415784299373627,
863
+ "learning_rate": 2.896825396825397e-06,
864
+ "loss": 0.007073127031326294,
865
+ "step": 2875
866
+ },
867
+ {
868
+ "epoch": 4.801324503311259,
869
+ "grad_norm": 0.11148407310247421,
870
+ "learning_rate": 2.4007936507936512e-06,
871
+ "loss": 0.007078287005424499,
872
+ "step": 2900
873
+ },
874
+ {
875
+ "epoch": 4.8427152317880795,
876
+ "grad_norm": 0.07541561126708984,
877
+ "learning_rate": 1.9047619047619051e-06,
878
+ "loss": 0.00713414192199707,
879
+ "step": 2925
880
+ },
881
+ {
882
+ "epoch": 4.8841059602649,
883
+ "grad_norm": 0.09067052602767944,
884
+ "learning_rate": 1.4087301587301588e-06,
885
+ "loss": 0.006777424216270447,
886
+ "step": 2950
887
+ },
888
+ {
889
+ "epoch": 4.925496688741722,
890
+ "grad_norm": 0.1183658018708229,
891
+ "learning_rate": 9.126984126984128e-07,
892
+ "loss": 0.006296271085739135,
893
+ "step": 2975
894
+ },
895
+ {
896
+ "epoch": 4.966887417218543,
897
+ "grad_norm": 0.1548430323600769,
898
+ "learning_rate": 4.1666666666666667e-07,
899
+ "loss": 0.006506852507591247,
900
+ "step": 3000
901
+ },
902
+ {
903
+ "epoch": 4.966887417218543,
904
+ "eval_bleu": 64.74067661751941,
905
+ "eval_exact_match": 0.4607026439695762,
906
+ "eval_loss": 0.03365711122751236,
907
+ "eval_runtime": 101.7926,
908
+ "eval_samples_per_second": 27.124,
909
+ "eval_steps_per_second": 1.7,
910
+ "step": 3000
911
  }
912
  ],
913
  "logging_steps": 25,
 
922
  "early_stopping_threshold": 0.0
923
  },
924
  "attributes": {
925
+ "early_stopping_patience_counter": 3
926
  }
927
  },
928
  "TrainerControl": {
 
931
  "should_evaluate": false,
932
  "should_log": false,
933
  "should_save": true,
934
+ "should_training_stop": true
935
  },
936
  "attributes": {}
937
  }
938
  },
939
+ "total_flos": 4.487839157846016e+16,
940
  "train_batch_size": 32,
941
  "trial_name": null,
942
  "trial_params": null