basilkr committed on
Commit
1b90f62
·
1 Parent(s): cfa667a

Upload 17 files

Browse files
Files changed (6) hide show
  1. optimizer.pt +2 -2
  2. pytorch_model.bin +2 -2
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +3 -252
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
- size 0
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ea4092ddde682d366002e26f501e8f2c43fa7a07916494a0a33eef1122e075
3
+ size 6111428695
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
- size 0
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2685daba978c2b8d65cb43ad777cda963664eba2acb444794a1480b6a6afd6b
3
+ size 3055754841
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c49a19341b10faeb7bf1e2f2e25fe3d9dd5140c056ce7ce4e4e5b493b4d2568
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d7e6b7df767721053ff9f6323a231cf0852ba9410e5b7d74ee3079d462119a
3
  size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:013b05fcefe78bf3963cc49dbbe22e39344ba52082d1913dcd60b29a95d1e7e8
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e769351f21ac92d172aed90d39bd27fe1d1e537736f1318d4c2465433f1bbfa
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e37efdae495a667db75aa2ae3d9a00298d25e92b65955065a8365d55e484799
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d8eb51cc9c26967d17a84e7b6820a0d26198ba86110325f72bafd766eeb13bb
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 100.0,
3
  "best_model_checkpoint": "./Malasar_ASR_DICtandLuke/checkpoint-1000",
4
- "epoch": 7.590132827324478,
5
- "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -753,260 +753,11 @@
753
  "eval_steps_per_second": 0.204,
754
  "eval_wer": 100.0,
755
  "step": 3000
756
- },
757
- {
758
- "epoch": 5.74,
759
- "learning_rate": 2.802857142857143e-06,
760
- "loss": 0.0062,
761
- "step": 3025
762
- },
763
- {
764
- "epoch": 5.79,
765
- "learning_rate": 2.7314285714285714e-06,
766
- "loss": 0.0063,
767
- "step": 3050
768
- },
769
- {
770
- "epoch": 5.83,
771
- "learning_rate": 2.6600000000000004e-06,
772
- "loss": 0.0072,
773
- "step": 3075
774
- },
775
- {
776
- "epoch": 5.88,
777
- "learning_rate": 2.5885714285714285e-06,
778
- "loss": 0.0047,
779
- "step": 3100
780
- },
781
- {
782
- "epoch": 5.93,
783
- "learning_rate": 2.5171428571428575e-06,
784
- "loss": 0.0078,
785
- "step": 3125
786
- },
787
- {
788
- "epoch": 5.98,
789
- "learning_rate": 2.445714285714286e-06,
790
- "loss": 0.0069,
791
- "step": 3150
792
- },
793
- {
794
- "epoch": 6.02,
795
- "learning_rate": 2.3742857142857147e-06,
796
- "loss": 0.0039,
797
- "step": 3175
798
- },
799
- {
800
- "epoch": 6.07,
801
- "learning_rate": 2.302857142857143e-06,
802
- "loss": 0.0035,
803
- "step": 3200
804
- },
805
- {
806
- "epoch": 6.12,
807
- "learning_rate": 2.2314285714285715e-06,
808
- "loss": 0.0043,
809
- "step": 3225
810
- },
811
- {
812
- "epoch": 6.17,
813
- "learning_rate": 2.16e-06,
814
- "loss": 0.0045,
815
- "step": 3250
816
- },
817
- {
818
- "epoch": 6.21,
819
- "learning_rate": 2.0885714285714287e-06,
820
- "loss": 0.0023,
821
- "step": 3275
822
- },
823
- {
824
- "epoch": 6.26,
825
- "learning_rate": 2.0171428571428573e-06,
826
- "loss": 0.0031,
827
- "step": 3300
828
- },
829
- {
830
- "epoch": 6.31,
831
- "learning_rate": 1.945714285714286e-06,
832
- "loss": 0.0028,
833
- "step": 3325
834
- },
835
- {
836
- "epoch": 6.36,
837
- "learning_rate": 1.8742857142857142e-06,
838
- "loss": 0.0053,
839
- "step": 3350
840
- },
841
- {
842
- "epoch": 6.4,
843
- "learning_rate": 1.8028571428571432e-06,
844
- "loss": 0.0016,
845
- "step": 3375
846
- },
847
- {
848
- "epoch": 6.45,
849
- "learning_rate": 1.7314285714285716e-06,
850
- "loss": 0.0019,
851
- "step": 3400
852
- },
853
- {
854
- "epoch": 6.5,
855
- "learning_rate": 1.6600000000000002e-06,
856
- "loss": 0.0038,
857
- "step": 3425
858
- },
859
- {
860
- "epoch": 6.55,
861
- "learning_rate": 1.5885714285714288e-06,
862
- "loss": 0.0028,
863
- "step": 3450
864
- },
865
- {
866
- "epoch": 6.59,
867
- "learning_rate": 1.5171428571428574e-06,
868
- "loss": 0.0047,
869
- "step": 3475
870
- },
871
- {
872
- "epoch": 6.64,
873
- "learning_rate": 1.4457142857142858e-06,
874
- "loss": 0.0041,
875
- "step": 3500
876
- },
877
- {
878
- "epoch": 6.69,
879
- "learning_rate": 1.3742857142857143e-06,
880
- "loss": 0.0015,
881
- "step": 3525
882
- },
883
- {
884
- "epoch": 6.74,
885
- "learning_rate": 1.302857142857143e-06,
886
- "loss": 0.002,
887
- "step": 3550
888
- },
889
- {
890
- "epoch": 6.78,
891
- "learning_rate": 1.2314285714285715e-06,
892
- "loss": 0.007,
893
- "step": 3575
894
- },
895
- {
896
- "epoch": 6.83,
897
- "learning_rate": 1.1600000000000001e-06,
898
- "loss": 0.0023,
899
- "step": 3600
900
- },
901
- {
902
- "epoch": 6.88,
903
- "learning_rate": 1.0885714285714287e-06,
904
- "loss": 0.0023,
905
- "step": 3625
906
- },
907
- {
908
- "epoch": 6.93,
909
- "learning_rate": 1.0171428571428573e-06,
910
- "loss": 0.0022,
911
- "step": 3650
912
- },
913
- {
914
- "epoch": 6.97,
915
- "learning_rate": 9.457142857142858e-07,
916
- "loss": 0.0034,
917
- "step": 3675
918
- },
919
- {
920
- "epoch": 7.02,
921
- "learning_rate": 8.742857142857144e-07,
922
- "loss": 0.0057,
923
- "step": 3700
924
- },
925
- {
926
- "epoch": 7.07,
927
- "learning_rate": 8.028571428571429e-07,
928
- "loss": 0.0009,
929
- "step": 3725
930
- },
931
- {
932
- "epoch": 7.12,
933
- "learning_rate": 7.314285714285715e-07,
934
- "loss": 0.003,
935
- "step": 3750
936
- },
937
- {
938
- "epoch": 7.16,
939
- "learning_rate": 6.6e-07,
940
- "loss": 0.0037,
941
- "step": 3775
942
- },
943
- {
944
- "epoch": 7.21,
945
- "learning_rate": 5.885714285714286e-07,
946
- "loss": 0.0007,
947
- "step": 3800
948
- },
949
- {
950
- "epoch": 7.26,
951
- "learning_rate": 5.171428571428572e-07,
952
- "loss": 0.0009,
953
- "step": 3825
954
- },
955
- {
956
- "epoch": 7.31,
957
- "learning_rate": 4.457142857142858e-07,
958
- "loss": 0.0008,
959
- "step": 3850
960
- },
961
- {
962
- "epoch": 7.35,
963
- "learning_rate": 3.7428571428571434e-07,
964
- "loss": 0.0019,
965
- "step": 3875
966
- },
967
- {
968
- "epoch": 7.4,
969
- "learning_rate": 3.028571428571429e-07,
970
- "loss": 0.001,
971
- "step": 3900
972
- },
973
- {
974
- "epoch": 7.45,
975
- "learning_rate": 2.3142857142857144e-07,
976
- "loss": 0.0011,
977
- "step": 3925
978
- },
979
- {
980
- "epoch": 7.5,
981
- "learning_rate": 1.6e-07,
982
- "loss": 0.0031,
983
- "step": 3950
984
- },
985
- {
986
- "epoch": 7.54,
987
- "learning_rate": 8.857142857142858e-08,
988
- "loss": 0.002,
989
- "step": 3975
990
- },
991
- {
992
- "epoch": 7.59,
993
- "learning_rate": 1.7142857142857143e-08,
994
- "loss": 0.002,
995
- "step": 4000
996
- },
997
- {
998
- "epoch": 7.59,
999
- "eval_loss": 0.09421070665121078,
1000
- "eval_runtime": 575.1216,
1001
- "eval_samples_per_second": 3.257,
1002
- "eval_steps_per_second": 0.205,
1003
- "eval_wer": 100.0,
1004
- "step": 4000
1005
  }
1006
  ],
1007
  "max_steps": 4000,
1008
  "num_train_epochs": 8,
1009
- "total_flos": 1.3053740888162304e+20,
1010
  "trial_name": null,
1011
  "trial_params": null
1012
  }
 
1
  {
2
  "best_metric": 100.0,
3
  "best_model_checkpoint": "./Malasar_ASR_DICtandLuke/checkpoint-1000",
4
+ "epoch": 5.692599620493358,
5
+ "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
753
  "eval_steps_per_second": 0.204,
754
  "eval_wer": 100.0,
755
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
  }
757
  ],
758
  "max_steps": 4000,
759
  "num_train_epochs": 8,
760
+ "total_flos": 9.79066287783936e+19,
761
  "trial_name": null,
762
  "trial_params": null
763
  }