YeBhoneLin10 commited on
Commit
75b7409
·
verified ·
1 Parent(s): c59a336

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32009f61138d121877590a771ea7b59590e77167dcc592bddce1d3028adcb775
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:948197f12c590c0dda47ebc1917b48df09d620ce9166a3a391d7061f05435036
3
  size 151061672
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b473282ad7d99494e8d3d04acf1a70c7329a2fe95cff6e4f1efcb04522160a9
3
  size 297616186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f16ff8a7474b79d86fe40d0ca22c7f7ed33fcee156c67270373e05fb5778508
3
  size 297616186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e01a4900daa8e0003556007ff54cc9f8fc33170bd737bbc2836da3135ce6440
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b63186d5b05381af62b3d7173fde5bc46daf7e9a7d51b3c424f534f7bee96e2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d069d021e3ca87c952c1cbefefa02430980ec1efa75a58a8bd98140e99e3bc1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5638f21d0f0354e92b9a9f8dfa6255986109c390d6b1c6955ecb1e5acd3b9eab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 64.5190036982578,
3
- "best_model_checkpoint": "./checkpoint-3000",
4
- "epoch": 15.015,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -874,6 +874,295 @@
874
  "eval_steps_per_second": 0.223,
875
  "eval_wer": 64.5190036982578,
876
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
  }
878
  ],
879
  "logging_steps": 25,
@@ -893,7 +1182,7 @@
893
  "attributes": {}
894
  }
895
  },
896
- "total_flos": 2.3541808896e+18,
897
  "train_batch_size": 32,
898
  "trial_name": null,
899
  "trial_params": null
 
1
  {
2
+ "best_metric": 63.24466426057123,
3
+ "best_model_checkpoint": "./checkpoint-4000",
4
+ "epoch": 20.02,
5
  "eval_steps": 1000,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
874
  "eval_steps_per_second": 0.223,
875
  "eval_wer": 64.5190036982578,
876
  "step": 3000
877
+ },
878
+ {
879
+ "epoch": 15.02,
880
+ "grad_norm": 5.850539207458496,
881
+ "learning_rate": 4.395555555555556e-06,
882
+ "loss": 0.0914,
883
+ "step": 3025
884
+ },
885
+ {
886
+ "epoch": 15.025,
887
+ "grad_norm": 4.085538387298584,
888
+ "learning_rate": 4.34e-06,
889
+ "loss": 0.1051,
890
+ "step": 3050
891
+ },
892
+ {
893
+ "epoch": 15.03,
894
+ "grad_norm": 3.830420732498169,
895
+ "learning_rate": 4.284444444444445e-06,
896
+ "loss": 0.0627,
897
+ "step": 3075
898
+ },
899
+ {
900
+ "epoch": 15.035,
901
+ "grad_norm": 4.171704292297363,
902
+ "learning_rate": 4.228888888888889e-06,
903
+ "loss": 0.072,
904
+ "step": 3100
905
+ },
906
+ {
907
+ "epoch": 16.001,
908
+ "grad_norm": 6.426929950714111,
909
+ "learning_rate": 4.173333333333334e-06,
910
+ "loss": 0.0768,
911
+ "step": 3125
912
+ },
913
+ {
914
+ "epoch": 16.006,
915
+ "grad_norm": 2.9023313522338867,
916
+ "learning_rate": 4.117777777777779e-06,
917
+ "loss": 0.093,
918
+ "step": 3150
919
+ },
920
+ {
921
+ "epoch": 16.011,
922
+ "grad_norm": 3.1878390312194824,
923
+ "learning_rate": 4.062222222222223e-06,
924
+ "loss": 0.0799,
925
+ "step": 3175
926
+ },
927
+ {
928
+ "epoch": 16.016,
929
+ "grad_norm": 3.0205600261688232,
930
+ "learning_rate": 4.006666666666667e-06,
931
+ "loss": 0.0809,
932
+ "step": 3200
933
+ },
934
+ {
935
+ "epoch": 16.021,
936
+ "grad_norm": 4.082855701446533,
937
+ "learning_rate": 3.951111111111112e-06,
938
+ "loss": 0.0905,
939
+ "step": 3225
940
+ },
941
+ {
942
+ "epoch": 16.026,
943
+ "grad_norm": 3.3701670169830322,
944
+ "learning_rate": 3.895555555555556e-06,
945
+ "loss": 0.0874,
946
+ "step": 3250
947
+ },
948
+ {
949
+ "epoch": 16.031,
950
+ "grad_norm": 4.763386249542236,
951
+ "learning_rate": 3.8400000000000005e-06,
952
+ "loss": 0.0602,
953
+ "step": 3275
954
+ },
955
+ {
956
+ "epoch": 16.036,
957
+ "grad_norm": 3.008446216583252,
958
+ "learning_rate": 3.784444444444445e-06,
959
+ "loss": 0.0632,
960
+ "step": 3300
961
+ },
962
+ {
963
+ "epoch": 17.002,
964
+ "grad_norm": 4.296396732330322,
965
+ "learning_rate": 3.728888888888889e-06,
966
+ "loss": 0.0774,
967
+ "step": 3325
968
+ },
969
+ {
970
+ "epoch": 17.007,
971
+ "grad_norm": 2.4229137897491455,
972
+ "learning_rate": 3.673333333333334e-06,
973
+ "loss": 0.0821,
974
+ "step": 3350
975
+ },
976
+ {
977
+ "epoch": 17.012,
978
+ "grad_norm": 3.1641666889190674,
979
+ "learning_rate": 3.617777777777778e-06,
980
+ "loss": 0.0744,
981
+ "step": 3375
982
+ },
983
+ {
984
+ "epoch": 17.017,
985
+ "grad_norm": 3.2084319591522217,
986
+ "learning_rate": 3.5622222222222224e-06,
987
+ "loss": 0.0749,
988
+ "step": 3400
989
+ },
990
+ {
991
+ "epoch": 17.022,
992
+ "grad_norm": 3.579460620880127,
993
+ "learning_rate": 3.5066666666666673e-06,
994
+ "loss": 0.089,
995
+ "step": 3425
996
+ },
997
+ {
998
+ "epoch": 17.027,
999
+ "grad_norm": 4.045797824859619,
1000
+ "learning_rate": 3.4511111111111113e-06,
1001
+ "loss": 0.073,
1002
+ "step": 3450
1003
+ },
1004
+ {
1005
+ "epoch": 17.032,
1006
+ "grad_norm": 4.456791400909424,
1007
+ "learning_rate": 3.3955555555555558e-06,
1008
+ "loss": 0.0558,
1009
+ "step": 3475
1010
+ },
1011
+ {
1012
+ "epoch": 17.037,
1013
+ "grad_norm": 4.2783203125,
1014
+ "learning_rate": 3.3400000000000006e-06,
1015
+ "loss": 0.0571,
1016
+ "step": 3500
1017
+ },
1018
+ {
1019
+ "epoch": 18.003,
1020
+ "grad_norm": 2.9448039531707764,
1021
+ "learning_rate": 3.2844444444444447e-06,
1022
+ "loss": 0.0781,
1023
+ "step": 3525
1024
+ },
1025
+ {
1026
+ "epoch": 18.008,
1027
+ "grad_norm": 2.74501371383667,
1028
+ "learning_rate": 3.228888888888889e-06,
1029
+ "loss": 0.0748,
1030
+ "step": 3550
1031
+ },
1032
+ {
1033
+ "epoch": 18.013,
1034
+ "grad_norm": 2.291656494140625,
1035
+ "learning_rate": 3.173333333333334e-06,
1036
+ "loss": 0.0692,
1037
+ "step": 3575
1038
+ },
1039
+ {
1040
+ "epoch": 18.018,
1041
+ "grad_norm": 2.8000614643096924,
1042
+ "learning_rate": 3.117777777777778e-06,
1043
+ "loss": 0.0696,
1044
+ "step": 3600
1045
+ },
1046
+ {
1047
+ "epoch": 18.023,
1048
+ "grad_norm": 3.9665687084198,
1049
+ "learning_rate": 3.0622222222222225e-06,
1050
+ "loss": 0.0883,
1051
+ "step": 3625
1052
+ },
1053
+ {
1054
+ "epoch": 18.028,
1055
+ "grad_norm": 3.8955137729644775,
1056
+ "learning_rate": 3.0066666666666674e-06,
1057
+ "loss": 0.063,
1058
+ "step": 3650
1059
+ },
1060
+ {
1061
+ "epoch": 18.033,
1062
+ "grad_norm": 3.781052827835083,
1063
+ "learning_rate": 2.9511111111111114e-06,
1064
+ "loss": 0.0479,
1065
+ "step": 3675
1066
+ },
1067
+ {
1068
+ "epoch": 18.038,
1069
+ "grad_norm": 3.0369510650634766,
1070
+ "learning_rate": 2.895555555555556e-06,
1071
+ "loss": 0.0542,
1072
+ "step": 3700
1073
+ },
1074
+ {
1075
+ "epoch": 19.004,
1076
+ "grad_norm": 3.2044124603271484,
1077
+ "learning_rate": 2.84e-06,
1078
+ "loss": 0.0774,
1079
+ "step": 3725
1080
+ },
1081
+ {
1082
+ "epoch": 19.009,
1083
+ "grad_norm": 3.152061939239502,
1084
+ "learning_rate": 2.784444444444445e-06,
1085
+ "loss": 0.0698,
1086
+ "step": 3750
1087
+ },
1088
+ {
1089
+ "epoch": 19.014,
1090
+ "grad_norm": 2.7949397563934326,
1091
+ "learning_rate": 2.7288888888888893e-06,
1092
+ "loss": 0.0661,
1093
+ "step": 3775
1094
+ },
1095
+ {
1096
+ "epoch": 19.019,
1097
+ "grad_norm": 2.393399477005005,
1098
+ "learning_rate": 2.6733333333333333e-06,
1099
+ "loss": 0.0644,
1100
+ "step": 3800
1101
+ },
1102
+ {
1103
+ "epoch": 19.024,
1104
+ "grad_norm": 3.772813558578491,
1105
+ "learning_rate": 2.617777777777778e-06,
1106
+ "loss": 0.0869,
1107
+ "step": 3825
1108
+ },
1109
+ {
1110
+ "epoch": 19.029,
1111
+ "grad_norm": 3.960970401763916,
1112
+ "learning_rate": 2.5622222222222226e-06,
1113
+ "loss": 0.0521,
1114
+ "step": 3850
1115
+ },
1116
+ {
1117
+ "epoch": 19.034,
1118
+ "grad_norm": 4.602725505828857,
1119
+ "learning_rate": 2.5066666666666667e-06,
1120
+ "loss": 0.0473,
1121
+ "step": 3875
1122
+ },
1123
+ {
1124
+ "epoch": 19.039,
1125
+ "grad_norm": 0.9137289524078369,
1126
+ "learning_rate": 2.451111111111111e-06,
1127
+ "loss": 0.0497,
1128
+ "step": 3900
1129
+ },
1130
+ {
1131
+ "epoch": 20.005,
1132
+ "grad_norm": 3.5569727420806885,
1133
+ "learning_rate": 2.3955555555555556e-06,
1134
+ "loss": 0.0774,
1135
+ "step": 3925
1136
+ },
1137
+ {
1138
+ "epoch": 20.01,
1139
+ "grad_norm": 2.2422502040863037,
1140
+ "learning_rate": 2.3400000000000005e-06,
1141
+ "loss": 0.0629,
1142
+ "step": 3950
1143
+ },
1144
+ {
1145
+ "epoch": 20.015,
1146
+ "grad_norm": 2.4179162979125977,
1147
+ "learning_rate": 2.2844444444444445e-06,
1148
+ "loss": 0.0637,
1149
+ "step": 3975
1150
+ },
1151
+ {
1152
+ "epoch": 20.02,
1153
+ "grad_norm": 3.352149724960327,
1154
+ "learning_rate": 2.228888888888889e-06,
1155
+ "loss": 0.0655,
1156
+ "step": 4000
1157
+ },
1158
+ {
1159
+ "epoch": 20.02,
1160
+ "eval_loss": 0.20272822678089142,
1161
+ "eval_runtime": 353.3869,
1162
+ "eval_samples_per_second": 1.803,
1163
+ "eval_steps_per_second": 0.226,
1164
+ "eval_wer": 63.24466426057123,
1165
+ "step": 4000
1166
  }
1167
  ],
1168
  "logging_steps": 25,
 
1182
  "attributes": {}
1183
  }
1184
  },
1185
+ "total_flos": 3.1389078528e+18,
1186
  "train_batch_size": 32,
1187
  "trial_name": null,
1188
  "trial_params": null