jflotz committed on
Commit
f47a032
·
1 Parent(s): e2d8aa0

Training in progress, step 60000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a806770a6d25518cb1674ae526f6d236de399a295d5a453821ffdc0e6a41627
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a5b6e2d3a240e31f2407589b74bf56102df3cd6db72efc78606028852235e7
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:175c9af2cd55b51e9df8d727ba8d18aab140807b11f81fcd4adde2c5741e4d30
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a9d1af9969e324f9fedc078ab1e1ab334bc4c8eeeb0d4b38445a40029af3cf3
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ce3108e385660090a22247b2e131088c6273bea93f4243061660df0632b29
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b8d45ebb28c7b9f23a6abeafd90122c1ed22446a846f1cf2ac94e95c51e1adb
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90ff94ce099b109f6c343c1450c170171a247badda4343ab1850180869cf03e2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5576436211146181,
5
- "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1006,11 +1006,211 @@
1006
  "eval_samples_per_second": 950.212,
1007
  "eval_steps_per_second": 14.892,
1008
  "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1009
  }
1010
  ],
1011
  "max_steps": 1000000,
1012
  "num_train_epochs": 12,
1013
- "total_flos": 3.505013051517357e+21,
1014
  "trial_name": null,
1015
  "trial_params": null
1016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6691723453375417,
5
+ "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1006
  "eval_samples_per_second": 950.212,
1007
  "eval_steps_per_second": 14.892,
1008
  "step": 50000
1009
+ },
1010
+ {
1011
+ "epoch": 0.56,
1012
+ "learning_rate": 0.00014999990431133645,
1013
+ "loss": 0.3579,
1014
+ "step": 50500
1015
+ },
1016
+ {
1017
+ "epoch": 0.57,
1018
+ "learning_rate": 0.0001499996172456075,
1019
+ "loss": 0.3563,
1020
+ "step": 51000
1021
+ },
1022
+ {
1023
+ "epoch": 0.57,
1024
+ "eval_loss": 0.3443795442581177,
1025
+ "eval_runtime": 2.3466,
1026
+ "eval_samples_per_second": 978.854,
1027
+ "eval_steps_per_second": 15.341,
1028
+ "step": 51000
1029
+ },
1030
+ {
1031
+ "epoch": 0.57,
1032
+ "learning_rate": 0.00014999913880359787,
1033
+ "loss": 0.3547,
1034
+ "step": 51500
1035
+ },
1036
+ {
1037
+ "epoch": 0.58,
1038
+ "learning_rate": 0.00014999846898661572,
1039
+ "loss": 0.353,
1040
+ "step": 52000
1041
+ },
1042
+ {
1043
+ "epoch": 0.58,
1044
+ "eval_loss": 0.34043800830841064,
1045
+ "eval_runtime": 2.4046,
1046
+ "eval_samples_per_second": 955.259,
1047
+ "eval_steps_per_second": 14.971,
1048
+ "step": 52000
1049
+ },
1050
+ {
1051
+ "epoch": 0.59,
1052
+ "learning_rate": 0.00014999760779649222,
1053
+ "loss": 0.3516,
1054
+ "step": 52500
1055
+ },
1056
+ {
1057
+ "epoch": 0.59,
1058
+ "learning_rate": 0.00014999655523558183,
1059
+ "loss": 0.3502,
1060
+ "step": 53000
1061
+ },
1062
+ {
1063
+ "epoch": 0.59,
1064
+ "eval_loss": 0.3358021378517151,
1065
+ "eval_runtime": 2.3861,
1066
+ "eval_samples_per_second": 962.643,
1067
+ "eval_steps_per_second": 15.087,
1068
+ "step": 53000
1069
+ },
1070
+ {
1071
+ "epoch": 0.6,
1072
+ "learning_rate": 0.00014999531130676229,
1073
+ "loss": 0.3491,
1074
+ "step": 53500
1075
+ },
1076
+ {
1077
+ "epoch": 0.6,
1078
+ "learning_rate": 0.00014999387601343436,
1079
+ "loss": 0.3473,
1080
+ "step": 54000
1081
+ },
1082
+ {
1083
+ "epoch": 0.6,
1084
+ "eval_loss": 0.3330630362033844,
1085
+ "eval_runtime": 2.3348,
1086
+ "eval_samples_per_second": 983.811,
1087
+ "eval_steps_per_second": 15.419,
1088
+ "step": 54000
1089
+ },
1090
+ {
1091
+ "epoch": 0.61,
1092
+ "learning_rate": 0.00014999224935952215,
1093
+ "loss": 0.3463,
1094
+ "step": 54500
1095
+ },
1096
+ {
1097
+ "epoch": 0.61,
1098
+ "learning_rate": 0.00014999043134947282,
1099
+ "loss": 0.3445,
1100
+ "step": 55000
1101
+ },
1102
+ {
1103
+ "epoch": 0.61,
1104
+ "eval_loss": 0.33030495047569275,
1105
+ "eval_runtime": 2.318,
1106
+ "eval_samples_per_second": 990.921,
1107
+ "eval_steps_per_second": 15.53,
1108
+ "step": 55000
1109
+ },
1110
+ {
1111
+ "epoch": 0.62,
1112
+ "learning_rate": 0.00014998842198825674,
1113
+ "loss": 0.3434,
1114
+ "step": 55500
1115
+ },
1116
+ {
1117
+ "epoch": 0.62,
1118
+ "learning_rate": 0.00014998622128136748,
1119
+ "loss": 0.342,
1120
+ "step": 56000
1121
+ },
1122
+ {
1123
+ "epoch": 0.62,
1124
+ "eval_loss": 0.3295433819293976,
1125
+ "eval_runtime": 2.3042,
1126
+ "eval_samples_per_second": 996.891,
1127
+ "eval_steps_per_second": 15.624,
1128
+ "step": 56000
1129
+ },
1130
+ {
1131
+ "epoch": 0.63,
1132
+ "learning_rate": 0.00014998382923482164,
1133
+ "loss": 0.3411,
1134
+ "step": 56500
1135
+ },
1136
+ {
1137
+ "epoch": 0.64,
1138
+ "learning_rate": 0.000149981245855159,
1139
+ "loss": 0.3396,
1140
+ "step": 57000
1141
+ },
1142
+ {
1143
+ "epoch": 0.64,
1144
+ "eval_loss": 0.32805606722831726,
1145
+ "eval_runtime": 2.4137,
1146
+ "eval_samples_per_second": 951.659,
1147
+ "eval_steps_per_second": 14.915,
1148
+ "step": 57000
1149
+ },
1150
+ {
1151
+ "epoch": 0.64,
1152
+ "learning_rate": 0.00014997847114944242,
1153
+ "loss": 0.3383,
1154
+ "step": 57500
1155
+ },
1156
+ {
1157
+ "epoch": 0.65,
1158
+ "learning_rate": 0.00014997550512525784,
1159
+ "loss": 0.3374,
1160
+ "step": 58000
1161
+ },
1162
+ {
1163
+ "epoch": 0.65,
1164
+ "eval_loss": 0.3256905674934387,
1165
+ "eval_runtime": 2.4211,
1166
+ "eval_samples_per_second": 948.748,
1167
+ "eval_steps_per_second": 14.869,
1168
+ "step": 58000
1169
+ },
1170
+ {
1171
+ "epoch": 0.65,
1172
+ "learning_rate": 0.00014997234779071426,
1173
+ "loss": 0.3365,
1174
+ "step": 58500
1175
+ },
1176
+ {
1177
+ "epoch": 0.66,
1178
+ "learning_rate": 0.0001499689991544437,
1179
+ "loss": 0.3353,
1180
+ "step": 59000
1181
+ },
1182
+ {
1183
+ "epoch": 0.66,
1184
+ "eval_loss": 0.32548126578330994,
1185
+ "eval_runtime": 2.4309,
1186
+ "eval_samples_per_second": 944.932,
1187
+ "eval_steps_per_second": 14.81,
1188
+ "step": 59000
1189
+ },
1190
+ {
1191
+ "epoch": 0.66,
1192
+ "learning_rate": 0.0001499654592256012,
1193
+ "loss": 0.3342,
1194
+ "step": 59500
1195
+ },
1196
+ {
1197
+ "epoch": 0.67,
1198
+ "learning_rate": 0.00014996172801386482,
1199
+ "loss": 0.3333,
1200
+ "step": 60000
1201
+ },
1202
+ {
1203
+ "epoch": 0.67,
1204
+ "eval_loss": 0.3190021514892578,
1205
+ "eval_runtime": 2.4381,
1206
+ "eval_samples_per_second": 942.115,
1207
+ "eval_steps_per_second": 14.765,
1208
+ "step": 60000
1209
  }
1210
  ],
1211
  "max_steps": 1000000,
1212
  "num_train_epochs": 12,
1213
+ "total_flos": 4.2060156618208287e+21,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:175c9af2cd55b51e9df8d727ba8d18aab140807b11f81fcd4adde2c5741e4d30
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a9d1af9969e324f9fedc078ab1e1ab334bc4c8eeeb0d4b38445a40029af3cf3
3
  size 449471589