Training in progress, step 16000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 223144592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3e3cd7750a71ba24a73246f69fdf7daec1177ea5853eea232a257d18883c36c
|
| 3 |
size 223144592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 281574266
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1da40d74928c909c113322ad282f56feadf39a270ef5d886cdca23750487bd7a
|
| 3 |
size 281574266
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d89c80e2c9bcd130c179737ff89fd355d5633e2baca37c40e9a81d122a9d5d9
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:281dc54ed0520d353628c056d22e94e782117e47679a149519cb09d64d5041fb
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e08be80793261ac03b00ebc0b1eac4cdf6646c0ac612f93a04f7f6b012b5292c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 14000,
|
| 3 |
"best_metric": 0.18538166814028884,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-14000",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1193,6 +1193,85 @@
|
|
| 1193 |
"eval_steps_per_second": 0.426,
|
| 1194 |
"eval_wer": 0.19834954317712938,
|
| 1195 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
}
|
| 1197 |
],
|
| 1198 |
"logging_steps": 100,
|
|
@@ -1212,7 +1291,7 @@
|
|
| 1212 |
"attributes": {}
|
| 1213 |
}
|
| 1214 |
},
|
| 1215 |
-
"total_flos":
|
| 1216 |
"train_batch_size": 8,
|
| 1217 |
"trial_name": null,
|
| 1218 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 14000,
|
| 3 |
"best_metric": 0.18538166814028884,
|
| 4 |
"best_model_checkpoint": "./distil-whisper/checkpoint-14000",
|
| 5 |
+
"epoch": 9.373169302870533,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 16000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1193 |
"eval_steps_per_second": 0.426,
|
| 1194 |
"eval_wer": 0.19834954317712938,
|
| 1195 |
"step": 15000
|
| 1196 |
+
},
|
| 1197 |
+
{
|
| 1198 |
+
"epoch": 8.845928529584066,
|
| 1199 |
+
"grad_norm": 6.900521278381348,
|
| 1200 |
+
"learning_rate": 1.1949305974652989e-05,
|
| 1201 |
+
"loss": 0.8797,
|
| 1202 |
+
"step": 15100
|
| 1203 |
+
},
|
| 1204 |
+
{
|
| 1205 |
+
"epoch": 8.904510837727006,
|
| 1206 |
+
"grad_norm": 13.14035701751709,
|
| 1207 |
+
"learning_rate": 1.1345805672902837e-05,
|
| 1208 |
+
"loss": 0.8691,
|
| 1209 |
+
"step": 15200
|
| 1210 |
+
},
|
| 1211 |
+
{
|
| 1212 |
+
"epoch": 8.963093145869948,
|
| 1213 |
+
"grad_norm": 6.80872106552124,
|
| 1214 |
+
"learning_rate": 1.0742305371152686e-05,
|
| 1215 |
+
"loss": 0.859,
|
| 1216 |
+
"step": 15300
|
| 1217 |
+
},
|
| 1218 |
+
{
|
| 1219 |
+
"epoch": 9.021675454012888,
|
| 1220 |
+
"grad_norm": 5.7985520362854,
|
| 1221 |
+
"learning_rate": 1.0138805069402535e-05,
|
| 1222 |
+
"loss": 0.8905,
|
| 1223 |
+
"step": 15400
|
| 1224 |
+
},
|
| 1225 |
+
{
|
| 1226 |
+
"epoch": 9.08025776215583,
|
| 1227 |
+
"grad_norm": 7.384444236755371,
|
| 1228 |
+
"learning_rate": 9.535304767652383e-06,
|
| 1229 |
+
"loss": 0.7981,
|
| 1230 |
+
"step": 15500
|
| 1231 |
+
},
|
| 1232 |
+
{
|
| 1233 |
+
"epoch": 9.13884007029877,
|
| 1234 |
+
"grad_norm": 6.441751956939697,
|
| 1235 |
+
"learning_rate": 8.931804465902233e-06,
|
| 1236 |
+
"loss": 0.9026,
|
| 1237 |
+
"step": 15600
|
| 1238 |
+
},
|
| 1239 |
+
{
|
| 1240 |
+
"epoch": 9.197422378441711,
|
| 1241 |
+
"grad_norm": 5.471485614776611,
|
| 1242 |
+
"learning_rate": 8.328304164152082e-06,
|
| 1243 |
+
"loss": 0.8357,
|
| 1244 |
+
"step": 15700
|
| 1245 |
+
},
|
| 1246 |
+
{
|
| 1247 |
+
"epoch": 9.256004686584651,
|
| 1248 |
+
"grad_norm": 6.093921661376953,
|
| 1249 |
+
"learning_rate": 7.724803862401932e-06,
|
| 1250 |
+
"loss": 0.8427,
|
| 1251 |
+
"step": 15800
|
| 1252 |
+
},
|
| 1253 |
+
{
|
| 1254 |
+
"epoch": 9.314586994727593,
|
| 1255 |
+
"grad_norm": 5.414072036743164,
|
| 1256 |
+
"learning_rate": 7.121303560651781e-06,
|
| 1257 |
+
"loss": 0.8235,
|
| 1258 |
+
"step": 15900
|
| 1259 |
+
},
|
| 1260 |
+
{
|
| 1261 |
+
"epoch": 9.373169302870533,
|
| 1262 |
+
"grad_norm": 5.2771897315979,
|
| 1263 |
+
"learning_rate": 6.5178032589016296e-06,
|
| 1264 |
+
"loss": 0.8363,
|
| 1265 |
+
"step": 16000
|
| 1266 |
+
},
|
| 1267 |
+
{
|
| 1268 |
+
"epoch": 9.373169302870533,
|
| 1269 |
+
"eval_loss": 0.08421996235847473,
|
| 1270 |
+
"eval_runtime": 150.6901,
|
| 1271 |
+
"eval_samples_per_second": 3.318,
|
| 1272 |
+
"eval_steps_per_second": 0.418,
|
| 1273 |
+
"eval_wer": 0.19820218096080164,
|
| 1274 |
+
"step": 16000
|
| 1275 |
}
|
| 1276 |
],
|
| 1277 |
"logging_steps": 100,
|
|
|
|
| 1291 |
"attributes": {}
|
| 1292 |
}
|
| 1293 |
},
|
| 1294 |
+
"total_flos": 2.081167965683712e+19,
|
| 1295 |
"train_batch_size": 8,
|
| 1296 |
"trial_name": null,
|
| 1297 |
"trial_params": null
|