Training in progress, step 18000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 223144592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb48d4529d566b344fe7367bac97351c603bbd85ca9f61087d4dc319f1ae6495
|
| 3 |
size 223144592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 281574266
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c75691a1ce258d234722c2d4ab3b85571a4565b6f8c306d9e4e850037046468b
|
| 3 |
size 281574266
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75bd767a73465b16f4ffdfb35ee15891c86eba958fe1f44fc879bf90de6c06cf
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:640a2c1972f8c0eea0b1816d3a5ded6495205b53dbb31dc9c8bee27a5b36d529
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:332f9130d37f9b28f027125f3d3e057edade6877a2990aa73c37f287b89e9652
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./distil-whisper/checkpoint-
|
| 5 |
-
"epoch": 10.
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1351,12 +1351,91 @@
|
|
| 1351 |
"eval_steps_per_second": 0.435,
|
| 1352 |
"eval_wer": 0.19112879457707044,
|
| 1353 |
"step": 17000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
}
|
| 1355 |
],
|
| 1356 |
"logging_steps": 100,
|
| 1357 |
-
"max_steps":
|
| 1358 |
"num_input_tokens_seen": 0,
|
| 1359 |
-
"num_train_epochs":
|
| 1360 |
"save_steps": 1000,
|
| 1361 |
"stateful_callbacks": {
|
| 1362 |
"TrainerControl": {
|
|
@@ -1365,12 +1444,12 @@
|
|
| 1365 |
"should_evaluate": false,
|
| 1366 |
"should_log": false,
|
| 1367 |
"should_save": true,
|
| 1368 |
-
"should_training_stop":
|
| 1369 |
},
|
| 1370 |
"attributes": {}
|
| 1371 |
}
|
| 1372 |
},
|
| 1373 |
-
"total_flos": 2.
|
| 1374 |
"train_batch_size": 8,
|
| 1375 |
"trial_name": null,
|
| 1376 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 18000,
|
| 3 |
+
"best_metric": 0.1865605658709107,
|
| 4 |
+
"best_model_checkpoint": "./distil-whisper/checkpoint-18000",
|
| 5 |
+
"epoch": 10.54481546572935,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 18000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1351 |
"eval_steps_per_second": 0.435,
|
| 1352 |
"eval_wer": 0.19112879457707044,
|
| 1353 |
"step": 17000
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 10.017574692442881,
|
| 1357 |
+
"grad_norm": 7.963376045227051,
|
| 1358 |
+
"learning_rate": 3.392152957578172e-05,
|
| 1359 |
+
"loss": 0.8347,
|
| 1360 |
+
"step": 17100
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 10.076157000585823,
|
| 1364 |
+
"grad_norm": 6.854780673980713,
|
| 1365 |
+
"learning_rate": 3.352320254929297e-05,
|
| 1366 |
+
"loss": 0.8392,
|
| 1367 |
+
"step": 17200
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 10.134739308728763,
|
| 1371 |
+
"grad_norm": 7.880845069885254,
|
| 1372 |
+
"learning_rate": 3.312487552280422e-05,
|
| 1373 |
+
"loss": 0.8556,
|
| 1374 |
+
"step": 17300
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 10.193321616871705,
|
| 1378 |
+
"grad_norm": 16.49683952331543,
|
| 1379 |
+
"learning_rate": 3.2726548496315476e-05,
|
| 1380 |
+
"loss": 0.9433,
|
| 1381 |
+
"step": 17400
|
| 1382 |
+
},
|
| 1383 |
+
{
|
| 1384 |
+
"epoch": 10.251903925014645,
|
| 1385 |
+
"grad_norm": 9.839179039001465,
|
| 1386 |
+
"learning_rate": 3.232822146982673e-05,
|
| 1387 |
+
"loss": 0.8697,
|
| 1388 |
+
"step": 17500
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"epoch": 10.310486233157587,
|
| 1392 |
+
"grad_norm": 7.640855312347412,
|
| 1393 |
+
"learning_rate": 3.1929894443337984e-05,
|
| 1394 |
+
"loss": 0.934,
|
| 1395 |
+
"step": 17600
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"epoch": 10.369068541300527,
|
| 1399 |
+
"grad_norm": 9.590755462646484,
|
| 1400 |
+
"learning_rate": 3.153156741684923e-05,
|
| 1401 |
+
"loss": 0.8328,
|
| 1402 |
+
"step": 17700
|
| 1403 |
+
},
|
| 1404 |
+
{
|
| 1405 |
+
"epoch": 10.427650849443468,
|
| 1406 |
+
"grad_norm": 6.947925567626953,
|
| 1407 |
+
"learning_rate": 3.1133240390360485e-05,
|
| 1408 |
+
"loss": 0.8759,
|
| 1409 |
+
"step": 17800
|
| 1410 |
+
},
|
| 1411 |
+
{
|
| 1412 |
+
"epoch": 10.486233157586408,
|
| 1413 |
+
"grad_norm": 10.188766479492188,
|
| 1414 |
+
"learning_rate": 3.0734913363871746e-05,
|
| 1415 |
+
"loss": 0.9053,
|
| 1416 |
+
"step": 17900
|
| 1417 |
+
},
|
| 1418 |
+
{
|
| 1419 |
+
"epoch": 10.54481546572935,
|
| 1420 |
+
"grad_norm": 8.809179306030273,
|
| 1421 |
+
"learning_rate": 3.0336586337382993e-05,
|
| 1422 |
+
"loss": 0.8203,
|
| 1423 |
+
"step": 18000
|
| 1424 |
+
},
|
| 1425 |
+
{
|
| 1426 |
+
"epoch": 10.54481546572935,
|
| 1427 |
+
"eval_loss": 0.08442338556051254,
|
| 1428 |
+
"eval_runtime": 157.987,
|
| 1429 |
+
"eval_samples_per_second": 3.165,
|
| 1430 |
+
"eval_steps_per_second": 0.399,
|
| 1431 |
+
"eval_wer": 0.1865605658709107,
|
| 1432 |
+
"step": 18000
|
| 1433 |
}
|
| 1434 |
],
|
| 1435 |
"logging_steps": 100,
|
| 1436 |
+
"max_steps": 25605,
|
| 1437 |
"num_input_tokens_seen": 0,
|
| 1438 |
+
"num_train_epochs": 15,
|
| 1439 |
"save_steps": 1000,
|
| 1440 |
"stateful_callbacks": {
|
| 1441 |
"TrainerControl": {
|
|
|
|
| 1444 |
"should_evaluate": false,
|
| 1445 |
"should_log": false,
|
| 1446 |
"should_save": true,
|
| 1447 |
+
"should_training_stop": false
|
| 1448 |
},
|
| 1449 |
"attributes": {}
|
| 1450 |
}
|
| 1451 |
},
|
| 1452 |
+
"total_flos": 2.34131599392768e+19,
|
| 1453 |
"train_batch_size": 8,
|
| 1454 |
"trial_name": null,
|
| 1455 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bddd4569a5055d59a70490c9da64158bf2e341f257865204c3154f99021841bd
|
| 3 |
size 5496
|