| { | |
| "best_metric": 0.12623194275685162, | |
| "best_model_checkpoint": "./whisper-base-ckb/checkpoint-2300", | |
| "epoch": 25.0, | |
| "eval_steps": 100, | |
| "global_step": 2300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.0735116692035353e-06, | |
| "loss": 3.7642, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.841064020920768e-06, | |
| "loss": 2.8375, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 5.9179056274086315e-06, | |
| "loss": 1.8942, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.599279204266558e-06, | |
| "loss": 1.2854, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 7.098857723096461e-06, | |
| "loss": 0.9078, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 7.493495838168052e-06, | |
| "loss": 0.6876, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.819717921297098e-06, | |
| "loss": 0.5329, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.097777358972581e-06, | |
| "loss": 0.4405, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.340082901971508e-06, | |
| "loss": 0.3864, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 8.55479047323724e-06, | |
| "loss": 0.3434, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 0.384033203125, | |
| "eval_runtime": 114.2378, | |
| "eval_samples_per_second": 43.243, | |
| "eval_steps_per_second": 0.061, | |
| "eval_wer": 0.6053732955312542, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.747548830298285e-06, | |
| "loss": 0.3216, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 8.92243233400783e-06, | |
| "loss": 0.3038, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 9.082476609942404e-06, | |
| "loss": 0.2841, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 9.230004150181585e-06, | |
| "loss": 0.2701, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 9.36683155598379e-06, | |
| "loss": 0.255, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 9.494406569816089e-06, | |
| "loss": 0.2549, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 9.613901637842896e-06, | |
| "loss": 0.2433, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 9.726279567611587e-06, | |
| "loss": 0.2373, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 9.832340708264348e-06, | |
| "loss": 0.2232, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 9.93275756211191e-06, | |
| "loss": 0.2089, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.265380859375, | |
| "eval_runtime": 112.176, | |
| "eval_samples_per_second": 44.038, | |
| "eval_steps_per_second": 0.062, | |
| "eval_wer": 0.4739773187525314, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 9.975000000000002e-06, | |
| "loss": 0.2122, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 9.85e-06, | |
| "loss": 0.2123, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.725000000000001e-06, | |
| "loss": 0.2044, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.1994, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.475000000000002e-06, | |
| "loss": 0.1911, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 9.350000000000002e-06, | |
| "loss": 0.1927, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 9.225e-06, | |
| "loss": 0.1913, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 9.100000000000001e-06, | |
| "loss": 0.1812, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 8.975e-06, | |
| "loss": 0.1715, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 8.85e-06, | |
| "loss": 0.167, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "eval_loss": 0.224609375, | |
| "eval_runtime": 112.6918, | |
| "eval_samples_per_second": 43.836, | |
| "eval_steps_per_second": 0.062, | |
| "eval_wer": 0.41899554475496154, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 8.725000000000002e-06, | |
| "loss": 0.1683, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 8.6e-06, | |
| "loss": 0.1728, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 8.475000000000001e-06, | |
| "loss": 0.1692, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 8.35e-06, | |
| "loss": 0.1654, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 8.225e-06, | |
| "loss": 0.1622, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 8.1e-06, | |
| "loss": 0.1597, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 7.975e-06, | |
| "loss": 0.1626, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 7.850000000000001e-06, | |
| "loss": 0.151, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 7.725e-06, | |
| "loss": 0.1481, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 7.600000000000001e-06, | |
| "loss": 0.1452, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "eval_loss": 0.1964111328125, | |
| "eval_runtime": 112.4796, | |
| "eval_samples_per_second": 43.919, | |
| "eval_steps_per_second": 0.062, | |
| "eval_wer": 0.38034966923180774, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 7.475000000000001e-06, | |
| "loss": 0.1437, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 7.350000000000001e-06, | |
| "loss": 0.1477, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 7.225000000000001e-06, | |
| "loss": 0.146, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 7.100000000000001e-06, | |
| "loss": 0.1432, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 6.975000000000001e-06, | |
| "loss": 0.1383, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 6.850000000000001e-06, | |
| "loss": 0.1389, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 6.725000000000001e-06, | |
| "loss": 0.1416, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 6.600000000000001e-06, | |
| "loss": 0.1356, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 6.475e-06, | |
| "loss": 0.1302, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 6.35e-06, | |
| "loss": 0.1287, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "eval_loss": 0.1788330078125, | |
| "eval_runtime": 133.6018, | |
| "eval_samples_per_second": 36.976, | |
| "eval_steps_per_second": 0.052, | |
| "eval_wer": 0.35419198055893075, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 6.225000000000001e-06, | |
| "loss": 0.1217, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 6.1e-06, | |
| "loss": 0.1332, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 5.975e-06, | |
| "loss": 0.1307, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 5.85e-06, | |
| "loss": 0.1281, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 5.725e-06, | |
| "loss": 0.1257, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 0.1176, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 5.475e-06, | |
| "loss": 0.1249, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 5.3500000000000004e-06, | |
| "loss": 0.1214, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 5.225e-06, | |
| "loss": 0.1166, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 5.1e-06, | |
| "loss": 0.1163, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "eval_loss": 0.1650390625, | |
| "eval_runtime": 120.7458, | |
| "eval_samples_per_second": 40.912, | |
| "eval_steps_per_second": 0.058, | |
| "eval_wer": 0.33255704063723507, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 4.975000000000001e-06, | |
| "loss": 0.1139, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 4.85e-06, | |
| "loss": 0.1216, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 4.7250000000000005e-06, | |
| "loss": 0.1189, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 0.1167, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 4.475e-06, | |
| "loss": 0.1132, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 4.350000000000001e-06, | |
| "loss": 0.1046, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 4.225e-06, | |
| "loss": 0.111, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 4.1e-06, | |
| "loss": 0.115, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 3.975000000000001e-06, | |
| "loss": 0.1097, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 3.85e-06, | |
| "loss": 0.1068, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "eval_loss": 0.156005859375, | |
| "eval_runtime": 128.5945, | |
| "eval_samples_per_second": 38.415, | |
| "eval_steps_per_second": 0.054, | |
| "eval_wer": 0.31554610503577696, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 3.7250000000000003e-06, | |
| "loss": 0.1061, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 3.6000000000000003e-06, | |
| "loss": 0.1112, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 3.475e-06, | |
| "loss": 0.1128, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 3.3500000000000005e-06, | |
| "loss": 0.107, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 3.2250000000000005e-06, | |
| "loss": 0.1022, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 3.1000000000000004e-06, | |
| "loss": 0.0977, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 2.9750000000000003e-06, | |
| "loss": 0.1016, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 2.85e-06, | |
| "loss": 0.1089, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 2.7250000000000006e-06, | |
| "loss": 0.105, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 2.6e-06, | |
| "loss": 0.1015, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "eval_loss": 0.14892578125, | |
| "eval_runtime": 117.3243, | |
| "eval_samples_per_second": 42.106, | |
| "eval_steps_per_second": 0.06, | |
| "eval_wer": 0.30592682597542864, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 2.475e-06, | |
| "loss": 0.0977, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 2.35e-06, | |
| "loss": 0.1023, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 2.2250000000000003e-06, | |
| "loss": 0.1064, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 2.1000000000000002e-06, | |
| "loss": 0.0964, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 1.975e-06, | |
| "loss": 0.0973, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 1.85e-06, | |
| "loss": 0.0946, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 1.725e-06, | |
| "loss": 0.0965, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 0.1048, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 1.475e-06, | |
| "loss": 0.0982, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 1.3500000000000002e-06, | |
| "loss": 0.0968, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "eval_loss": 0.14404296875, | |
| "eval_runtime": 113.4503, | |
| "eval_samples_per_second": 43.543, | |
| "eval_steps_per_second": 0.062, | |
| "eval_wer": 0.2953962467935736, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 1.2250000000000001e-06, | |
| "loss": 0.0951, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 1.1e-06, | |
| "loss": 0.0937, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 9.750000000000002e-07, | |
| "loss": 0.1012, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "learning_rate": 8.500000000000001e-07, | |
| "loss": 0.0963, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 7.25e-07, | |
| "loss": 0.0941, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 0.0922, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 4.7500000000000006e-07, | |
| "loss": 0.0897, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 3.5000000000000004e-07, | |
| "loss": 0.1039, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "learning_rate": 2.2500000000000002e-07, | |
| "loss": 0.0978, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 1.0000000000000001e-07, | |
| "loss": 0.0939, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "eval_loss": 0.1419677734375, | |
| "eval_runtime": 116.6648, | |
| "eval_samples_per_second": 42.344, | |
| "eval_steps_per_second": 0.06, | |
| "eval_wer": 0.2917510463075469, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 7.889473684210527e-06, | |
| "loss": 0.0921, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "learning_rate": 7.863157894736842e-06, | |
| "loss": 0.0904, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "learning_rate": 7.836842105263159e-06, | |
| "loss": 0.0983, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "learning_rate": 7.810526315789474e-06, | |
| "loss": 0.0976, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 11.41, | |
| "learning_rate": 7.78421052631579e-06, | |
| "loss": 0.094, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 7.757894736842105e-06, | |
| "loss": 0.0908, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 7.731578947368422e-06, | |
| "loss": 0.0882, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 11.74, | |
| "learning_rate": 7.705263157894738e-06, | |
| "loss": 0.096, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "learning_rate": 7.678947368421053e-06, | |
| "loss": 0.0949, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 7.65263157894737e-06, | |
| "loss": 0.0919, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "eval_loss": 0.1314697265625, | |
| "eval_runtime": 111.7237, | |
| "eval_samples_per_second": 44.216, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.27420008100445525, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "learning_rate": 7.626315789473685e-06, | |
| "loss": 0.0866, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "learning_rate": 7.600000000000001e-06, | |
| "loss": 0.0797, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 12.28, | |
| "learning_rate": 7.573684210526317e-06, | |
| "loss": 0.0859, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "learning_rate": 7.547368421052632e-06, | |
| "loss": 0.0869, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 7.5210526315789475e-06, | |
| "loss": 0.0843, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "learning_rate": 7.494736842105263e-06, | |
| "loss": 0.083, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 12.72, | |
| "learning_rate": 7.468421052631579e-06, | |
| "loss": 0.0784, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "learning_rate": 7.442105263157895e-06, | |
| "loss": 0.0853, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 12.93, | |
| "learning_rate": 7.415789473684211e-06, | |
| "loss": 0.0861, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "learning_rate": 7.3894736842105275e-06, | |
| "loss": 0.0839, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "eval_loss": 0.1217041015625, | |
| "eval_runtime": 110.1755, | |
| "eval_samples_per_second": 44.838, | |
| "eval_steps_per_second": 0.064, | |
| "eval_wer": 0.2596867827730525, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "learning_rate": 7.363157894736843e-06, | |
| "loss": 0.0759, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 7.336842105263159e-06, | |
| "loss": 0.0733, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "learning_rate": 7.310526315789475e-06, | |
| "loss": 0.0768, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "learning_rate": 7.28421052631579e-06, | |
| "loss": 0.0834, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "learning_rate": 7.257894736842106e-06, | |
| "loss": 0.077, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "learning_rate": 7.2315789473684215e-06, | |
| "loss": 0.0738, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "learning_rate": 7.205263157894737e-06, | |
| "loss": 0.0723, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 13.91, | |
| "learning_rate": 7.178947368421053e-06, | |
| "loss": 0.0752, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "learning_rate": 7.152631578947369e-06, | |
| "loss": 0.0794, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "learning_rate": 7.126315789473685e-06, | |
| "loss": 0.0713, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "eval_loss": 0.11322021484375, | |
| "eval_runtime": 110.9942, | |
| "eval_samples_per_second": 44.507, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.23710679087349804, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "learning_rate": 7.100000000000001e-06, | |
| "loss": 0.0703, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 7.073684210526316e-06, | |
| "loss": 0.0662, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "learning_rate": 7.047368421052631e-06, | |
| "loss": 0.0686, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 14.57, | |
| "learning_rate": 7.021052631578948e-06, | |
| "loss": 0.0771, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 6.994736842105264e-06, | |
| "loss": 0.0717, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "learning_rate": 6.96842105263158e-06, | |
| "loss": 0.071, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 14.89, | |
| "learning_rate": 6.9421052631578955e-06, | |
| "loss": 0.0674, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 6.915789473684211e-06, | |
| "loss": 0.0694, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 15.11, | |
| "learning_rate": 6.889473684210527e-06, | |
| "loss": 0.0732, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 15.22, | |
| "learning_rate": 6.863157894736843e-06, | |
| "loss": 0.0687, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 15.22, | |
| "eval_loss": 0.109130859375, | |
| "eval_runtime": 110.2423, | |
| "eval_samples_per_second": 44.81, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.23717429458620223, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "learning_rate": 6.836842105263158e-06, | |
| "loss": 0.0635, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 15.43, | |
| "learning_rate": 6.810526315789474e-06, | |
| "loss": 0.0644, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 15.54, | |
| "learning_rate": 6.78421052631579e-06, | |
| "loss": 0.0583, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "learning_rate": 6.7578947368421054e-06, | |
| "loss": 0.0743, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 15.76, | |
| "learning_rate": 6.731578947368421e-06, | |
| "loss": 0.069, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 15.87, | |
| "learning_rate": 6.705263157894737e-06, | |
| "loss": 0.0659, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 6.678947368421053e-06, | |
| "loss": 0.0637, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "learning_rate": 6.6526315789473695e-06, | |
| "loss": 0.0586, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 16.2, | |
| "learning_rate": 6.626315789473685e-06, | |
| "loss": 0.0672, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 16.3, | |
| "learning_rate": 6.600000000000001e-06, | |
| "loss": 0.0647, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 16.3, | |
| "eval_loss": 0.1021728515625, | |
| "eval_runtime": 111.1773, | |
| "eval_samples_per_second": 44.434, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.21726069933846362, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 16.41, | |
| "learning_rate": 6.573684210526316e-06, | |
| "loss": 0.0602, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 16.52, | |
| "learning_rate": 6.547368421052632e-06, | |
| "loss": 0.0586, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 16.63, | |
| "learning_rate": 6.521052631578948e-06, | |
| "loss": 0.0556, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 16.74, | |
| "learning_rate": 6.494736842105264e-06, | |
| "loss": 0.0647, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 16.85, | |
| "learning_rate": 6.4684210526315794e-06, | |
| "loss": 0.064, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "learning_rate": 6.442105263157895e-06, | |
| "loss": 0.0623, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 17.07, | |
| "learning_rate": 6.415789473684211e-06, | |
| "loss": 0.0551, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "learning_rate": 6.389473684210527e-06, | |
| "loss": 0.053, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "learning_rate": 6.363157894736842e-06, | |
| "loss": 0.0598, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "learning_rate": 6.336842105263158e-06, | |
| "loss": 0.059, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "eval_loss": 0.09674072265625, | |
| "eval_runtime": 110.518, | |
| "eval_samples_per_second": 44.699, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.20433373835560956, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "learning_rate": 6.310526315789474e-06, | |
| "loss": 0.0564, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 17.61, | |
| "learning_rate": 6.28421052631579e-06, | |
| "loss": 0.0536, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 17.72, | |
| "learning_rate": 6.257894736842106e-06, | |
| "loss": 0.0523, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "learning_rate": 6.231578947368422e-06, | |
| "loss": 0.0583, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 17.93, | |
| "learning_rate": 6.205263157894738e-06, | |
| "loss": 0.058, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "learning_rate": 6.1789473684210534e-06, | |
| "loss": 0.0549, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "learning_rate": 6.152631578947369e-06, | |
| "loss": 0.0504, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 18.26, | |
| "learning_rate": 6.126315789473685e-06, | |
| "loss": 0.0479, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 18.37, | |
| "learning_rate": 6.1e-06, | |
| "loss": 0.0516, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "learning_rate": 6.073684210526316e-06, | |
| "loss": 0.0539, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "eval_loss": 0.0897216796875, | |
| "eval_runtime": 110.8559, | |
| "eval_samples_per_second": 44.562, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.19289185905224787, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 18.59, | |
| "learning_rate": 6.047368421052632e-06, | |
| "loss": 0.0525, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "learning_rate": 6.0210526315789475e-06, | |
| "loss": 0.0519, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "learning_rate": 5.994736842105263e-06, | |
| "loss": 0.0463, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 18.91, | |
| "learning_rate": 5.968421052631579e-06, | |
| "loss": 0.054, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "learning_rate": 5.942105263157896e-06, | |
| "loss": 0.0552, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "learning_rate": 5.915789473684212e-06, | |
| "loss": 0.0482, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 19.24, | |
| "learning_rate": 5.8894736842105274e-06, | |
| "loss": 0.0464, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "learning_rate": 5.863157894736842e-06, | |
| "loss": 0.0433, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "learning_rate": 5.836842105263158e-06, | |
| "loss": 0.0456, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 19.57, | |
| "learning_rate": 5.810526315789474e-06, | |
| "loss": 0.0518, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 19.57, | |
| "eval_loss": 0.08270263671875, | |
| "eval_runtime": 114.8201, | |
| "eval_samples_per_second": 43.024, | |
| "eval_steps_per_second": 0.061, | |
| "eval_wer": 0.17183070068853787, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 19.67, | |
| "learning_rate": 5.78421052631579e-06, | |
| "loss": 0.048, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "learning_rate": 5.757894736842106e-06, | |
| "loss": 0.0453, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 19.89, | |
| "learning_rate": 5.7315789473684215e-06, | |
| "loss": 0.0438, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 5.705263157894737e-06, | |
| "loss": 0.0445, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 20.11, | |
| "learning_rate": 5.678947368421053e-06, | |
| "loss": 0.048, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 20.22, | |
| "learning_rate": 5.652631578947368e-06, | |
| "loss": 0.0443, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 20.33, | |
| "learning_rate": 5.626315789473684e-06, | |
| "loss": 0.0435, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 20.43, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 0.0408, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 20.54, | |
| "learning_rate": 5.573684210526316e-06, | |
| "loss": 0.0382, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "learning_rate": 5.547368421052632e-06, | |
| "loss": 0.0495, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "eval_loss": 0.07867431640625, | |
| "eval_runtime": 110.7928, | |
| "eval_samples_per_second": 44.588, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.16673417037937086, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 20.76, | |
| "learning_rate": 5.521052631578948e-06, | |
| "loss": 0.0451, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 20.87, | |
| "learning_rate": 5.494736842105264e-06, | |
| "loss": 0.042, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "learning_rate": 5.46842105263158e-06, | |
| "loss": 0.0413, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 21.09, | |
| "learning_rate": 5.4421052631578955e-06, | |
| "loss": 0.0374, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "learning_rate": 5.415789473684211e-06, | |
| "loss": 0.0445, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 21.3, | |
| "learning_rate": 5.389473684210526e-06, | |
| "loss": 0.0406, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 21.41, | |
| "learning_rate": 5.363157894736842e-06, | |
| "loss": 0.0389, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 21.52, | |
| "learning_rate": 5.336842105263158e-06, | |
| "loss": 0.0373, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 21.63, | |
| "learning_rate": 5.310526315789474e-06, | |
| "loss": 0.035, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 21.74, | |
| "learning_rate": 5.2842105263157896e-06, | |
| "loss": 0.0444, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 21.74, | |
| "eval_loss": 0.07183837890625, | |
| "eval_runtime": 112.6262, | |
| "eval_samples_per_second": 43.862, | |
| "eval_steps_per_second": 0.062, | |
| "eval_wer": 0.14692183070068854, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 21.85, | |
| "learning_rate": 5.257894736842105e-06, | |
| "loss": 0.04, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "learning_rate": 5.231578947368422e-06, | |
| "loss": 0.0391, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 22.07, | |
| "learning_rate": 5.205263157894738e-06, | |
| "loss": 0.037, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 22.17, | |
| "learning_rate": 5.178947368421054e-06, | |
| "loss": 0.0332, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 22.28, | |
| "learning_rate": 5.1526315789473695e-06, | |
| "loss": 0.0385, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 22.39, | |
| "learning_rate": 5.1263157894736845e-06, | |
| "loss": 0.0377, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "learning_rate": 5.1e-06, | |
| "loss": 0.0353, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 22.61, | |
| "learning_rate": 5.073684210526316e-06, | |
| "loss": 0.0338, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 22.72, | |
| "learning_rate": 5.047368421052632e-06, | |
| "loss": 0.0327, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 22.83, | |
| "learning_rate": 5.021052631578948e-06, | |
| "loss": 0.0392, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 22.83, | |
| "eval_loss": 0.067138671875, | |
| "eval_runtime": 111.3072, | |
| "eval_samples_per_second": 44.382, | |
| "eval_steps_per_second": 0.063, | |
| "eval_wer": 0.13683002565141084, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 22.93, | |
| "learning_rate": 9.42857142857143e-07, | |
| "loss": 0.0362, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 23.04, | |
| "learning_rate": 8.952380952380953e-07, | |
| "loss": 0.032, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 23.15, | |
| "learning_rate": 8.476190476190477e-07, | |
| "loss": 0.0309, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 23.26, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.0296, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 23.37, | |
| "learning_rate": 7.523809523809525e-07, | |
| "loss": 0.0338, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 23.48, | |
| "learning_rate": 7.047619047619048e-07, | |
| "loss": 0.035, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 23.59, | |
| "learning_rate": 6.571428571428571e-07, | |
| "loss": 0.0312, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 23.7, | |
| "learning_rate": 6.095238095238095e-07, | |
| "loss": 0.0309, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "learning_rate": 5.619047619047619e-07, | |
| "loss": 0.0292, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 23.91, | |
| "learning_rate": 5.142857142857143e-07, | |
| "loss": 0.0335, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 23.91, | |
| "eval_loss": 0.064453125, | |
| "eval_runtime": 113.7883, | |
| "eval_samples_per_second": 43.414, | |
| "eval_steps_per_second": 0.062, | |
| "eval_wer": 0.12626569461320372, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 24.02, | |
| "learning_rate": 4.666666666666667e-07, | |
| "loss": 0.0349, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 24.13, | |
| "learning_rate": 4.1904761904761906e-07, | |
| "loss": 0.0327, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 24.24, | |
| "learning_rate": 3.7142857142857145e-07, | |
| "loss": 0.0317, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 24.35, | |
| "learning_rate": 3.238095238095238e-07, | |
| "loss": 0.0298, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 24.46, | |
| "learning_rate": 2.7619047619047624e-07, | |
| "loss": 0.0302, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 24.57, | |
| "learning_rate": 2.285714285714286e-07, | |
| "loss": 0.0355, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "learning_rate": 1.8095238095238097e-07, | |
| "loss": 0.0309, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 24.78, | |
| "learning_rate": 1.3333333333333336e-07, | |
| "loss": 0.0299, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 24.89, | |
| "learning_rate": 8.571428571428573e-08, | |
| "loss": 0.028, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 3.8095238095238096e-08, | |
| "loss": 0.0292, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.0640869140625, | |
| "eval_runtime": 116.9844, | |
| "eval_samples_per_second": 42.228, | |
| "eval_steps_per_second": 0.06, | |
| "eval_wer": 0.12623194275685162, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "step": 2300, | |
| "total_flos": 1.7185304344854004e+20, | |
| "train_loss": 0.0027616678631823995, | |
| "train_runtime": 6144.0313, | |
| "train_samples_per_second": 431.248, | |
| "train_steps_per_second": 0.374 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 100, | |
| "total_flos": 1.7185304344854004e+20, | |
| "train_batch_size": 192, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |