{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.981132075471699,
  "global_step": 390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "learning_rate": 1e-08,
      "loss": 8.1684,
      "step": 10
    },
    {
      "epoch": 0.25,
      "eval_loss": 8.85881519317627,
      "eval_runtime": 219.4921,
      "eval_samples_per_second": 19.481,
      "eval_steps_per_second": 9.741,
      "eval_wer": 1.0125384665750017,
      "step": 10
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.736842105263159e-09,
      "loss": 8.1428,
      "step": 20
    },
    {
      "epoch": 0.5,
      "eval_loss": 8.856884956359863,
      "eval_runtime": 224.2813,
      "eval_samples_per_second": 19.065,
      "eval_steps_per_second": 9.533,
      "eval_wer": 1.012505729064362,
      "step": 20
    },
    {
      "epoch": 0.75,
      "learning_rate": 9.473684210526316e-09,
      "loss": 8.1333,
      "step": 30
    },
    {
      "epoch": 0.75,
      "eval_loss": 8.855155944824219,
      "eval_runtime": 226.0385,
      "eval_samples_per_second": 18.917,
      "eval_steps_per_second": 9.459,
      "eval_wer": 1.0124075165324429,
      "step": 30
    },
    {
      "epoch": 1.03,
      "learning_rate": 9.210526315789473e-09,
      "loss": 8.7873,
      "step": 40
    },
    {
      "epoch": 1.03,
      "eval_loss": 8.85318660736084,
      "eval_runtime": 220.4335,
      "eval_samples_per_second": 19.398,
      "eval_steps_per_second": 9.699,
      "eval_wer": 1.0124075165324429,
      "step": 40
    },
    {
      "epoch": 1.28,
      "learning_rate": 8.947368421052632e-09,
      "loss": 8.1298,
      "step": 50
    },
    {
      "epoch": 1.28,
      "eval_loss": 8.851649284362793,
      "eval_runtime": 224.7965,
      "eval_samples_per_second": 19.022,
      "eval_steps_per_second": 9.511,
      "eval_wer": 1.0124075165324429,
      "step": 50
    },
    {
      "epoch": 1.53,
      "learning_rate": 8.68421052631579e-09,
      "loss": 8.1445,
      "step": 60
    },
    {
      "epoch": 1.53,
      "eval_loss": 8.84989070892334,
      "eval_runtime": 220.475,
      "eval_samples_per_second": 19.394,
      "eval_steps_per_second": 9.697,
      "eval_wer": 1.0123420415111635,
      "step": 60
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.421052631578947e-09,
      "loss": 8.1635,
      "step": 70
    },
    {
      "epoch": 1.78,
      "eval_loss": 8.8483304977417,
      "eval_runtime": 222.7151,
      "eval_samples_per_second": 19.199,
      "eval_steps_per_second": 9.6,
      "eval_wer": 1.0124075165324429,
      "step": 70
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.157894736842106e-09,
      "loss": 8.7587,
      "step": 80
    },
    {
      "epoch": 2.05,
      "eval_loss": 8.846796989440918,
      "eval_runtime": 221.1122,
      "eval_samples_per_second": 19.339,
      "eval_steps_per_second": 9.669,
      "eval_wer": 1.0125384665750017,
      "step": 80
    },
    {
      "epoch": 2.3,
      "learning_rate": 7.894736842105263e-09,
      "loss": 8.1424,
      "step": 90
    },
    {
      "epoch": 2.3,
      "eval_loss": 8.845438957214355,
      "eval_runtime": 219.7737,
      "eval_samples_per_second": 19.456,
      "eval_steps_per_second": 9.728,
      "eval_wer": 1.0124075165324429,
      "step": 90
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.631578947368422e-09,
      "loss": 8.1318,
      "step": 100
    },
    {
      "epoch": 2.55,
      "eval_loss": 8.844048500061035,
      "eval_runtime": 222.4326,
      "eval_samples_per_second": 19.224,
      "eval_steps_per_second": 9.612,
      "eval_wer": 1.0124402540430826,
      "step": 100
    },
    {
      "epoch": 2.81,
      "learning_rate": 7.368421052631579e-09,
      "loss": 8.1469,
      "step": 110
    },
    {
      "epoch": 2.81,
      "eval_loss": 8.842790603637695,
      "eval_runtime": 224.1044,
      "eval_samples_per_second": 19.08,
      "eval_steps_per_second": 9.54,
      "eval_wer": 1.012505729064362,
      "step": 110
    },
    {
      "epoch": 3.08,
      "learning_rate": 7.105263157894737e-09,
      "loss": 8.7602,
      "step": 120
    },
    {
      "epoch": 3.08,
      "eval_loss": 8.841601371765137,
      "eval_runtime": 222.7003,
      "eval_samples_per_second": 19.201,
      "eval_steps_per_second": 9.6,
      "eval_wer": 1.0124729915537223,
      "step": 120
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.842105263157895e-09,
      "loss": 8.1584,
      "step": 130
    },
    {
      "epoch": 3.33,
      "eval_loss": 8.840473175048828,
      "eval_runtime": 220.9442,
      "eval_samples_per_second": 19.353,
      "eval_steps_per_second": 9.677,
      "eval_wer": 1.0125712040856414,
      "step": 130
    },
    {
      "epoch": 3.58,
      "learning_rate": 6.578947368421054e-09,
      "loss": 8.142,
      "step": 140
    },
    {
      "epoch": 3.58,
      "eval_loss": 8.839417457580566,
      "eval_runtime": 223.4762,
      "eval_samples_per_second": 19.134,
      "eval_steps_per_second": 9.567,
      "eval_wer": 1.0125712040856414,
      "step": 140
    },
    {
      "epoch": 3.83,
      "learning_rate": 6.31578947368421e-09,
      "loss": 8.1285,
      "step": 150
    },
    {
      "epoch": 3.83,
      "eval_loss": 8.838351249694824,
      "eval_runtime": 225.8637,
      "eval_samples_per_second": 18.932,
      "eval_steps_per_second": 9.466,
      "eval_wer": 1.0124075165324429,
      "step": 150
    },
    {
      "epoch": 4.1,
      "learning_rate": 6.052631578947369e-09,
      "loss": 8.7756,
      "step": 160
    },
    {
      "epoch": 4.1,
      "eval_loss": 8.837142944335938,
      "eval_runtime": 224.8527,
      "eval_samples_per_second": 19.017,
      "eval_steps_per_second": 9.508,
      "eval_wer": 1.0124075165324429,
      "step": 160
    },
    {
      "epoch": 4.35,
      "learning_rate": 5.789473684210527e-09,
      "loss": 8.0991,
      "step": 170
    },
    {
      "epoch": 4.35,
      "eval_loss": 8.83634090423584,
      "eval_runtime": 220.3123,
      "eval_samples_per_second": 19.409,
      "eval_steps_per_second": 9.704,
      "eval_wer": 1.0124729915537223,
      "step": 170
    },
    {
      "epoch": 4.6,
      "learning_rate": 5.526315789473685e-09,
      "loss": 8.1442,
      "step": 180
    },
    {
      "epoch": 4.6,
      "eval_loss": 8.83536434173584,
      "eval_runtime": 224.5432,
      "eval_samples_per_second": 19.043,
      "eval_steps_per_second": 9.522,
      "eval_wer": 1.0124402540430826,
      "step": 180
    },
    {
      "epoch": 4.86,
      "learning_rate": 5.263157894736842e-09,
      "loss": 8.1294,
      "step": 190
    },
    {
      "epoch": 4.86,
      "eval_loss": 8.834578514099121,
      "eval_runtime": 220.0402,
      "eval_samples_per_second": 19.433,
      "eval_steps_per_second": 9.716,
      "eval_wer": 1.0124075165324429,
      "step": 190
    },
    {
      "epoch": 5.13,
      "learning_rate": 5e-09,
      "loss": 8.7276,
      "step": 200
    },
    {
      "epoch": 5.13,
      "eval_loss": 8.833772659301758,
      "eval_runtime": 224.0823,
      "eval_samples_per_second": 19.082,
      "eval_steps_per_second": 9.541,
      "eval_wer": 1.0125384665750017,
      "step": 200
    },
    {
      "epoch": 5.38,
      "learning_rate": 4.736842105263158e-09,
      "loss": 8.1439,
      "step": 210
    },
    {
      "epoch": 5.38,
      "eval_loss": 8.832892417907715,
      "eval_runtime": 220.6908,
      "eval_samples_per_second": 19.376,
      "eval_steps_per_second": 9.688,
      "eval_wer": 1.0124402540430826,
      "step": 210
    },
    {
      "epoch": 5.63,
      "learning_rate": 4.473684210526316e-09,
      "loss": 8.1115,
      "step": 220
    },
    {
      "epoch": 5.63,
      "eval_loss": 8.832157135009766,
      "eval_runtime": 221.8649,
      "eval_samples_per_second": 19.273,
      "eval_steps_per_second": 9.636,
      "eval_wer": 1.0124402540430826,
      "step": 220
    },
    {
      "epoch": 5.88,
      "learning_rate": 4.210526315789473e-09,
      "loss": 8.1501,
      "step": 230
    },
    {
      "epoch": 5.88,
      "eval_loss": 8.831602096557617,
      "eval_runtime": 223.55,
      "eval_samples_per_second": 19.128,
      "eval_steps_per_second": 9.564,
      "eval_wer": 1.0125384665750017,
      "step": 230
    },
    {
      "epoch": 6.15,
      "learning_rate": 3.947368421052631e-09,
      "loss": 8.7143,
      "step": 240
    },
    {
      "epoch": 6.15,
      "eval_loss": 8.830825805664062,
      "eval_runtime": 224.3279,
      "eval_samples_per_second": 19.061,
      "eval_steps_per_second": 9.531,
      "eval_wer": 1.0124075165324429,
      "step": 240
    },
    {
      "epoch": 6.4,
      "learning_rate": 3.6842105263157894e-09,
      "loss": 8.143,
      "step": 250
    },
    {
      "epoch": 6.4,
      "eval_loss": 8.830228805541992,
      "eval_runtime": 225.4738,
      "eval_samples_per_second": 18.965,
      "eval_steps_per_second": 9.482,
      "eval_wer": 1.0123747790218032,
      "step": 250
    },
    {
      "epoch": 6.65,
      "learning_rate": 3.4210526315789474e-09,
      "loss": 8.1528,
      "step": 260
    },
    {
      "epoch": 6.65,
      "eval_loss": 8.829960823059082,
      "eval_runtime": 222.8802,
      "eval_samples_per_second": 19.185,
      "eval_steps_per_second": 9.593,
      "eval_wer": 1.0124729915537223,
      "step": 260
    },
    {
      "epoch": 6.91,
      "learning_rate": 3.1842105263157894e-09,
      "loss": 8.1293,
      "step": 270
    },
    {
      "epoch": 6.91,
      "eval_loss": 8.829716682434082,
      "eval_runtime": 223.7307,
      "eval_samples_per_second": 19.112,
      "eval_steps_per_second": 9.556,
      "eval_wer": 1.0124075165324429,
      "step": 270
    },
    {
      "epoch": 7.18,
      "learning_rate": 2.9210526315789475e-09,
      "loss": 8.7519,
      "step": 280
    },
    {
      "epoch": 7.18,
      "eval_loss": 8.829301834106445,
      "eval_runtime": 223.0404,
      "eval_samples_per_second": 19.171,
      "eval_steps_per_second": 9.586,
      "eval_wer": 1.0124729915537223,
      "step": 280
    },
    {
      "epoch": 7.43,
      "learning_rate": 2.657894736842105e-09,
      "loss": 8.1153,
      "step": 290
    },
    {
      "epoch": 7.43,
      "eval_loss": 8.828947067260742,
      "eval_runtime": 219.8129,
      "eval_samples_per_second": 19.453,
      "eval_steps_per_second": 9.726,
      "eval_wer": 1.0124075165324429,
      "step": 290
    },
    {
      "epoch": 7.68,
      "learning_rate": 2.394736842105263e-09,
      "loss": 8.1292,
      "step": 300
    },
    {
      "epoch": 7.68,
      "eval_loss": 8.828753471374512,
      "eval_runtime": 222.9513,
      "eval_samples_per_second": 19.179,
      "eval_steps_per_second": 9.59,
      "eval_wer": 1.0124402540430826,
      "step": 300
    },
    {
      "epoch": 7.93,
      "learning_rate": 2.131578947368421e-09,
      "loss": 8.0904,
      "step": 310
    },
    {
      "epoch": 7.93,
      "eval_loss": 8.828449249267578,
      "eval_runtime": 224.0134,
      "eval_samples_per_second": 19.088,
      "eval_steps_per_second": 9.544,
      "eval_wer": 1.0124075165324429,
      "step": 310
    },
    {
      "epoch": 8.2,
      "learning_rate": 1.868421052631579e-09,
      "loss": 8.7425,
      "step": 320
    },
    {
      "epoch": 8.2,
      "eval_loss": 8.828290939331055,
      "eval_runtime": 219.9475,
      "eval_samples_per_second": 19.441,
      "eval_steps_per_second": 9.721,
      "eval_wer": 1.0125384665750017,
      "step": 320
    },
    {
      "epoch": 8.45,
      "learning_rate": 1.605263157894737e-09,
      "loss": 8.0963,
      "step": 330
    },
    {
      "epoch": 8.45,
      "eval_loss": 8.828081130981445,
      "eval_runtime": 222.5212,
      "eval_samples_per_second": 19.216,
      "eval_steps_per_second": 9.608,
      "eval_wer": 1.0124075165324429,
      "step": 330
    },
    {
      "epoch": 8.7,
      "learning_rate": 1.3421052631578948e-09,
      "loss": 8.1112,
      "step": 340
    },
    {
      "epoch": 8.7,
      "eval_loss": 8.828051567077637,
      "eval_runtime": 222.696,
      "eval_samples_per_second": 19.201,
      "eval_steps_per_second": 9.601,
      "eval_wer": 1.0124402540430826,
      "step": 340
    },
    {
      "epoch": 8.96,
      "learning_rate": 1.0789473684210528e-09,
      "loss": 8.124,
      "step": 350
    },
    {
      "epoch": 8.96,
      "eval_loss": 8.828123092651367,
      "eval_runtime": 222.2122,
      "eval_samples_per_second": 19.243,
      "eval_steps_per_second": 9.621,
      "eval_wer": 1.012505729064362,
      "step": 350
    },
    {
      "epoch": 9.23,
      "learning_rate": 8.157894736842106e-10,
      "loss": 8.7327,
      "step": 360
    },
    {
      "epoch": 9.23,
      "eval_loss": 8.827865600585938,
      "eval_runtime": 222.5189,
      "eval_samples_per_second": 19.216,
      "eval_steps_per_second": 9.608,
      "eval_wer": 1.0123420415111635,
      "step": 360
    },
    {
      "epoch": 9.48,
      "learning_rate": 5.526315789473684e-10,
      "loss": 8.1261,
      "step": 370
    },
    {
      "epoch": 9.48,
      "eval_loss": 8.827857971191406,
      "eval_runtime": 224.6034,
      "eval_samples_per_second": 19.038,
      "eval_steps_per_second": 9.519,
      "eval_wer": 1.012603941596281,
      "step": 370
    },
    {
      "epoch": 9.73,
      "learning_rate": 2.894736842105263e-10,
      "loss": 8.1259,
      "step": 380
    },
    {
      "epoch": 9.73,
      "eval_loss": 8.827925682067871,
      "eval_runtime": 223.0189,
      "eval_samples_per_second": 19.173,
      "eval_steps_per_second": 9.587,
      "eval_wer": 1.0123747790218032,
      "step": 380
    },
    {
      "epoch": 9.98,
      "learning_rate": 2.631578947368421e-11,
      "loss": 8.1116,
      "step": 390
    },
    {
      "epoch": 9.98,
      "eval_loss": 8.827937126159668,
      "eval_runtime": 224.7494,
      "eval_samples_per_second": 19.026,
      "eval_steps_per_second": 9.513,
      "eval_wer": 1.0123093040005238,
      "step": 390
    },
    {
      "epoch": 9.98,
      "step": 390,
      "total_flos": 1.7181016563618468e+19,
      "train_loss": 8.274780469063002,
      "train_runtime": 14935.5952,
      "train_samples_per_second": 6.813,
      "train_steps_per_second": 0.026
    }
  ],
  "max_steps": 390,
  "num_train_epochs": 10,
  "total_flos": 1.7181016563618468e+19,
  "trial_name": null,
  "trial_params": null
}