| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.272838002436053, |
| "eval_steps": 100, |
| "global_step": 3400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.243605359317905, |
| "grad_norm": 1.117086410522461, |
| "learning_rate": 0.0005595762717789524, |
| "loss": 2.3476, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.243605359317905, |
| "eval_loss": 0.41345393657684326, |
| "eval_runtime": 207.4783, |
| "eval_samples_per_second": 7.938, |
| "eval_steps_per_second": 0.993, |
| "eval_wer": 0.3729947315456106, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.48721071863581, |
| "grad_norm": 1.6088380813598633, |
| "learning_rate": 0.0005480076079009505, |
| "loss": 0.4631, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.48721071863581, |
| "eval_loss": 0.3957911729812622, |
| "eval_runtime": 207.5213, |
| "eval_samples_per_second": 7.937, |
| "eval_steps_per_second": 0.993, |
| "eval_wer": 0.3639969218019298, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.730816077953715, |
| "grad_norm": 1.3071035146713257, |
| "learning_rate": 0.0005364389440229487, |
| "loss": 0.4441, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.730816077953715, |
| "eval_loss": 0.3825915455818176, |
| "eval_runtime": 207.3617, |
| "eval_samples_per_second": 7.943, |
| "eval_steps_per_second": 0.993, |
| "eval_wer": 0.34925708873497896, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.97442143727162, |
| "grad_norm": 1.313904047012329, |
| "learning_rate": 0.0005249859667837267, |
| "loss": 0.4367, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.97442143727162, |
| "eval_loss": 0.3784385323524475, |
| "eval_runtime": 207.3709, |
| "eval_samples_per_second": 7.942, |
| "eval_steps_per_second": 0.993, |
| "eval_wer": 0.35215769845498135, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2168087697929355, |
| "grad_norm": 0.9488235116004944, |
| "learning_rate": 0.0005134173029057248, |
| "loss": 0.424, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2168087697929355, |
| "eval_loss": 0.3734341263771057, |
| "eval_runtime": 209.8871, |
| "eval_samples_per_second": 7.847, |
| "eval_steps_per_second": 0.981, |
| "eval_wer": 0.3468300479488546, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4604141291108403, |
| "grad_norm": 0.875639021396637, |
| "learning_rate": 0.0005018486390277228, |
| "loss": 0.4256, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4604141291108403, |
| "eval_loss": 0.37602460384368896, |
| "eval_runtime": 207.528, |
| "eval_samples_per_second": 7.936, |
| "eval_steps_per_second": 0.993, |
| "eval_wer": 0.3515657372876339, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7040194884287454, |
| "grad_norm": 0.7033505439758301, |
| "learning_rate": 0.000490395661788501, |
| "loss": 0.4189, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7040194884287454, |
| "eval_loss": 0.3739420473575592, |
| "eval_runtime": 208.7563, |
| "eval_samples_per_second": 7.89, |
| "eval_steps_per_second": 0.987, |
| "eval_wer": 0.34742200911620197, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9476248477466505, |
| "grad_norm": 0.8537428975105286, |
| "learning_rate": 0.00047882699791049904, |
| "loss": 0.4293, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9476248477466505, |
| "eval_loss": 0.3677741289138794, |
| "eval_runtime": 209.3774, |
| "eval_samples_per_second": 7.866, |
| "eval_steps_per_second": 0.984, |
| "eval_wer": 0.34481737997987333, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.1900121802679657, |
| "grad_norm": 0.7106778621673584, |
| "learning_rate": 0.00046725833403249716, |
| "loss": 0.4105, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.1900121802679657, |
| "eval_loss": 0.36625751852989197, |
| "eval_runtime": 208.5161, |
| "eval_samples_per_second": 7.899, |
| "eval_steps_per_second": 0.988, |
| "eval_wer": 0.3465932634819156, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.433617539585871, |
| "grad_norm": 0.9586976766586304, |
| "learning_rate": 0.00045568967015449527, |
| "loss": 0.3981, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.433617539585871, |
| "eval_loss": 0.37355494499206543, |
| "eval_runtime": 209.5635, |
| "eval_samples_per_second": 7.859, |
| "eval_steps_per_second": 0.983, |
| "eval_wer": 0.3441070265790564, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.677222898903776, |
| "grad_norm": 7.491183757781982, |
| "learning_rate": 0.0004441210062764934, |
| "loss": 0.4161, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.677222898903776, |
| "eval_loss": 0.3725080192089081, |
| "eval_runtime": 209.8642, |
| "eval_samples_per_second": 7.848, |
| "eval_steps_per_second": 0.982, |
| "eval_wer": 0.34866512756763157, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.9208282582216807, |
| "grad_norm": 0.9329687356948853, |
| "learning_rate": 0.0004326680290372715, |
| "loss": 0.4188, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.9208282582216807, |
| "eval_loss": 0.3627680242061615, |
| "eval_runtime": 209.391, |
| "eval_samples_per_second": 7.866, |
| "eval_steps_per_second": 0.984, |
| "eval_wer": 0.3433966731782395, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.1632155907429964, |
| "grad_norm": 0.7644044756889343, |
| "learning_rate": 0.00042109936515926955, |
| "loss": 0.3984, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.1632155907429964, |
| "eval_loss": 0.3671010732650757, |
| "eval_runtime": 207.9613, |
| "eval_samples_per_second": 7.92, |
| "eval_steps_per_second": 0.991, |
| "eval_wer": 0.34262712366068787, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.4068209500609012, |
| "grad_norm": 1.272712230682373, |
| "learning_rate": 0.00040953070128126767, |
| "loss": 0.4108, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.4068209500609012, |
| "eval_loss": 0.3673802316188812, |
| "eval_runtime": 208.6721, |
| "eval_samples_per_second": 7.893, |
| "eval_steps_per_second": 0.987, |
| "eval_wer": 0.3410288285088498, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.6504263093788065, |
| "grad_norm": 0.9993807673454285, |
| "learning_rate": 0.00039796203740326573, |
| "loss": 0.3905, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.6504263093788065, |
| "eval_loss": 0.3593311309814453, |
| "eval_runtime": 208.7376, |
| "eval_samples_per_second": 7.89, |
| "eval_steps_per_second": 0.987, |
| "eval_wer": 0.3387793760729296, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.8940316686967114, |
| "grad_norm": 4.2975311279296875, |
| "learning_rate": 0.0003865090601640438, |
| "loss": 0.3977, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.8940316686967114, |
| "eval_loss": 0.36261168122291565, |
| "eval_runtime": 209.8526, |
| "eval_samples_per_second": 7.848, |
| "eval_steps_per_second": 0.982, |
| "eval_wer": 0.33872017995619486, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.136419001218027, |
| "grad_norm": 0.6952757835388184, |
| "learning_rate": 0.0003749403962860419, |
| "loss": 0.3961, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.136419001218027, |
| "eval_loss": 0.36098670959472656, |
| "eval_runtime": 209.1076, |
| "eval_samples_per_second": 7.876, |
| "eval_steps_per_second": 0.985, |
| "eval_wer": 0.33220860711537326, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.3800243605359315, |
| "grad_norm": 0.6217916011810303, |
| "learning_rate": 0.00036337173240804, |
| "loss": 0.3844, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.3800243605359315, |
| "eval_loss": 0.36441686749458313, |
| "eval_runtime": 208.6619, |
| "eval_samples_per_second": 7.893, |
| "eval_steps_per_second": 0.987, |
| "eval_wer": 0.3422719469602794, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.623629719853836, |
| "grad_norm": 0.6715461611747742, |
| "learning_rate": 0.00035180306853003807, |
| "loss": 0.3938, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.623629719853836, |
| "eval_loss": 0.35540714859962463, |
| "eval_runtime": 207.8758, |
| "eval_samples_per_second": 7.923, |
| "eval_steps_per_second": 0.991, |
| "eval_wer": 0.33546439353578406, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.867235079171742, |
| "grad_norm": 4.297011852264404, |
| "learning_rate": 0.00034035009129081617, |
| "loss": 0.3808, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.867235079171742, |
| "eval_loss": 0.35785403847694397, |
| "eval_runtime": 209.5813, |
| "eval_samples_per_second": 7.859, |
| "eval_steps_per_second": 0.983, |
| "eval_wer": 0.3348724323684366, |
| "step": 2000 |
| }, |
| { |
| "epoch": 5.109622411693057, |
| "grad_norm": 1.1115533113479614, |
| "learning_rate": 0.00032878142741281434, |
| "loss": 0.3822, |
| "step": 2100 |
| }, |
| { |
| "epoch": 5.109622411693057, |
| "eval_loss": 0.35621944069862366, |
| "eval_runtime": 208.4206, |
| "eval_samples_per_second": 7.902, |
| "eval_steps_per_second": 0.988, |
| "eval_wer": 0.3330373527496596, |
| "step": 2100 |
| }, |
| { |
| "epoch": 5.353227771010962, |
| "grad_norm": 1.375293493270874, |
| "learning_rate": 0.0003172127635348124, |
| "loss": 0.3755, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.353227771010962, |
| "eval_loss": 0.3556455075740814, |
| "eval_runtime": 208.4802, |
| "eval_samples_per_second": 7.9, |
| "eval_steps_per_second": 0.988, |
| "eval_wer": 0.3307287041970047, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.596833130328867, |
| "grad_norm": 4.188701629638672, |
| "learning_rate": 0.0003056440996568105, |
| "loss": 0.3789, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.596833130328867, |
| "eval_loss": 0.3514413833618164, |
| "eval_runtime": 209.2966, |
| "eval_samples_per_second": 7.869, |
| "eval_steps_per_second": 0.984, |
| "eval_wer": 0.3303143313798615, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.840438489646772, |
| "grad_norm": 1.3970204591751099, |
| "learning_rate": 0.0002941911224175886, |
| "loss": 0.3742, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.840438489646772, |
| "eval_loss": 0.34722429513931274, |
| "eval_runtime": 209.3711, |
| "eval_samples_per_second": 7.866, |
| "eval_steps_per_second": 0.984, |
| "eval_wer": 0.33280056828272064, |
| "step": 2400 |
| }, |
| { |
| "epoch": 6.082825822168088, |
| "grad_norm": 3.3088934421539307, |
| "learning_rate": 0.0002826224585395867, |
| "loss": 0.3608, |
| "step": 2500 |
| }, |
| { |
| "epoch": 6.082825822168088, |
| "eval_loss": 0.3470153510570526, |
| "eval_runtime": 208.973, |
| "eval_samples_per_second": 7.881, |
| "eval_steps_per_second": 0.986, |
| "eval_wer": 0.32759131001006336, |
| "step": 2500 |
| }, |
| { |
| "epoch": 6.326431181485993, |
| "grad_norm": 1.3504565954208374, |
| "learning_rate": 0.0002711694813003648, |
| "loss": 0.3647, |
| "step": 2600 |
| }, |
| { |
| "epoch": 6.326431181485993, |
| "eval_loss": 0.34682515263557434, |
| "eval_runtime": 209.4124, |
| "eval_samples_per_second": 7.865, |
| "eval_steps_per_second": 0.984, |
| "eval_wer": 0.32954478186230984, |
| "step": 2600 |
| }, |
| { |
| "epoch": 6.570036540803898, |
| "grad_norm": 3.7289445400238037, |
| "learning_rate": 0.0002596008174223629, |
| "loss": 0.3719, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.570036540803898, |
| "eval_loss": 0.3456605076789856, |
| "eval_runtime": 210.3062, |
| "eval_samples_per_second": 7.831, |
| "eval_steps_per_second": 0.98, |
| "eval_wer": 0.3259930148582253, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.8136419001218025, |
| "grad_norm": 0.8493024706840515, |
| "learning_rate": 0.00024803215354436095, |
| "loss": 0.3678, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.8136419001218025, |
| "eval_loss": 0.3423258364200592, |
| "eval_runtime": 210.1296, |
| "eval_samples_per_second": 7.838, |
| "eval_steps_per_second": 0.98, |
| "eval_wer": 0.31794234298230034, |
| "step": 2800 |
| }, |
| { |
| "epoch": 7.056029232643118, |
| "grad_norm": 2.6314430236816406, |
| "learning_rate": 0.00023646348966635904, |
| "loss": 0.3575, |
| "step": 2900 |
| }, |
| { |
| "epoch": 7.056029232643118, |
| "eval_loss": 0.3422372043132782, |
| "eval_runtime": 209.587, |
| "eval_samples_per_second": 7.858, |
| "eval_steps_per_second": 0.983, |
| "eval_wer": 0.3201325993014858, |
| "step": 2900 |
| }, |
| { |
| "epoch": 7.2996345919610235, |
| "grad_norm": 0.7139139175415039, |
| "learning_rate": 0.00022489482578835716, |
| "loss": 0.3427, |
| "step": 3000 |
| }, |
| { |
| "epoch": 7.2996345919610235, |
| "eval_loss": 0.35160067677497864, |
| "eval_runtime": 209.3155, |
| "eval_samples_per_second": 7.869, |
| "eval_steps_per_second": 0.984, |
| "eval_wer": 0.3231516012549577, |
| "step": 3000 |
| }, |
| { |
| "epoch": 7.543239951278928, |
| "grad_norm": 2.187338352203369, |
| "learning_rate": 0.00021344184854913526, |
| "loss": 0.3661, |
| "step": 3100 |
| }, |
| { |
| "epoch": 7.543239951278928, |
| "eval_loss": 0.342045396566391, |
| "eval_runtime": 210.2605, |
| "eval_samples_per_second": 7.833, |
| "eval_steps_per_second": 0.98, |
| "eval_wer": 0.32155330610311966, |
| "step": 3100 |
| }, |
| { |
| "epoch": 7.786845310596833, |
| "grad_norm": 2.156829357147217, |
| "learning_rate": 0.00020187318467113335, |
| "loss": 0.3502, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.786845310596833, |
| "eval_loss": 0.3429788053035736, |
| "eval_runtime": 210.9605, |
| "eval_samples_per_second": 7.807, |
| "eval_steps_per_second": 0.976, |
| "eval_wer": 0.32380275853903984, |
| "step": 3200 |
| }, |
| { |
| "epoch": 8.02923264311815, |
| "grad_norm": 0.6317552924156189, |
| "learning_rate": 0.00019030452079313144, |
| "loss": 0.3681, |
| "step": 3300 |
| }, |
| { |
| "epoch": 8.02923264311815, |
| "eval_loss": 0.33876505494117737, |
| "eval_runtime": 210.1517, |
| "eval_samples_per_second": 7.837, |
| "eval_steps_per_second": 0.98, |
| "eval_wer": 0.3200734031847511, |
| "step": 3300 |
| }, |
| { |
| "epoch": 8.272838002436053, |
| "grad_norm": 0.9605346322059631, |
| "learning_rate": 0.0001787358569151295, |
| "loss": 0.3454, |
| "step": 3400 |
| }, |
| { |
| "epoch": 8.272838002436053, |
| "eval_loss": 0.33973127603530884, |
| "eval_runtime": 209.8102, |
| "eval_samples_per_second": 7.85, |
| "eval_steps_per_second": 0.982, |
| "eval_wer": 0.32184928668679336, |
| "step": 3400 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 4932, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 12, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.771788544505309e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|