{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 1000, "global_step": 5600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17857142857142858, "grad_norm": 36795.6953125, "learning_rate": 1.785714285714286e-05, "loss": 11.5686, "step": 100 }, { "epoch": 0.35714285714285715, "grad_norm": 15962.9775390625, "learning_rate": 3.571428571428572e-05, "loss": 6.6701, "step": 200 }, { "epoch": 0.5357142857142857, "grad_norm": 22162.76953125, "learning_rate": 4.999825642177387e-05, "loss": 4.7406, "step": 300 }, { "epoch": 0.7142857142857143, "grad_norm": 22004.275390625, "learning_rate": 4.9937256716606394e-05, "loss": 4.7143, "step": 400 }, { "epoch": 0.8928571428571429, "grad_norm": 19477.404296875, "learning_rate": 4.978932115289164e-05, "loss": 4.6796, "step": 500 }, { "epoch": 1.0714285714285714, "grad_norm": 20002.865234375, "learning_rate": 4.955496546118439e-05, "loss": 4.6291, "step": 600 }, { "epoch": 1.25, "grad_norm": 19484.634765625, "learning_rate": 4.923500664848326e-05, "loss": 4.58, "step": 700 }, { "epoch": 1.4285714285714286, "grad_norm": 18751.654296875, "learning_rate": 4.883056014999423e-05, "loss": 4.5509, "step": 800 }, { "epoch": 1.6071428571428572, "grad_norm": 18834.03515625, "learning_rate": 4.834303594051854e-05, "loss": 4.5004, "step": 900 }, { "epoch": 1.7857142857142856, "grad_norm": 17574.287109375, "learning_rate": 4.7774133619021514e-05, "loss": 4.4528, "step": 1000 }, { "epoch": 1.7857142857142856, "eval_cer_score": 0.996767055081083, "eval_loss": 4.664954662322998, "eval_runtime": 7.0528, "eval_samples_per_second": 35.447, "eval_steps_per_second": 5.955, "eval_wer_score": 1.0, "step": 1000 }, { "epoch": 1.9642857142857144, "grad_norm": 17599.75, "learning_rate": 4.7125836483518276e-05, "loss": 4.4168, "step": 1100 }, { "epoch": 2.142857142857143, "grad_norm": 15682.912109375, "learning_rate": 4.6400404616932505e-05, "loss": 4.3702, "step": 1200 }, { "epoch": 2.3214285714285716, "grad_norm": 17537.24609375, "learning_rate": 4.560036700803213e-05, "loss": 4.3231, "step": 1300 }, { "epoch": 2.5, "grad_norm": 18234.52734375, "learning_rate": 4.4728512734909844e-05, "loss": 4.2926, "step": 1400 }, { "epoch": 2.678571428571429, "grad_norm": 18046.029296875, "learning_rate": 4.378788124174441e-05, "loss": 4.2644, "step": 1500 }, { "epoch": 2.857142857142857, "grad_norm": 17269.84765625, "learning_rate": 4.2781751742739885e-05, "loss": 4.2458, "step": 1600 }, { "epoch": 3.0357142857142856, "grad_norm": 16300.3486328125, "learning_rate": 4.1713631790182364e-05, "loss": 4.2229, "step": 1700 }, { "epoch": 3.2142857142857144, "grad_norm": 16482.568359375, "learning_rate": 4.058724504646834e-05, "loss": 4.1953, "step": 1800 }, { "epoch": 3.392857142857143, "grad_norm": 15220.8369140625, "learning_rate": 3.9406518302733416e-05, "loss": 4.1883, "step": 1900 }, { "epoch": 3.571428571428571, "grad_norm": 14242.6416015625, "learning_rate": 3.817556778933698e-05, "loss": 4.1858, "step": 2000 }, { "epoch": 3.571428571428571, "eval_cer_score": 0.9825155884420779, "eval_loss": 4.381639003753662, "eval_runtime": 6.4858, "eval_samples_per_second": 38.546, "eval_steps_per_second": 6.476, "eval_wer_score": 1.0, "step": 2000 }, { "epoch": 3.75, "grad_norm": 14843.748046875, "learning_rate": 3.689868482592684e-05, "loss": 4.1807, "step": 2100 }, { "epoch": 3.928571428571429, "grad_norm": 29719.095703125, "learning_rate": 3.5580320861110625e-05, "loss": 4.1251, "step": 2200 }, { "epoch": 4.107142857142857, "grad_norm": 17429.841796875, "learning_rate": 3.4225071953887976e-05, "loss": 4.031, "step": 2300 }, { "epoch": 4.285714285714286, "grad_norm": 28763.1015625, "learning_rate": 3.2837662750944535e-05, "loss": 4.0189, "step": 2400 }, { "epoch": 4.464285714285714, "grad_norm": 16039.8037109375, "learning_rate": 3.1422930015665484e-05, "loss": 4.0042, "step": 2500 }, { "epoch": 4.642857142857143, "grad_norm": 27426.4765625, "learning_rate": 2.9985805766289817e-05, "loss": 3.9906, "step": 2600 }, { "epoch": 4.821428571428571, "grad_norm": 17122.708984375, "learning_rate": 2.853130008198855e-05, "loss": 3.9787, "step": 2700 }, { "epoch": 5.0, "grad_norm": 25625.185546875, "learning_rate": 2.7064483636808313e-05, "loss": 3.9773, "step": 2800 }, { "epoch": 5.178571428571429, "grad_norm": 14854.916015625, "learning_rate": 2.559047002236995e-05, "loss": 3.9664, "step": 2900 }, { "epoch": 5.357142857142857, "grad_norm": 22451.359375, "learning_rate": 2.4114397920948657e-05, "loss": 3.9619, "step": 3000 }, { "epoch": 5.357142857142857, "eval_cer_score": 0.9613026290008414, "eval_loss": 4.328557968139648, "eval_runtime": 6.6271, "eval_samples_per_second": 37.724, "eval_steps_per_second": 6.338, "eval_wer_score": 1.437584410573027, "step": 3000 }, { "epoch": 5.535714285714286, "grad_norm": 14314.1396484375, "learning_rate": 2.2641413191083445e-05, "loss": 3.9524, "step": 3100 }, { "epoch": 5.714285714285714, "grad_norm": 21002.537109375, "learning_rate": 2.117665092816885e-05, "loss": 3.9353, "step": 3200 }, { "epoch": 5.892857142857143, "grad_norm": 12572.75, "learning_rate": 1.9725217562568948e-05, "loss": 3.9404, "step": 3300 }, { "epoch": 6.071428571428571, "grad_norm": 18614.310546875, "learning_rate": 1.829217305766289e-05, "loss": 3.9334, "step": 3400 }, { "epoch": 6.25, "grad_norm": 11758.970703125, "learning_rate": 1.6882513269882917e-05, "loss": 3.9161, "step": 3500 }, { "epoch": 6.428571428571429, "grad_norm": 16748.3984375, "learning_rate": 1.5501152532241005e-05, "loss": 3.9079, "step": 3600 }, { "epoch": 6.607142857142857, "grad_norm": 9738.3134765625, "learning_rate": 1.4152906522061048e-05, "loss": 3.9186, "step": 3700 }, { "epoch": 6.785714285714286, "grad_norm": 15285.71875, "learning_rate": 1.2842475472642968e-05, "loss": 3.9058, "step": 3800 }, { "epoch": 6.964285714285714, "grad_norm": 8818.5263671875, "learning_rate": 1.1574427787385852e-05, "loss": 3.9063, "step": 3900 }, { "epoch": 7.142857142857143, "grad_norm": 12031.4169921875, "learning_rate": 1.0353184113494386e-05, "loss": 3.8988, "step": 4000 }, { "epoch": 7.142857142857143, "eval_cer_score": 0.9185309406083239, "eval_loss": 4.195689678192139, "eval_runtime": 6.7441, "eval_samples_per_second": 37.069, "eval_steps_per_second": 6.228, "eval_wer_score": 1.2407871888867452, "step": 4000 }, { "epoch": 7.321428571428571, "grad_norm": 7690.8115234375, "learning_rate": 9.183001930790483e-06, "loss": 3.9009, "step": 4100 }, { "epoch": 7.5, "grad_norm": 9677.2919921875, "learning_rate": 8.067960709356478e-06, "loss": 3.8754, "step": 4200 }, { "epoch": 7.678571428571429, "grad_norm": 6019.1201171875, "learning_rate": 7.011947687752804e-06, "loss": 3.884, "step": 4300 }, { "epoch": 7.857142857142857, "grad_norm": 7279.31005859375, "learning_rate": 6.018644321390288e-06, "loss": 3.8809, "step": 4400 }, { "epoch": 8.035714285714286, "grad_norm": 4656.92138671875, "learning_rate": 5.091513448300142e-06, "loss": 3.8814, "step": 4500 }, { "epoch": 8.214285714285714, "grad_norm": 4017.18896484375, "learning_rate": 4.23378721704443e-06, "loss": 3.8856, "step": 4600 }, { "epoch": 8.392857142857142, "grad_norm": 4232.76953125, "learning_rate": 3.448455818852267e-06, "loss": 3.8654, "step": 4700 }, { "epoch": 8.571428571428571, "grad_norm": 2418.802490234375, "learning_rate": 2.7382570632638854e-06, "loss": 3.8729, "step": 4800 }, { "epoch": 8.75, "grad_norm": 69.66502380371094, "learning_rate": 2.1056668336235622e-06, "loss": 3.8814, "step": 4900 }, { "epoch": 8.928571428571429, "grad_norm": 919.9892578125, "learning_rate": 1.552890455695369e-06, "loss": 3.8713, "step": 5000 }, { "epoch": 8.928571428571429, "eval_cer_score": 0.9372601224024065, "eval_loss": 4.1322102546691895, "eval_runtime": 6.5532, "eval_samples_per_second": 38.149, "eval_steps_per_second": 6.409, "eval_wer_score": 1.0818059039166505, "step": 5000 }, { "epoch": 9.107142857142858, "grad_norm": 866.0170288085938, "learning_rate": 1.081855009492383e-06, "loss": 3.8652, "step": 5100 }, { "epoch": 9.285714285714286, "grad_norm": 1821.4014892578125, "learning_rate": 6.942026111217359e-07, "loss": 3.8662, "step": 5200 }, { "epoch": 9.464285714285714, "grad_norm": 121.43854522705078, "learning_rate": 3.9128468806614306e-07, "loss": 3.8734, "step": 5300 }, { "epoch": 9.642857142857142, "grad_norm": 804.0866088867188, "learning_rate": 1.7415726785939834e-07, "loss": 3.8641, "step": 5400 }, { "epoch": 9.821428571428571, "grad_norm": 238.31356811523438, "learning_rate": 4.357729658039378e-08, "loss": 3.8754, "step": 5500 }, { "epoch": 10.0, "grad_norm": 12.470285415649414, "learning_rate": 0.0, "loss": 3.8738, "step": 5600 } ], "logging_steps": 100, "max_steps": 5600, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.4758827926528e+16, "train_batch_size": 224, "trial_name": null, "trial_params": null }