| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 1000, | |
| "global_step": 5600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.17857142857142858, | |
| "grad_norm": 36795.6953125, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 11.5686, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 15962.9775390625, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 6.6701, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5357142857142857, | |
| "grad_norm": 22162.76953125, | |
| "learning_rate": 4.999825642177387e-05, | |
| "loss": 4.7406, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 22004.275390625, | |
| "learning_rate": 4.9937256716606394e-05, | |
| "loss": 4.7143, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8928571428571429, | |
| "grad_norm": 19477.404296875, | |
| "learning_rate": 4.978932115289164e-05, | |
| "loss": 4.6796, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0714285714285714, | |
| "grad_norm": 20002.865234375, | |
| "learning_rate": 4.955496546118439e-05, | |
| "loss": 4.6291, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 19484.634765625, | |
| "learning_rate": 4.923500664848326e-05, | |
| "loss": 4.58, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 18751.654296875, | |
| "learning_rate": 4.883056014999423e-05, | |
| "loss": 4.5509, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6071428571428572, | |
| "grad_norm": 18834.03515625, | |
| "learning_rate": 4.834303594051854e-05, | |
| "loss": 4.5004, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.7857142857142856, | |
| "grad_norm": 17574.287109375, | |
| "learning_rate": 4.7774133619021514e-05, | |
| "loss": 4.4528, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.7857142857142856, | |
| "eval_cer_score": 0.996767055081083, | |
| "eval_loss": 4.664954662322998, | |
| "eval_runtime": 7.0528, | |
| "eval_samples_per_second": 35.447, | |
| "eval_steps_per_second": 5.955, | |
| "eval_wer_score": 1.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.9642857142857144, | |
| "grad_norm": 17599.75, | |
| "learning_rate": 4.7125836483518276e-05, | |
| "loss": 4.4168, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 15682.912109375, | |
| "learning_rate": 4.6400404616932505e-05, | |
| "loss": 4.3702, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.3214285714285716, | |
| "grad_norm": 17537.24609375, | |
| "learning_rate": 4.560036700803213e-05, | |
| "loss": 4.3231, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 18234.52734375, | |
| "learning_rate": 4.4728512734909844e-05, | |
| "loss": 4.2926, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.678571428571429, | |
| "grad_norm": 18046.029296875, | |
| "learning_rate": 4.378788124174441e-05, | |
| "loss": 4.2644, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 17269.84765625, | |
| "learning_rate": 4.2781751742739885e-05, | |
| "loss": 4.2458, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.0357142857142856, | |
| "grad_norm": 16300.3486328125, | |
| "learning_rate": 4.1713631790182364e-05, | |
| "loss": 4.2229, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.2142857142857144, | |
| "grad_norm": 16482.568359375, | |
| "learning_rate": 4.058724504646834e-05, | |
| "loss": 4.1953, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.392857142857143, | |
| "grad_norm": 15220.8369140625, | |
| "learning_rate": 3.9406518302733416e-05, | |
| "loss": 4.1883, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.571428571428571, | |
| "grad_norm": 14242.6416015625, | |
| "learning_rate": 3.817556778933698e-05, | |
| "loss": 4.1858, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.571428571428571, | |
| "eval_cer_score": 0.9825155884420779, | |
| "eval_loss": 4.381639003753662, | |
| "eval_runtime": 6.4858, | |
| "eval_samples_per_second": 38.546, | |
| "eval_steps_per_second": 6.476, | |
| "eval_wer_score": 1.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 14843.748046875, | |
| "learning_rate": 3.689868482592684e-05, | |
| "loss": 4.1807, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.928571428571429, | |
| "grad_norm": 29719.095703125, | |
| "learning_rate": 3.5580320861110625e-05, | |
| "loss": 4.1251, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.107142857142857, | |
| "grad_norm": 17429.841796875, | |
| "learning_rate": 3.4225071953887976e-05, | |
| "loss": 4.031, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.285714285714286, | |
| "grad_norm": 28763.1015625, | |
| "learning_rate": 3.2837662750944535e-05, | |
| "loss": 4.0189, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.464285714285714, | |
| "grad_norm": 16039.8037109375, | |
| "learning_rate": 3.1422930015665484e-05, | |
| "loss": 4.0042, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.642857142857143, | |
| "grad_norm": 27426.4765625, | |
| "learning_rate": 2.9985805766289817e-05, | |
| "loss": 3.9906, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.821428571428571, | |
| "grad_norm": 17122.708984375, | |
| "learning_rate": 2.853130008198855e-05, | |
| "loss": 3.9787, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 25625.185546875, | |
| "learning_rate": 2.7064483636808313e-05, | |
| "loss": 3.9773, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.178571428571429, | |
| "grad_norm": 14854.916015625, | |
| "learning_rate": 2.559047002236995e-05, | |
| "loss": 3.9664, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.357142857142857, | |
| "grad_norm": 22451.359375, | |
| "learning_rate": 2.4114397920948657e-05, | |
| "loss": 3.9619, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.357142857142857, | |
| "eval_cer_score": 0.9613026290008414, | |
| "eval_loss": 4.328557968139648, | |
| "eval_runtime": 6.6271, | |
| "eval_samples_per_second": 37.724, | |
| "eval_steps_per_second": 6.338, | |
| "eval_wer_score": 1.437584410573027, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.535714285714286, | |
| "grad_norm": 14314.1396484375, | |
| "learning_rate": 2.2641413191083445e-05, | |
| "loss": 3.9524, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 5.714285714285714, | |
| "grad_norm": 21002.537109375, | |
| "learning_rate": 2.117665092816885e-05, | |
| "loss": 3.9353, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.892857142857143, | |
| "grad_norm": 12572.75, | |
| "learning_rate": 1.9725217562568948e-05, | |
| "loss": 3.9404, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.071428571428571, | |
| "grad_norm": 18614.310546875, | |
| "learning_rate": 1.829217305766289e-05, | |
| "loss": 3.9334, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 11758.970703125, | |
| "learning_rate": 1.6882513269882917e-05, | |
| "loss": 3.9161, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.428571428571429, | |
| "grad_norm": 16748.3984375, | |
| "learning_rate": 1.5501152532241005e-05, | |
| "loss": 3.9079, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.607142857142857, | |
| "grad_norm": 9738.3134765625, | |
| "learning_rate": 1.4152906522061048e-05, | |
| "loss": 3.9186, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 6.785714285714286, | |
| "grad_norm": 15285.71875, | |
| "learning_rate": 1.2842475472642968e-05, | |
| "loss": 3.9058, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 6.964285714285714, | |
| "grad_norm": 8818.5263671875, | |
| "learning_rate": 1.1574427787385852e-05, | |
| "loss": 3.9063, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "grad_norm": 12031.4169921875, | |
| "learning_rate": 1.0353184113494386e-05, | |
| "loss": 3.8988, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "eval_cer_score": 0.9185309406083239, | |
| "eval_loss": 4.195689678192139, | |
| "eval_runtime": 6.7441, | |
| "eval_samples_per_second": 37.069, | |
| "eval_steps_per_second": 6.228, | |
| "eval_wer_score": 1.2407871888867452, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.321428571428571, | |
| "grad_norm": 7690.8115234375, | |
| "learning_rate": 9.183001930790483e-06, | |
| "loss": 3.9009, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 9677.2919921875, | |
| "learning_rate": 8.067960709356478e-06, | |
| "loss": 3.8754, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 7.678571428571429, | |
| "grad_norm": 6019.1201171875, | |
| "learning_rate": 7.011947687752804e-06, | |
| "loss": 3.884, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 7.857142857142857, | |
| "grad_norm": 7279.31005859375, | |
| "learning_rate": 6.018644321390288e-06, | |
| "loss": 3.8809, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 8.035714285714286, | |
| "grad_norm": 4656.92138671875, | |
| "learning_rate": 5.091513448300142e-06, | |
| "loss": 3.8814, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 8.214285714285714, | |
| "grad_norm": 4017.18896484375, | |
| "learning_rate": 4.23378721704443e-06, | |
| "loss": 3.8856, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.392857142857142, | |
| "grad_norm": 4232.76953125, | |
| "learning_rate": 3.448455818852267e-06, | |
| "loss": 3.8654, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 8.571428571428571, | |
| "grad_norm": 2418.802490234375, | |
| "learning_rate": 2.7382570632638854e-06, | |
| "loss": 3.8729, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 69.66502380371094, | |
| "learning_rate": 2.1056668336235622e-06, | |
| "loss": 3.8814, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 8.928571428571429, | |
| "grad_norm": 919.9892578125, | |
| "learning_rate": 1.552890455695369e-06, | |
| "loss": 3.8713, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.928571428571429, | |
| "eval_cer_score": 0.9372601224024065, | |
| "eval_loss": 4.1322102546691895, | |
| "eval_runtime": 6.5532, | |
| "eval_samples_per_second": 38.149, | |
| "eval_steps_per_second": 6.409, | |
| "eval_wer_score": 1.0818059039166505, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 9.107142857142858, | |
| "grad_norm": 866.0170288085938, | |
| "learning_rate": 1.081855009492383e-06, | |
| "loss": 3.8652, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 9.285714285714286, | |
| "grad_norm": 1821.4014892578125, | |
| "learning_rate": 6.942026111217359e-07, | |
| "loss": 3.8662, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 9.464285714285714, | |
| "grad_norm": 121.43854522705078, | |
| "learning_rate": 3.9128468806614306e-07, | |
| "loss": 3.8734, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 9.642857142857142, | |
| "grad_norm": 804.0866088867188, | |
| "learning_rate": 1.7415726785939834e-07, | |
| "loss": 3.8641, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 9.821428571428571, | |
| "grad_norm": 238.31356811523438, | |
| "learning_rate": 4.357729658039378e-08, | |
| "loss": 3.8754, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 12.470285415649414, | |
| "learning_rate": 0.0, | |
| "loss": 3.8738, | |
| "step": 5600 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 5600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.4758827926528e+16, | |
| "train_batch_size": 224, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |