{ "best_global_step": 19272, "best_metric": 0.029430464545251396, "best_model_checkpoint": "checkpoints/checkpoint-19272", "epoch": 2.9999610849515506, "eval_steps": 1606, "global_step": 19272, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07783009689847063, "grad_norm": 0.2925865948200226, "learning_rate": 4.991766654390158e-05, "loss": 2.2108, "step": 500 }, { "epoch": 0.15566019379694127, "grad_norm": 0.11884481459856033, "learning_rate": 4.966988962239142e-05, "loss": 0.3886, "step": 1000 }, { "epoch": 0.2334902906954119, "grad_norm": 0.09392867982387543, "learning_rate": 4.925831306092169e-05, "loss": 0.2271, "step": 1500 }, { "epoch": 0.2499902712378877, "eval_avg": 26.749637281629344, "eval_cer": 0.6640905490705347, "eval_der": 78.87487905513909, "eval_loss": 0.13775022327899933, "eval_runtime": 1439.866, "eval_samples_per_second": 94.261, "eval_steps_per_second": 0.369, "eval_wer": 0.7099422406784007, "step": 1606 }, { "epoch": 0.31132038759388253, "grad_norm": 0.0717502012848854, "learning_rate": 4.8685669587267704e-05, "loss": 0.1657, "step": 2000 }, { "epoch": 0.3891504844923532, "grad_norm": 0.0638226792216301, "learning_rate": 4.795576135848184e-05, "loss": 0.1334, "step": 2500 }, { "epoch": 0.4669805813908238, "grad_norm": 0.05441045016050339, "learning_rate": 4.707343471586959e-05, "loss": 0.1135, "step": 3000 }, { "epoch": 0.4999805424757754, "eval_avg": 26.867260983703588, "eval_cer": 0.6597599235984197, "eval_der": 79.24906581788363, "eval_loss": 0.07722582668066025, "eval_runtime": 1442.4649, "eval_samples_per_second": 94.091, "eval_steps_per_second": 0.368, "eval_wer": 0.6929572096287142, "step": 3212 }, { "epoch": 0.5448106782892945, "grad_norm": 0.050636373460292816, "learning_rate": 4.604454800693874e-05, "loss": 0.1002, "step": 3500 }, { "epoch": 0.6226407751877651, "grad_norm": 0.0581156350672245, "learning_rate": 4.48759326879731e-05, "loss": 0.0906, "step": 4000 }, { "epoch": 0.7004708720862357, "grad_norm": 0.050500743091106415, "learning_rate": 4.3575347965496405e-05, "loss": 0.0828, "step": 4500 }, { "epoch": 0.749970813713663, "eval_avg": 26.888053782081823, "eval_cer": 0.6586451367564623, "eval_der": 79.3192047037025, "eval_loss": 0.05902250111103058, "eval_runtime": 1431.2383, "eval_samples_per_second": 94.829, "eval_steps_per_second": 0.371, "eval_wer": 0.6863115057865005, "step": 4818 }, { "epoch": 0.7783009689847064, "grad_norm": 0.04594194516539574, "learning_rate": 4.9989118673869795e-05, "loss": 0.0765, "step": 5000 }, { "epoch": 0.856131065883177, "grad_norm": 0.04987065866589546, "learning_rate": 4.984611214237347e-05, "loss": 0.0724, "step": 5500 }, { "epoch": 0.9339611627816476, "grad_norm": 0.04362853616476059, "learning_rate": 4.953813591358179e-05, "loss": 0.0683, "step": 6000 }, { "epoch": 0.9999610849515508, "eval_avg": 0.708712044715695, "eval_cer": 0.6546741642910598, "eval_der": 0.7934032551594553, "eval_loss": 0.049141544848680496, "eval_runtime": 305.8639, "eval_samples_per_second": 443.737, "eval_steps_per_second": 1.736, "eval_wer": 0.6780587146965699, "step": 6424 }, { "epoch": 1.0118301747285676, "grad_norm": 0.04417261481285095, "learning_rate": 4.99981315851881e-05, "loss": 0.0626, "step": 6500 }, { "epoch": 1.0896602716270383, "grad_norm": 0.039167486131191254, "learning_rate": 4.9890257744619245e-05, "loss": 0.061, "step": 7000 }, { "epoch": 1.1674903685255087, "grad_norm": 0.035239290446043015, "learning_rate": 4.961712109503609e-05, "loss": 0.0583, "step": 7500 }, { "epoch": 1.2453204654239793, "grad_norm": 0.03707383945584297, "learning_rate": 4.918053517048949e-05, "loss": 0.056, "step": 8000 }, { "epoch": 1.2499902712378876, "eval_avg": 0.7079419812877328, "eval_cer": 0.6543333833811898, "eval_der": 0.793713077370868, "eval_loss": 0.043703265488147736, "eval_runtime": 307.3798, "eval_samples_per_second": 441.548, "eval_steps_per_second": 1.728, "eval_wer": 0.6757794831111406, "step": 8030 }, { "epoch": 1.32315056232245, "grad_norm": 0.035194575786590576, "learning_rate": 4.8583398752382485e-05, "loss": 0.0539, "step": 8500 }, { "epoch": 1.4009806592209206, "grad_norm": 0.03347332403063774, "learning_rate": 4.782967662255196e-05, "loss": 0.0523, "step": 9000 }, { "epoch": 1.4788107561193913, "grad_norm": 0.06645191460847855, "learning_rate": 4.692437323847159e-05, "loss": 0.0507, "step": 9500 }, { "epoch": 1.4999805424757753, "eval_avg": 0.7073388677263619, "eval_cer": 0.6541002942380697, "eval_der": 0.7938397323961762, "eval_loss": 0.03928952291607857, "eval_runtime": 307.3123, "eval_samples_per_second": 441.645, "eval_steps_per_second": 1.728, "eval_wer": 0.6740765765448399, "step": 9636 }, { "epoch": 1.556640853017862, "grad_norm": 0.031485334038734436, "learning_rate": 4.587349950536374e-05, "loss": 0.0492, "step": 10000 }, { "epoch": 1.6344709499163326, "grad_norm": 0.031592607498168945, "learning_rate": 4.46840328658421e-05, "loss": 0.0481, "step": 10500 }, { "epoch": 1.7123010468148032, "grad_norm": 0.031952131539583206, "learning_rate": 4.336387097207617e-05, "loss": 0.0468, "step": 11000 }, { "epoch": 1.749970813713663, "eval_avg": 0.7068964281422255, "eval_cer": 0.6539495257646198, "eval_der": 0.7937636951733035, "eval_loss": 0.036437951028347015, "eval_runtime": 308.1694, "eval_samples_per_second": 440.417, "eval_steps_per_second": 1.723, "eval_wer": 0.6729760634887532, "step": 11242 }, { "epoch": 1.7901311437132739, "grad_norm": 0.030866818502545357, "learning_rate": 4.9978063925651226e-05, "loss": 0.0457, "step": 11500 }, { "epoch": 1.8679612406117445, "grad_norm": 0.029339410364627838, "learning_rate": 4.980989359026424e-05, "loss": 0.0451, "step": 12000 }, { "epoch": 1.9457913375102152, "grad_norm": 0.02953779138624668, "learning_rate": 4.94769940363958e-05, "loss": 0.0439, "step": 12500 }, { "epoch": 1.9999610849515508, "eval_avg": 0.706664474285978, "eval_cer": 0.6538952007882788, "eval_der": 0.7938883608526655, "eval_loss": 0.0344935841858387, "eval_runtime": 304.0677, "eval_samples_per_second": 446.358, "eval_steps_per_second": 1.746, "eval_wer": 0.6722098612169899, "step": 12848 }, { "epoch": 2.0236603494571352, "grad_norm": 0.028502434492111206, "learning_rate": 4.898157560336646e-05, "loss": 0.043, "step": 13000 }, { "epoch": 2.101490446355606, "grad_norm": 0.030778545886278152, "learning_rate": 4.832692770033863e-05, "loss": 0.0417, "step": 13500 }, { "epoch": 2.1793205432540765, "grad_norm": 0.0269626472145319, "learning_rate": 4.7517396965763394e-05, "loss": 0.041, "step": 14000 }, { "epoch": 2.2499902712378876, "eval_avg": 0.7064455694431908, "eval_cer": 0.6538135050224774, "eval_der": 0.7939745658437141, "eval_loss": 0.032875534147024155, "eval_runtime": 304.6489, "eval_samples_per_second": 445.506, "eval_steps_per_second": 1.743, "eval_wer": 0.6715486374633808, "step": 14454 }, { "epoch": 2.257150640152547, "grad_norm": 0.03818966820836067, "learning_rate": 4.6558358407185946e-05, "loss": 0.0402, "step": 14500 }, { "epoch": 2.3349807370510174, "grad_norm": 0.029628828167915344, "learning_rate": 4.5456179713031586e-05, "loss": 0.0395, "step": 15000 }, { "epoch": 2.4128108339494885, "grad_norm": 0.02792350761592388, "learning_rate": 4.4218178973329804e-05, "loss": 0.0392, "step": 15500 }, { "epoch": 2.4906409308479587, "grad_norm": 0.03364017978310585, "learning_rate": 4.2852576090096216e-05, "loss": 0.0384, "step": 16000 }, { "epoch": 2.4999805424757753, "eval_avg": 0.7061941356307186, "eval_cer": 0.6537081045699137, "eval_der": 0.7939727975362082, "eval_loss": 0.031167298555374146, "eval_runtime": 303.0246, "eval_samples_per_second": 447.894, "eval_steps_per_second": 1.752, "eval_wer": 0.670901504786034, "step": 16060 }, { "epoch": 2.5684710277464298, "grad_norm": 0.032605357468128204, "learning_rate": 4.136843819999117e-05, "loss": 0.038, "step": 16500 }, { "epoch": 2.6463011246449, "grad_norm": 0.027949590235948563, "learning_rate": 3.977561947162954e-05, "loss": 0.0376, "step": 17000 }, { "epoch": 2.724131221543371, "grad_norm": 0.026584528386592865, "learning_rate": 3.80846956772673e-05, "loss": 0.0372, "step": 17500 }, { "epoch": 2.749970813713663, "eval_avg": 0.706001258948505, "eval_cer": 0.6536609451579365, "eval_der": 0.7939834810607371, "eval_loss": 0.030004331842064857, "eval_runtime": 304.4012, "eval_samples_per_second": 445.869, "eval_steps_per_second": 1.744, "eval_wer": 0.6703593506268415, "step": 17666 }, { "epoch": 2.8019613184418413, "grad_norm": 0.027681950479745865, "learning_rate": 4.9963175402849494e-05, "loss": 0.0369, "step": 18000 }, { "epoch": 2.8797914153403124, "grad_norm": 0.037826769053936005, "learning_rate": 4.976986707530646e-05, "loss": 0.0367, "step": 18500 }, { "epoch": 2.9576215122387826, "grad_norm": 0.024277370423078537, "learning_rate": 4.94120952916684e-05, "loss": 0.0361, "step": 19000 }, { "epoch": 2.9999610849515506, "eval_avg": 0.029430464545251396, "eval_cer": 0.014860838907614712, "eval_der": 0.7972269254290619, "eval_loss": 0.029503749683499336, "eval_runtime": 898.4638, "eval_samples_per_second": 151.061, "eval_steps_per_second": 0.591, "eval_wer": 0.04400009018288808, "step": 19272 } ], "logging_steps": 500, "max_steps": 19272, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1606, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5041733416362967e+18, "train_batch_size": 256, "trial_name": null, "trial_params": null }