{
  "best_global_step": 19272,
  "best_metric": 0.029430464545251396,
  "best_model_checkpoint": "checkpoints/checkpoint-19272",
  "epoch": 2.9999610849515506,
  "eval_steps": 1606,
  "global_step": 19272,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07783009689847063,
      "grad_norm": 0.2925865948200226,
      "learning_rate": 4.991766654390158e-05,
      "loss": 2.2108,
      "step": 500
    },
    {
      "epoch": 0.15566019379694127,
      "grad_norm": 0.11884481459856033,
      "learning_rate": 4.966988962239142e-05,
      "loss": 0.3886,
      "step": 1000
    },
    {
      "epoch": 0.2334902906954119,
      "grad_norm": 0.09392867982387543,
      "learning_rate": 4.925831306092169e-05,
      "loss": 0.2271,
      "step": 1500
    },
    {
      "epoch": 0.2499902712378877,
      "eval_avg": 26.749637281629344,
      "eval_cer": 0.6640905490705347,
      "eval_der": 78.87487905513909,
      "eval_loss": 0.13775022327899933,
      "eval_runtime": 1439.866,
      "eval_samples_per_second": 94.261,
      "eval_steps_per_second": 0.369,
      "eval_wer": 0.7099422406784007,
      "step": 1606
    },
    {
      "epoch": 0.31132038759388253,
      "grad_norm": 0.0717502012848854,
      "learning_rate": 4.8685669587267704e-05,
      "loss": 0.1657,
      "step": 2000
    },
    {
      "epoch": 0.3891504844923532,
      "grad_norm": 0.0638226792216301,
      "learning_rate": 4.795576135848184e-05,
      "loss": 0.1334,
      "step": 2500
    },
    {
      "epoch": 0.4669805813908238,
      "grad_norm": 0.05441045016050339,
      "learning_rate": 4.707343471586959e-05,
      "loss": 0.1135,
      "step": 3000
    },
    {
      "epoch": 0.4999805424757754,
      "eval_avg": 26.867260983703588,
      "eval_cer": 0.6597599235984197,
      "eval_der": 79.24906581788363,
      "eval_loss": 0.07722582668066025,
      "eval_runtime": 1442.4649,
      "eval_samples_per_second": 94.091,
      "eval_steps_per_second": 0.368,
      "eval_wer": 0.6929572096287142,
      "step": 3212
    },
    {
      "epoch": 0.5448106782892945,
      "grad_norm": 0.050636373460292816,
      "learning_rate": 4.604454800693874e-05,
      "loss": 0.1002,
      "step": 3500
    },
    {
      "epoch": 0.6226407751877651,
      "grad_norm": 0.0581156350672245,
      "learning_rate": 4.48759326879731e-05,
      "loss": 0.0906,
      "step": 4000
    },
    {
      "epoch": 0.7004708720862357,
      "grad_norm": 0.050500743091106415,
      "learning_rate": 4.3575347965496405e-05,
      "loss": 0.0828,
      "step": 4500
    },
    {
      "epoch": 0.749970813713663,
      "eval_avg": 26.888053782081823,
      "eval_cer": 0.6586451367564623,
      "eval_der": 79.3192047037025,
      "eval_loss": 0.05902250111103058,
      "eval_runtime": 1431.2383,
      "eval_samples_per_second": 94.829,
      "eval_steps_per_second": 0.371,
      "eval_wer": 0.6863115057865005,
      "step": 4818
    },
    {
      "epoch": 0.7783009689847064,
      "grad_norm": 0.04594194516539574,
      "learning_rate": 4.9989118673869795e-05,
      "loss": 0.0765,
      "step": 5000
    },
    {
      "epoch": 0.856131065883177,
      "grad_norm": 0.04987065866589546,
      "learning_rate": 4.984611214237347e-05,
      "loss": 0.0724,
      "step": 5500
    },
    {
      "epoch": 0.9339611627816476,
      "grad_norm": 0.04362853616476059,
      "learning_rate": 4.953813591358179e-05,
      "loss": 0.0683,
      "step": 6000
    },
    {
      "epoch": 0.9999610849515508,
      "eval_avg": 0.708712044715695,
      "eval_cer": 0.6546741642910598,
      "eval_der": 0.7934032551594553,
      "eval_loss": 0.049141544848680496,
      "eval_runtime": 305.8639,
      "eval_samples_per_second": 443.737,
      "eval_steps_per_second": 1.736,
      "eval_wer": 0.6780587146965699,
      "step": 6424
    },
    {
      "epoch": 1.0118301747285676,
      "grad_norm": 0.04417261481285095,
      "learning_rate": 4.99981315851881e-05,
      "loss": 0.0626,
      "step": 6500
    },
    {
      "epoch": 1.0896602716270383,
      "grad_norm": 0.039167486131191254,
      "learning_rate": 4.9890257744619245e-05,
      "loss": 0.061,
      "step": 7000
    },
    {
      "epoch": 1.1674903685255087,
      "grad_norm": 0.035239290446043015,
      "learning_rate": 4.961712109503609e-05,
      "loss": 0.0583,
      "step": 7500
    },
    {
      "epoch": 1.2453204654239793,
      "grad_norm": 0.03707383945584297,
      "learning_rate": 4.918053517048949e-05,
      "loss": 0.056,
      "step": 8000
    },
    {
      "epoch": 1.2499902712378876,
      "eval_avg": 0.7079419812877328,
      "eval_cer": 0.6543333833811898,
      "eval_der": 0.793713077370868,
      "eval_loss": 0.043703265488147736,
      "eval_runtime": 307.3798,
      "eval_samples_per_second": 441.548,
      "eval_steps_per_second": 1.728,
      "eval_wer": 0.6757794831111406,
      "step": 8030
    },
    {
      "epoch": 1.32315056232245,
      "grad_norm": 0.035194575786590576,
      "learning_rate": 4.8583398752382485e-05,
      "loss": 0.0539,
      "step": 8500
    },
    {
      "epoch": 1.4009806592209206,
      "grad_norm": 0.03347332403063774,
      "learning_rate": 4.782967662255196e-05,
      "loss": 0.0523,
      "step": 9000
    },
    {
      "epoch": 1.4788107561193913,
      "grad_norm": 0.06645191460847855,
      "learning_rate": 4.692437323847159e-05,
      "loss": 0.0507,
      "step": 9500
    },
    {
      "epoch": 1.4999805424757753,
      "eval_avg": 0.7073388677263619,
      "eval_cer": 0.6541002942380697,
      "eval_der": 0.7938397323961762,
      "eval_loss": 0.03928952291607857,
      "eval_runtime": 307.3123,
      "eval_samples_per_second": 441.645,
      "eval_steps_per_second": 1.728,
      "eval_wer": 0.6740765765448399,
      "step": 9636
    },
    {
      "epoch": 1.556640853017862,
      "grad_norm": 0.031485334038734436,
      "learning_rate": 4.587349950536374e-05,
      "loss": 0.0492,
      "step": 10000
    },
    {
      "epoch": 1.6344709499163326,
      "grad_norm": 0.031592607498168945,
      "learning_rate": 4.46840328658421e-05,
      "loss": 0.0481,
      "step": 10500
    },
    {
      "epoch": 1.7123010468148032,
      "grad_norm": 0.031952131539583206,
      "learning_rate": 4.336387097207617e-05,
      "loss": 0.0468,
      "step": 11000
    },
    {
      "epoch": 1.749970813713663,
      "eval_avg": 0.7068964281422255,
      "eval_cer": 0.6539495257646198,
      "eval_der": 0.7937636951733035,
      "eval_loss": 0.036437951028347015,
      "eval_runtime": 308.1694,
      "eval_samples_per_second": 440.417,
      "eval_steps_per_second": 1.723,
      "eval_wer": 0.6729760634887532,
      "step": 11242
    },
    {
      "epoch": 1.7901311437132739,
      "grad_norm": 0.030866818502545357,
      "learning_rate": 4.9978063925651226e-05,
      "loss": 0.0457,
      "step": 11500
    },
    {
      "epoch": 1.8679612406117445,
      "grad_norm": 0.029339410364627838,
      "learning_rate": 4.980989359026424e-05,
      "loss": 0.0451,
      "step": 12000
    },
    {
      "epoch": 1.9457913375102152,
      "grad_norm": 0.02953779138624668,
      "learning_rate": 4.94769940363958e-05,
      "loss": 0.0439,
      "step": 12500
    },
    {
      "epoch": 1.9999610849515508,
      "eval_avg": 0.706664474285978,
      "eval_cer": 0.6538952007882788,
      "eval_der": 0.7938883608526655,
      "eval_loss": 0.0344935841858387,
      "eval_runtime": 304.0677,
      "eval_samples_per_second": 446.358,
      "eval_steps_per_second": 1.746,
      "eval_wer": 0.6722098612169899,
      "step": 12848
    },
    {
      "epoch": 2.0236603494571352,
      "grad_norm": 0.028502434492111206,
      "learning_rate": 4.898157560336646e-05,
      "loss": 0.043,
      "step": 13000
    },
    {
      "epoch": 2.101490446355606,
      "grad_norm": 0.030778545886278152,
      "learning_rate": 4.832692770033863e-05,
      "loss": 0.0417,
      "step": 13500
    },
    {
      "epoch": 2.1793205432540765,
      "grad_norm": 0.0269626472145319,
      "learning_rate": 4.7517396965763394e-05,
      "loss": 0.041,
      "step": 14000
    },
    {
      "epoch": 2.2499902712378876,
      "eval_avg": 0.7064455694431908,
      "eval_cer": 0.6538135050224774,
      "eval_der": 0.7939745658437141,
      "eval_loss": 0.032875534147024155,
      "eval_runtime": 304.6489,
      "eval_samples_per_second": 445.506,
      "eval_steps_per_second": 1.743,
      "eval_wer": 0.6715486374633808,
      "step": 14454
    },
    {
      "epoch": 2.257150640152547,
      "grad_norm": 0.03818966820836067,
      "learning_rate": 4.6558358407185946e-05,
      "loss": 0.0402,
      "step": 14500
    },
    {
      "epoch": 2.3349807370510174,
      "grad_norm": 0.029628828167915344,
      "learning_rate": 4.5456179713031586e-05,
      "loss": 0.0395,
      "step": 15000
    },
    {
      "epoch": 2.4128108339494885,
      "grad_norm": 0.02792350761592388,
      "learning_rate": 4.4218178973329804e-05,
      "loss": 0.0392,
      "step": 15500
    },
    {
      "epoch": 2.4906409308479587,
      "grad_norm": 0.03364017978310585,
      "learning_rate": 4.2852576090096216e-05,
      "loss": 0.0384,
      "step": 16000
    },
    {
      "epoch": 2.4999805424757753,
      "eval_avg": 0.7061941356307186,
      "eval_cer": 0.6537081045699137,
      "eval_der": 0.7939727975362082,
      "eval_loss": 0.031167298555374146,
      "eval_runtime": 303.0246,
      "eval_samples_per_second": 447.894,
      "eval_steps_per_second": 1.752,
      "eval_wer": 0.670901504786034,
      "step": 16060
    },
    {
      "epoch": 2.5684710277464298,
      "grad_norm": 0.032605357468128204,
      "learning_rate": 4.136843819999117e-05,
      "loss": 0.038,
      "step": 16500
    },
    {
      "epoch": 2.6463011246449,
      "grad_norm": 0.027949590235948563,
      "learning_rate": 3.977561947162954e-05,
      "loss": 0.0376,
      "step": 17000
    },
    {
      "epoch": 2.724131221543371,
      "grad_norm": 0.026584528386592865,
      "learning_rate": 3.80846956772673e-05,
      "loss": 0.0372,
      "step": 17500
    },
    {
      "epoch": 2.749970813713663,
      "eval_avg": 0.706001258948505,
      "eval_cer": 0.6536609451579365,
      "eval_der": 0.7939834810607371,
      "eval_loss": 0.030004331842064857,
      "eval_runtime": 304.4012,
      "eval_samples_per_second": 445.869,
      "eval_steps_per_second": 1.744,
      "eval_wer": 0.6703593506268415,
      "step": 17666
    },
    {
      "epoch": 2.8019613184418413,
      "grad_norm": 0.027681950479745865,
      "learning_rate": 4.9963175402849494e-05,
      "loss": 0.0369,
      "step": 18000
    },
    {
      "epoch": 2.8797914153403124,
      "grad_norm": 0.037826769053936005,
      "learning_rate": 4.976986707530646e-05,
      "loss": 0.0367,
      "step": 18500
    },
    {
      "epoch": 2.9576215122387826,
      "grad_norm": 0.024277370423078537,
      "learning_rate": 4.94120952916684e-05,
      "loss": 0.0361,
      "step": 19000
    },
    {
      "epoch": 2.9999610849515506,
      "eval_avg": 0.029430464545251396,
      "eval_cer": 0.014860838907614712,
      "eval_der": 0.7972269254290619,
      "eval_loss": 0.029503749683499336,
      "eval_runtime": 898.4638,
      "eval_samples_per_second": 151.061,
      "eval_steps_per_second": 0.591,
      "eval_wer": 0.04400009018288808,
      "step": 19272
    }
  ],
  "logging_steps": 500,
  "max_steps": 19272,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1606,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5041733416362967e+18,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}