| { |
| "best_global_step": 19272, |
| "best_metric": 0.029430464545251396, |
| "best_model_checkpoint": "checkpoints/checkpoint-19272", |
| "epoch": 2.9999610849515506, |
| "eval_steps": 1606, |
| "global_step": 19272, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07783009689847063, |
| "grad_norm": 0.2925865948200226, |
| "learning_rate": 4.991766654390158e-05, |
| "loss": 2.2108, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.15566019379694127, |
| "grad_norm": 0.11884481459856033, |
| "learning_rate": 4.966988962239142e-05, |
| "loss": 0.3886, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2334902906954119, |
| "grad_norm": 0.09392867982387543, |
| "learning_rate": 4.925831306092169e-05, |
| "loss": 0.2271, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2499902712378877, |
| "eval_avg": 26.749637281629344, |
| "eval_cer": 0.6640905490705347, |
| "eval_der": 78.87487905513909, |
| "eval_loss": 0.13775022327899933, |
| "eval_runtime": 1439.866, |
| "eval_samples_per_second": 94.261, |
| "eval_steps_per_second": 0.369, |
| "eval_wer": 0.7099422406784007, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.31132038759388253, |
| "grad_norm": 0.0717502012848854, |
| "learning_rate": 4.8685669587267704e-05, |
| "loss": 0.1657, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3891504844923532, |
| "grad_norm": 0.0638226792216301, |
| "learning_rate": 4.795576135848184e-05, |
| "loss": 0.1334, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.4669805813908238, |
| "grad_norm": 0.05441045016050339, |
| "learning_rate": 4.707343471586959e-05, |
| "loss": 0.1135, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4999805424757754, |
| "eval_avg": 26.867260983703588, |
| "eval_cer": 0.6597599235984197, |
| "eval_der": 79.24906581788363, |
| "eval_loss": 0.07722582668066025, |
| "eval_runtime": 1442.4649, |
| "eval_samples_per_second": 94.091, |
| "eval_steps_per_second": 0.368, |
| "eval_wer": 0.6929572096287142, |
| "step": 3212 |
| }, |
| { |
| "epoch": 0.5448106782892945, |
| "grad_norm": 0.050636373460292816, |
| "learning_rate": 4.604454800693874e-05, |
| "loss": 0.1002, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6226407751877651, |
| "grad_norm": 0.0581156350672245, |
| "learning_rate": 4.48759326879731e-05, |
| "loss": 0.0906, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7004708720862357, |
| "grad_norm": 0.050500743091106415, |
| "learning_rate": 4.3575347965496405e-05, |
| "loss": 0.0828, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.749970813713663, |
| "eval_avg": 26.888053782081823, |
| "eval_cer": 0.6586451367564623, |
| "eval_der": 79.3192047037025, |
| "eval_loss": 0.05902250111103058, |
| "eval_runtime": 1431.2383, |
| "eval_samples_per_second": 94.829, |
| "eval_steps_per_second": 0.371, |
| "eval_wer": 0.6863115057865005, |
| "step": 4818 |
| }, |
| { |
| "epoch": 0.7783009689847064, |
| "grad_norm": 0.04594194516539574, |
| "learning_rate": 4.9989118673869795e-05, |
| "loss": 0.0765, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.856131065883177, |
| "grad_norm": 0.04987065866589546, |
| "learning_rate": 4.984611214237347e-05, |
| "loss": 0.0724, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9339611627816476, |
| "grad_norm": 0.04362853616476059, |
| "learning_rate": 4.953813591358179e-05, |
| "loss": 0.0683, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.9999610849515508, |
| "eval_avg": 0.708712044715695, |
| "eval_cer": 0.6546741642910598, |
| "eval_der": 0.7934032551594553, |
| "eval_loss": 0.049141544848680496, |
| "eval_runtime": 305.8639, |
| "eval_samples_per_second": 443.737, |
| "eval_steps_per_second": 1.736, |
| "eval_wer": 0.6780587146965699, |
| "step": 6424 |
| }, |
| { |
| "epoch": 1.0118301747285676, |
| "grad_norm": 0.04417261481285095, |
| "learning_rate": 4.99981315851881e-05, |
| "loss": 0.0626, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.0896602716270383, |
| "grad_norm": 0.039167486131191254, |
| "learning_rate": 4.9890257744619245e-05, |
| "loss": 0.061, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.1674903685255087, |
| "grad_norm": 0.035239290446043015, |
| "learning_rate": 4.961712109503609e-05, |
| "loss": 0.0583, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.2453204654239793, |
| "grad_norm": 0.03707383945584297, |
| "learning_rate": 4.918053517048949e-05, |
| "loss": 0.056, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.2499902712378876, |
| "eval_avg": 0.7079419812877328, |
| "eval_cer": 0.6543333833811898, |
| "eval_der": 0.793713077370868, |
| "eval_loss": 0.043703265488147736, |
| "eval_runtime": 307.3798, |
| "eval_samples_per_second": 441.548, |
| "eval_steps_per_second": 1.728, |
| "eval_wer": 0.6757794831111406, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.32315056232245, |
| "grad_norm": 0.035194575786590576, |
| "learning_rate": 4.8583398752382485e-05, |
| "loss": 0.0539, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.4009806592209206, |
| "grad_norm": 0.03347332403063774, |
| "learning_rate": 4.782967662255196e-05, |
| "loss": 0.0523, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.4788107561193913, |
| "grad_norm": 0.06645191460847855, |
| "learning_rate": 4.692437323847159e-05, |
| "loss": 0.0507, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.4999805424757753, |
| "eval_avg": 0.7073388677263619, |
| "eval_cer": 0.6541002942380697, |
| "eval_der": 0.7938397323961762, |
| "eval_loss": 0.03928952291607857, |
| "eval_runtime": 307.3123, |
| "eval_samples_per_second": 441.645, |
| "eval_steps_per_second": 1.728, |
| "eval_wer": 0.6740765765448399, |
| "step": 9636 |
| }, |
| { |
| "epoch": 1.556640853017862, |
| "grad_norm": 0.031485334038734436, |
| "learning_rate": 4.587349950536374e-05, |
| "loss": 0.0492, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.6344709499163326, |
| "grad_norm": 0.031592607498168945, |
| "learning_rate": 4.46840328658421e-05, |
| "loss": 0.0481, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.7123010468148032, |
| "grad_norm": 0.031952131539583206, |
| "learning_rate": 4.336387097207617e-05, |
| "loss": 0.0468, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.749970813713663, |
| "eval_avg": 0.7068964281422255, |
| "eval_cer": 0.6539495257646198, |
| "eval_der": 0.7937636951733035, |
| "eval_loss": 0.036437951028347015, |
| "eval_runtime": 308.1694, |
| "eval_samples_per_second": 440.417, |
| "eval_steps_per_second": 1.723, |
| "eval_wer": 0.6729760634887532, |
| "step": 11242 |
| }, |
| { |
| "epoch": 1.7901311437132739, |
| "grad_norm": 0.030866818502545357, |
| "learning_rate": 4.9978063925651226e-05, |
| "loss": 0.0457, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.8679612406117445, |
| "grad_norm": 0.029339410364627838, |
| "learning_rate": 4.980989359026424e-05, |
| "loss": 0.0451, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.9457913375102152, |
| "grad_norm": 0.02953779138624668, |
| "learning_rate": 4.94769940363958e-05, |
| "loss": 0.0439, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.9999610849515508, |
| "eval_avg": 0.706664474285978, |
| "eval_cer": 0.6538952007882788, |
| "eval_der": 0.7938883608526655, |
| "eval_loss": 0.0344935841858387, |
| "eval_runtime": 304.0677, |
| "eval_samples_per_second": 446.358, |
| "eval_steps_per_second": 1.746, |
| "eval_wer": 0.6722098612169899, |
| "step": 12848 |
| }, |
| { |
| "epoch": 2.0236603494571352, |
| "grad_norm": 0.028502434492111206, |
| "learning_rate": 4.898157560336646e-05, |
| "loss": 0.043, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.101490446355606, |
| "grad_norm": 0.030778545886278152, |
| "learning_rate": 4.832692770033863e-05, |
| "loss": 0.0417, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.1793205432540765, |
| "grad_norm": 0.0269626472145319, |
| "learning_rate": 4.7517396965763394e-05, |
| "loss": 0.041, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.2499902712378876, |
| "eval_avg": 0.7064455694431908, |
| "eval_cer": 0.6538135050224774, |
| "eval_der": 0.7939745658437141, |
| "eval_loss": 0.032875534147024155, |
| "eval_runtime": 304.6489, |
| "eval_samples_per_second": 445.506, |
| "eval_steps_per_second": 1.743, |
| "eval_wer": 0.6715486374633808, |
| "step": 14454 |
| }, |
| { |
| "epoch": 2.257150640152547, |
| "grad_norm": 0.03818966820836067, |
| "learning_rate": 4.6558358407185946e-05, |
| "loss": 0.0402, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.3349807370510174, |
| "grad_norm": 0.029628828167915344, |
| "learning_rate": 4.5456179713031586e-05, |
| "loss": 0.0395, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.4128108339494885, |
| "grad_norm": 0.02792350761592388, |
| "learning_rate": 4.4218178973329804e-05, |
| "loss": 0.0392, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.4906409308479587, |
| "grad_norm": 0.03364017978310585, |
| "learning_rate": 4.2852576090096216e-05, |
| "loss": 0.0384, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.4999805424757753, |
| "eval_avg": 0.7061941356307186, |
| "eval_cer": 0.6537081045699137, |
| "eval_der": 0.7939727975362082, |
| "eval_loss": 0.031167298555374146, |
| "eval_runtime": 303.0246, |
| "eval_samples_per_second": 447.894, |
| "eval_steps_per_second": 1.752, |
| "eval_wer": 0.670901504786034, |
| "step": 16060 |
| }, |
| { |
| "epoch": 2.5684710277464298, |
| "grad_norm": 0.032605357468128204, |
| "learning_rate": 4.136843819999117e-05, |
| "loss": 0.038, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.6463011246449, |
| "grad_norm": 0.027949590235948563, |
| "learning_rate": 3.977561947162954e-05, |
| "loss": 0.0376, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.724131221543371, |
| "grad_norm": 0.026584528386592865, |
| "learning_rate": 3.80846956772673e-05, |
| "loss": 0.0372, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.749970813713663, |
| "eval_avg": 0.706001258948505, |
| "eval_cer": 0.6536609451579365, |
| "eval_der": 0.7939834810607371, |
| "eval_loss": 0.030004331842064857, |
| "eval_runtime": 304.4012, |
| "eval_samples_per_second": 445.869, |
| "eval_steps_per_second": 1.744, |
| "eval_wer": 0.6703593506268415, |
| "step": 17666 |
| }, |
| { |
| "epoch": 2.8019613184418413, |
| "grad_norm": 0.027681950479745865, |
| "learning_rate": 4.9963175402849494e-05, |
| "loss": 0.0369, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.8797914153403124, |
| "grad_norm": 0.037826769053936005, |
| "learning_rate": 4.976986707530646e-05, |
| "loss": 0.0367, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.9576215122387826, |
| "grad_norm": 0.024277370423078537, |
| "learning_rate": 4.94120952916684e-05, |
| "loss": 0.0361, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.9999610849515506, |
| "eval_avg": 0.029430464545251396, |
| "eval_cer": 0.014860838907614712, |
| "eval_der": 0.7972269254290619, |
| "eval_loss": 0.029503749683499336, |
| "eval_runtime": 898.4638, |
| "eval_samples_per_second": 151.061, |
| "eval_steps_per_second": 0.591, |
| "eval_wer": 0.04400009018288808, |
| "step": 19272 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 19272, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1606, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5041733416362967e+18, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|