| { |
| "best_global_step": 11242, |
| "best_metric": 0.7068964281422255, |
| "best_model_checkpoint": "checkpoints/checkpoint-11242", |
| "epoch": 1.749970813713663, |
| "eval_steps": 1606, |
| "global_step": 11242, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07783009689847063, |
| "grad_norm": 0.2925865948200226, |
| "learning_rate": 4.991766654390158e-05, |
| "loss": 2.2108, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.15566019379694127, |
| "grad_norm": 0.11884481459856033, |
| "learning_rate": 4.966988962239142e-05, |
| "loss": 0.3886, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2334902906954119, |
| "grad_norm": 0.09392867982387543, |
| "learning_rate": 4.925831306092169e-05, |
| "loss": 0.2271, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2499902712378877, |
| "eval_avg": 26.749637281629344, |
| "eval_cer": 0.6640905490705347, |
| "eval_der": 78.87487905513909, |
| "eval_loss": 0.13775022327899933, |
| "eval_runtime": 1439.866, |
| "eval_samples_per_second": 94.261, |
| "eval_steps_per_second": 0.369, |
| "eval_wer": 0.7099422406784007, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.31132038759388253, |
| "grad_norm": 0.0717502012848854, |
| "learning_rate": 4.8685669587267704e-05, |
| "loss": 0.1657, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3891504844923532, |
| "grad_norm": 0.0638226792216301, |
| "learning_rate": 4.795576135848184e-05, |
| "loss": 0.1334, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.4669805813908238, |
| "grad_norm": 0.05441045016050339, |
| "learning_rate": 4.707343471586959e-05, |
| "loss": 0.1135, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4999805424757754, |
| "eval_avg": 26.867260983703588, |
| "eval_cer": 0.6597599235984197, |
| "eval_der": 79.24906581788363, |
| "eval_loss": 0.07722582668066025, |
| "eval_runtime": 1442.4649, |
| "eval_samples_per_second": 94.091, |
| "eval_steps_per_second": 0.368, |
| "eval_wer": 0.6929572096287142, |
| "step": 3212 |
| }, |
| { |
| "epoch": 0.5448106782892945, |
| "grad_norm": 0.050636373460292816, |
| "learning_rate": 4.604454800693874e-05, |
| "loss": 0.1002, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6226407751877651, |
| "grad_norm": 0.0581156350672245, |
| "learning_rate": 4.48759326879731e-05, |
| "loss": 0.0906, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7004708720862357, |
| "grad_norm": 0.050500743091106415, |
| "learning_rate": 4.3575347965496405e-05, |
| "loss": 0.0828, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.749970813713663, |
| "eval_avg": 26.888053782081823, |
| "eval_cer": 0.6586451367564623, |
| "eval_der": 79.3192047037025, |
| "eval_loss": 0.05902250111103058, |
| "eval_runtime": 1431.2383, |
| "eval_samples_per_second": 94.829, |
| "eval_steps_per_second": 0.371, |
| "eval_wer": 0.6863115057865005, |
| "step": 4818 |
| }, |
| { |
| "epoch": 0.7783009689847064, |
| "grad_norm": 0.04594194516539574, |
| "learning_rate": 4.9989118673869795e-05, |
| "loss": 0.0765, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.856131065883177, |
| "grad_norm": 0.04987065866589546, |
| "learning_rate": 4.984611214237347e-05, |
| "loss": 0.0724, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9339611627816476, |
| "grad_norm": 0.04362853616476059, |
| "learning_rate": 4.953813591358179e-05, |
| "loss": 0.0683, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.9999610849515508, |
| "eval_avg": 0.708712044715695, |
| "eval_cer": 0.6546741642910598, |
| "eval_der": 0.7934032551594553, |
| "eval_loss": 0.049141544848680496, |
| "eval_runtime": 305.8639, |
| "eval_samples_per_second": 443.737, |
| "eval_steps_per_second": 1.736, |
| "eval_wer": 0.6780587146965699, |
| "step": 6424 |
| }, |
| { |
| "epoch": 1.0118301747285676, |
| "grad_norm": 0.04417261481285095, |
| "learning_rate": 4.99981315851881e-05, |
| "loss": 0.0626, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.0896602716270383, |
| "grad_norm": 0.039167486131191254, |
| "learning_rate": 4.9890257744619245e-05, |
| "loss": 0.061, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.1674903685255087, |
| "grad_norm": 0.035239290446043015, |
| "learning_rate": 4.961712109503609e-05, |
| "loss": 0.0583, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.2453204654239793, |
| "grad_norm": 0.03707383945584297, |
| "learning_rate": 4.918053517048949e-05, |
| "loss": 0.056, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.2499902712378876, |
| "eval_avg": 0.7079419812877328, |
| "eval_cer": 0.6543333833811898, |
| "eval_der": 0.793713077370868, |
| "eval_loss": 0.043703265488147736, |
| "eval_runtime": 307.3798, |
| "eval_samples_per_second": 441.548, |
| "eval_steps_per_second": 1.728, |
| "eval_wer": 0.6757794831111406, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.32315056232245, |
| "grad_norm": 0.035194575786590576, |
| "learning_rate": 4.8583398752382485e-05, |
| "loss": 0.0539, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.4009806592209206, |
| "grad_norm": 0.03347332403063774, |
| "learning_rate": 4.782967662255196e-05, |
| "loss": 0.0523, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.4788107561193913, |
| "grad_norm": 0.06645191460847855, |
| "learning_rate": 4.692437323847159e-05, |
| "loss": 0.0507, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.4999805424757753, |
| "eval_avg": 0.7073388677263619, |
| "eval_cer": 0.6541002942380697, |
| "eval_der": 0.7938397323961762, |
| "eval_loss": 0.03928952291607857, |
| "eval_runtime": 307.3123, |
| "eval_samples_per_second": 441.645, |
| "eval_steps_per_second": 1.728, |
| "eval_wer": 0.6740765765448399, |
| "step": 9636 |
| }, |
| { |
| "epoch": 1.556640853017862, |
| "grad_norm": 0.031485334038734436, |
| "learning_rate": 4.587349950536374e-05, |
| "loss": 0.0492, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.6344709499163326, |
| "grad_norm": 0.031592607498168945, |
| "learning_rate": 4.46840328658421e-05, |
| "loss": 0.0481, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.7123010468148032, |
| "grad_norm": 0.031952131539583206, |
| "learning_rate": 4.336387097207617e-05, |
| "loss": 0.0468, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.749970813713663, |
| "eval_avg": 0.7068964281422255, |
| "eval_cer": 0.6539495257646198, |
| "eval_der": 0.7937636951733035, |
| "eval_loss": 0.036437951028347015, |
| "eval_runtime": 308.1694, |
| "eval_samples_per_second": 440.417, |
| "eval_steps_per_second": 1.723, |
| "eval_wer": 0.6729760634887532, |
| "step": 11242 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 19272, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1606, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.774232894949294e+17, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|