| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 48.608996539792386, | |
| "global_step": 7000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 0.00015, | |
| "loss": 5.7552, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "eval_cer": 0.9804542678163042, | |
| "eval_loss": 3.6432340145111084, | |
| "eval_runtime": 117.6193, | |
| "eval_samples_per_second": 14.283, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 0.0003, | |
| "loss": 1.9259, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "eval_cer": 0.1655705336723533, | |
| "eval_loss": 0.387952595949173, | |
| "eval_runtime": 117.5642, | |
| "eval_samples_per_second": 14.29, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 0.0002758064516129032, | |
| "loss": 0.372, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "eval_cer": 0.15922101166209748, | |
| "eval_loss": 0.35147520899772644, | |
| "eval_runtime": 117.7259, | |
| "eval_samples_per_second": 14.27, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "learning_rate": 0.00025161290322580645, | |
| "loss": 0.2859, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "eval_cer": 0.16001647750678277, | |
| "eval_loss": 0.3656272292137146, | |
| "eval_runtime": 116.8438, | |
| "eval_samples_per_second": 14.378, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 17.36, | |
| "learning_rate": 0.00022741935483870966, | |
| "loss": 0.2384, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 17.36, | |
| "eval_cer": 0.15967556357334622, | |
| "eval_loss": 0.39938414096832275, | |
| "eval_runtime": 117.3198, | |
| "eval_samples_per_second": 14.32, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 20.83, | |
| "learning_rate": 0.00020322580645161287, | |
| "loss": 0.2047, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 20.83, | |
| "eval_cer": 0.16220400857966732, | |
| "eval_loss": 0.41744646430015564, | |
| "eval_runtime": 117.2101, | |
| "eval_samples_per_second": 14.333, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "learning_rate": 0.00017903225806451613, | |
| "loss": 0.1699, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "eval_cer": 0.16042841517635192, | |
| "eval_loss": 0.4581703841686249, | |
| "eval_runtime": 117.0983, | |
| "eval_samples_per_second": 14.347, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "learning_rate": 0.00015483870967741934, | |
| "loss": 0.1457, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "eval_cer": 0.1622466228213469, | |
| "eval_loss": 0.5069878101348877, | |
| "eval_runtime": 117.1696, | |
| "eval_samples_per_second": 14.338, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "learning_rate": 0.00013064516129032258, | |
| "loss": 0.1233, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "eval_cer": 0.16633759002258555, | |
| "eval_loss": 0.5665308237075806, | |
| "eval_runtime": 117.8469, | |
| "eval_samples_per_second": 14.256, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 34.72, | |
| "learning_rate": 0.0001064516129032258, | |
| "loss": 0.1077, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 34.72, | |
| "eval_cer": 0.16429210642196623, | |
| "eval_loss": 0.5954398512840271, | |
| "eval_runtime": 117.3401, | |
| "eval_samples_per_second": 14.317, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 38.19, | |
| "learning_rate": 8.225806451612902e-05, | |
| "loss": 0.0944, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 38.19, | |
| "eval_cer": 0.16356766431341355, | |
| "eval_loss": 0.623413622379303, | |
| "eval_runtime": 116.7948, | |
| "eval_samples_per_second": 14.384, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 41.66, | |
| "learning_rate": 5.806451612903225e-05, | |
| "loss": 0.08, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 41.66, | |
| "eval_cer": 0.16270117473259563, | |
| "eval_loss": 0.6403368711471558, | |
| "eval_runtime": 116.8584, | |
| "eval_samples_per_second": 14.376, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 45.14, | |
| "learning_rate": 3.387096774193548e-05, | |
| "loss": 0.0746, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 45.14, | |
| "eval_cer": 0.16326936462165656, | |
| "eval_loss": 0.6701669096946716, | |
| "eval_runtime": 117.2831, | |
| "eval_samples_per_second": 14.324, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 48.61, | |
| "learning_rate": 9.677419354838709e-06, | |
| "loss": 0.0664, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 48.61, | |
| "eval_cer": 0.16285742695208738, | |
| "eval_loss": 0.684356153011322, | |
| "eval_runtime": 117.2078, | |
| "eval_samples_per_second": 14.334, | |
| "step": 7000 | |
| } | |
| ], | |
| "max_steps": 7200, | |
| "num_train_epochs": 50, | |
| "total_flos": 2.1349156888097853e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |