{ "best_metric": null, "best_model_checkpoint": null, "epoch": 48.608996539792386, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.47, "learning_rate": 0.00015, "loss": 5.7552, "step": 500 }, { "epoch": 3.47, "eval_cer": 0.9804542678163042, "eval_loss": 3.6432340145111084, "eval_runtime": 117.6193, "eval_samples_per_second": 14.283, "step": 500 }, { "epoch": 6.94, "learning_rate": 0.0003, "loss": 1.9259, "step": 1000 }, { "epoch": 6.94, "eval_cer": 0.1655705336723533, "eval_loss": 0.387952595949173, "eval_runtime": 117.5642, "eval_samples_per_second": 14.29, "step": 1000 }, { "epoch": 10.42, "learning_rate": 0.0002758064516129032, "loss": 0.372, "step": 1500 }, { "epoch": 10.42, "eval_cer": 0.15922101166209748, "eval_loss": 0.35147520899772644, "eval_runtime": 117.7259, "eval_samples_per_second": 14.27, "step": 1500 }, { "epoch": 13.89, "learning_rate": 0.00025161290322580645, "loss": 0.2859, "step": 2000 }, { "epoch": 13.89, "eval_cer": 0.16001647750678277, "eval_loss": 0.3656272292137146, "eval_runtime": 116.8438, "eval_samples_per_second": 14.378, "step": 2000 }, { "epoch": 17.36, "learning_rate": 0.00022741935483870966, "loss": 0.2384, "step": 2500 }, { "epoch": 17.36, "eval_cer": 0.15967556357334622, "eval_loss": 0.39938414096832275, "eval_runtime": 117.3198, "eval_samples_per_second": 14.32, "step": 2500 }, { "epoch": 20.83, "learning_rate": 0.00020322580645161287, "loss": 0.2047, "step": 3000 }, { "epoch": 20.83, "eval_cer": 0.16220400857966732, "eval_loss": 0.41744646430015564, "eval_runtime": 117.2101, "eval_samples_per_second": 14.333, "step": 3000 }, { "epoch": 24.3, "learning_rate": 0.00017903225806451613, "loss": 0.1699, "step": 3500 }, { "epoch": 24.3, "eval_cer": 0.16042841517635192, "eval_loss": 0.4581703841686249, "eval_runtime": 117.0983, "eval_samples_per_second": 14.347, "step": 3500 }, { "epoch": 27.78, "learning_rate": 0.00015483870967741934, "loss": 0.1457, "step": 4000 }, { "epoch": 27.78, "eval_cer": 0.1622466228213469, "eval_loss": 0.5069878101348877, "eval_runtime": 117.1696, "eval_samples_per_second": 14.338, "step": 4000 }, { "epoch": 31.25, "learning_rate": 0.00013064516129032258, "loss": 0.1233, "step": 4500 }, { "epoch": 31.25, "eval_cer": 0.16633759002258555, "eval_loss": 0.5665308237075806, "eval_runtime": 117.8469, "eval_samples_per_second": 14.256, "step": 4500 }, { "epoch": 34.72, "learning_rate": 0.0001064516129032258, "loss": 0.1077, "step": 5000 }, { "epoch": 34.72, "eval_cer": 0.16429210642196623, "eval_loss": 0.5954398512840271, "eval_runtime": 117.3401, "eval_samples_per_second": 14.317, "step": 5000 }, { "epoch": 38.19, "learning_rate": 8.225806451612902e-05, "loss": 0.0944, "step": 5500 }, { "epoch": 38.19, "eval_cer": 0.16356766431341355, "eval_loss": 0.623413622379303, "eval_runtime": 116.7948, "eval_samples_per_second": 14.384, "step": 5500 }, { "epoch": 41.66, "learning_rate": 5.806451612903225e-05, "loss": 0.08, "step": 6000 }, { "epoch": 41.66, "eval_cer": 0.16270117473259563, "eval_loss": 0.6403368711471558, "eval_runtime": 116.8584, "eval_samples_per_second": 14.376, "step": 6000 }, { "epoch": 45.14, "learning_rate": 3.387096774193548e-05, "loss": 0.0746, "step": 6500 }, { "epoch": 45.14, "eval_cer": 0.16326936462165656, "eval_loss": 0.6701669096946716, "eval_runtime": 117.2831, "eval_samples_per_second": 14.324, "step": 6500 }, { "epoch": 48.61, "learning_rate": 9.677419354838709e-06, "loss": 0.0664, "step": 7000 }, { "epoch": 48.61, "eval_cer": 0.16285742695208738, "eval_loss": 0.684356153011322, "eval_runtime": 117.2078, "eval_samples_per_second": 14.334, "step": 7000 } ], "max_steps": 7200, "num_train_epochs": 50, "total_flos": 2.1349156888097853e+19, "trial_name": null, "trial_params": null }