| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 31.46853146853147, | |
| "global_step": 4500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 4.9800000000000004e-05, | |
| "loss": 4.6313, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_cer": 0.9993038029762423, | |
| "eval_loss": 3.357034921646118, | |
| "eval_runtime": 144.6064, | |
| "eval_samples_per_second": 7.897, | |
| "eval_steps_per_second": 0.989, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 9.98e-05, | |
| "loss": 2.8906, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_cer": 0.1851720912018101, | |
| "eval_loss": 0.7587499022483826, | |
| "eval_runtime": 144.5652, | |
| "eval_samples_per_second": 7.9, | |
| "eval_steps_per_second": 0.989, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 9.190243902439025e-05, | |
| "loss": 0.4635, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "eval_cer": 0.04839657122965799, | |
| "eval_loss": 0.18757499754428864, | |
| "eval_runtime": 146.4828, | |
| "eval_samples_per_second": 7.796, | |
| "eval_steps_per_second": 0.976, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 8.377235772357724e-05, | |
| "loss": 0.218, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "eval_cer": 0.03471738752066835, | |
| "eval_loss": 0.14170190691947937, | |
| "eval_runtime": 145.5208, | |
| "eval_samples_per_second": 7.848, | |
| "eval_steps_per_second": 0.983, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "learning_rate": 7.564227642276424e-05, | |
| "loss": 0.1514, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "eval_cer": 0.032465625271951966, | |
| "eval_loss": 0.13693825900554657, | |
| "eval_runtime": 145.4673, | |
| "eval_samples_per_second": 7.851, | |
| "eval_steps_per_second": 0.983, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "learning_rate": 6.751219512195123e-05, | |
| "loss": 0.1149, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "eval_cer": 0.02969715429466539, | |
| "eval_loss": 0.13597196340560913, | |
| "eval_runtime": 145.3558, | |
| "eval_samples_per_second": 7.857, | |
| "eval_steps_per_second": 0.984, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 24.48, | |
| "learning_rate": 5.938211382113822e-05, | |
| "loss": 0.1253, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 24.48, | |
| "eval_cer": 0.029827691236619963, | |
| "eval_loss": 0.1414954513311386, | |
| "eval_runtime": 146.3702, | |
| "eval_samples_per_second": 7.802, | |
| "eval_steps_per_second": 0.977, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 27.97, | |
| "learning_rate": 5.125203252032521e-05, | |
| "loss": 0.1019, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 27.97, | |
| "eval_cer": 0.028647419719780697, | |
| "eval_loss": 0.1363374888896942, | |
| "eval_runtime": 145.0727, | |
| "eval_samples_per_second": 7.872, | |
| "eval_steps_per_second": 0.986, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 31.47, | |
| "learning_rate": 4.31219512195122e-05, | |
| "loss": 0.0806, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 31.47, | |
| "eval_cer": 0.02749434339918197, | |
| "eval_loss": 0.13236147165298462, | |
| "eval_runtime": 145.7007, | |
| "eval_samples_per_second": 7.838, | |
| "eval_steps_per_second": 0.981, | |
| "step": 4500 | |
| } | |
| ], | |
| "max_steps": 7150, | |
| "num_train_epochs": 50, | |
| "total_flos": 1.6733035055952587e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |