{ "best_global_step": 400, "best_metric": 37.20698874588074, "best_model_checkpoint": "./JUDIC/checkpoint-400", "epoch": 5.0, "eval_steps": 200, "global_step": 425, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23529411764705882, "grad_norm": 39.55274200439453, "learning_rate": 8.000000000000001e-07, "loss": 2.8704, "step": 20 }, { "epoch": 0.47058823529411764, "grad_norm": 16.861303329467773, "learning_rate": 1.8000000000000001e-06, "loss": 2.4422, "step": 40 }, { "epoch": 0.7058823529411765, "grad_norm": 10.871281623840332, "learning_rate": 2.8000000000000003e-06, "loss": 1.9044, "step": 60 }, { "epoch": 0.9411764705882353, "grad_norm": 7.858098030090332, "learning_rate": 3.8000000000000005e-06, "loss": 1.4465, "step": 80 }, { "epoch": 1.1764705882352942, "grad_norm": 7.4879231452941895, "learning_rate": 4.800000000000001e-06, "loss": 1.3179, "step": 100 }, { "epoch": 1.4117647058823528, "grad_norm": 7.5675835609436035, "learning_rate": 5.8e-06, "loss": 1.0743, "step": 120 }, { "epoch": 1.6470588235294117, "grad_norm": 6.898285865783691, "learning_rate": 6.800000000000001e-06, "loss": 1.0679, "step": 140 }, { "epoch": 1.8823529411764706, "grad_norm": 7.592809677124023, "learning_rate": 7.800000000000002e-06, "loss": 1.0381, "step": 160 }, { "epoch": 2.1176470588235294, "grad_norm": 8.085546493530273, "learning_rate": 8.8e-06, "loss": 0.8211, "step": 180 }, { "epoch": 2.3529411764705883, "grad_norm": 5.359922885894775, "learning_rate": 9.800000000000001e-06, "loss": 0.7182, "step": 200 }, { "epoch": 2.3529411764705883, "eval_loss": 0.6759204864501953, "eval_runtime": 326.8541, "eval_samples_per_second": 2.083, "eval_steps_per_second": 0.263, "eval_wer": 41.783249393769815, "step": 200 }, { "epoch": 2.588235294117647, "grad_norm": 3.346689462661743, "learning_rate": 9.28888888888889e-06, "loss": 0.7775, "step": 220 }, { "epoch": 2.8235294117647056, "grad_norm": 4.038958549499512, "learning_rate": 8.400000000000001e-06, "loss": 0.6748, "step": 240 }, { "epoch": 3.0588235294117645, "grad_norm": 3.325453758239746, "learning_rate": 7.511111111111111e-06, "loss": 0.66, "step": 260 }, { "epoch": 3.2941176470588234, "grad_norm": 3.325045108795166, "learning_rate": 6.6222222222222236e-06, "loss": 0.5344, "step": 280 }, { "epoch": 3.5294117647058822, "grad_norm": 3.9428045749664307, "learning_rate": 5.733333333333334e-06, "loss": 0.5228, "step": 300 }, { "epoch": 3.764705882352941, "grad_norm": 3.552807092666626, "learning_rate": 4.8444444444444446e-06, "loss": 0.5771, "step": 320 }, { "epoch": 4.0, "grad_norm": 4.557582378387451, "learning_rate": 3.955555555555556e-06, "loss": 0.4784, "step": 340 }, { "epoch": 4.235294117647059, "grad_norm": 4.543866157531738, "learning_rate": 3.066666666666667e-06, "loss": 0.4485, "step": 360 }, { "epoch": 4.470588235294118, "grad_norm": 3.4673140048980713, "learning_rate": 2.1777777777777777e-06, "loss": 0.4194, "step": 380 }, { "epoch": 4.705882352941177, "grad_norm": 3.8203773498535156, "learning_rate": 1.288888888888889e-06, "loss": 0.3985, "step": 400 }, { "epoch": 4.705882352941177, "eval_loss": 0.44990459084510803, "eval_runtime": 307.4758, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.28, "eval_wer": 37.20698874588074, "step": 400 }, { "epoch": 4.9411764705882355, "grad_norm": 3.171309471130371, "learning_rate": 4.0000000000000003e-07, "loss": 0.4031, "step": 420 } ], "logging_steps": 20, "max_steps": 425, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.924761444352e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }