| { | |
| "best_global_step": 400, | |
| "best_metric": 37.20698874588074, | |
| "best_model_checkpoint": "./JUDIC/checkpoint-400", | |
| "epoch": 5.0, | |
| "eval_steps": 200, | |
| "global_step": 425, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 39.55274200439453, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 2.8704, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 16.861303329467773, | |
| "learning_rate": 1.8000000000000001e-06, | |
| "loss": 2.4422, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 10.871281623840332, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 1.9044, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 7.858098030090332, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 1.4465, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 7.4879231452941895, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 1.3179, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 7.5675835609436035, | |
| "learning_rate": 5.8e-06, | |
| "loss": 1.0743, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": 6.898285865783691, | |
| "learning_rate": 6.800000000000001e-06, | |
| "loss": 1.0679, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 7.592809677124023, | |
| "learning_rate": 7.800000000000002e-06, | |
| "loss": 1.0381, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 8.085546493530273, | |
| "learning_rate": 8.8e-06, | |
| "loss": 0.8211, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 5.359922885894775, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.7182, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "eval_loss": 0.6759204864501953, | |
| "eval_runtime": 326.8541, | |
| "eval_samples_per_second": 2.083, | |
| "eval_steps_per_second": 0.263, | |
| "eval_wer": 41.783249393769815, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.588235294117647, | |
| "grad_norm": 3.346689462661743, | |
| "learning_rate": 9.28888888888889e-06, | |
| "loss": 0.7775, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": 4.038958549499512, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 0.6748, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.0588235294117645, | |
| "grad_norm": 3.325453758239746, | |
| "learning_rate": 7.511111111111111e-06, | |
| "loss": 0.66, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.2941176470588234, | |
| "grad_norm": 3.325045108795166, | |
| "learning_rate": 6.6222222222222236e-06, | |
| "loss": 0.5344, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.5294117647058822, | |
| "grad_norm": 3.9428045749664307, | |
| "learning_rate": 5.733333333333334e-06, | |
| "loss": 0.5228, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.764705882352941, | |
| "grad_norm": 3.552807092666626, | |
| "learning_rate": 4.8444444444444446e-06, | |
| "loss": 0.5771, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 4.557582378387451, | |
| "learning_rate": 3.955555555555556e-06, | |
| "loss": 0.4784, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 4.235294117647059, | |
| "grad_norm": 4.543866157531738, | |
| "learning_rate": 3.066666666666667e-06, | |
| "loss": 0.4485, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 4.470588235294118, | |
| "grad_norm": 3.4673140048980713, | |
| "learning_rate": 2.1777777777777777e-06, | |
| "loss": 0.4194, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "grad_norm": 3.8203773498535156, | |
| "learning_rate": 1.288888888888889e-06, | |
| "loss": 0.3985, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "eval_loss": 0.44990459084510803, | |
| "eval_runtime": 307.4758, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.28, | |
| "eval_wer": 37.20698874588074, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.9411764705882355, | |
| "grad_norm": 3.171309471130371, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 0.4031, | |
| "step": 420 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 425, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.924761444352e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |