{ "best_global_step": 1000, "best_metric": 240.63380883232256, "best_model_checkpoint": "/workspace/output/whisper-changhua-8_1_1/checkpoint-1000", "epoch": 13.157894736842104, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6578947368421053, "grad_norm": 20.157838821411133, "learning_rate": 9.800000000000001e-07, "loss": 5.3937, "step": 50 }, { "epoch": 1.3157894736842106, "grad_norm": 12.150752067565918, "learning_rate": 1.98e-06, "loss": 2.8495, "step": 100 }, { "epoch": 1.3157894736842106, "eval_cer": 260.2801742547194, "eval_loss": 2.078307867050171, "eval_runtime": 40.147, "eval_samples_per_second": 11.284, "eval_steps_per_second": 2.84, "step": 100 }, { "epoch": 1.973684210526316, "grad_norm": 9.51569652557373, "learning_rate": 2.9800000000000003e-06, "loss": 1.65, "step": 150 }, { "epoch": 2.6315789473684212, "grad_norm": 9.225106239318848, "learning_rate": 3.980000000000001e-06, "loss": 1.2547, "step": 200 }, { "epoch": 2.6315789473684212, "eval_cer": 243.76868540189628, "eval_loss": 1.3091058731079102, "eval_runtime": 40.2136, "eval_samples_per_second": 11.265, "eval_steps_per_second": 2.835, "step": 200 }, { "epoch": 3.2894736842105265, "grad_norm": 8.73460865020752, "learning_rate": 4.980000000000001e-06, "loss": 1.0415, "step": 250 }, { "epoch": 3.9473684210526314, "grad_norm": 6.480662822723389, "learning_rate": 5.98e-06, "loss": 0.7814, "step": 300 }, { "epoch": 3.9473684210526314, "eval_cer": 251.57598018279663, "eval_loss": 0.6244341731071472, "eval_runtime": 40.4043, "eval_samples_per_second": 11.212, "eval_steps_per_second": 2.821, "step": 300 }, { "epoch": 4.605263157894737, "grad_norm": 4.831464767456055, "learning_rate": 6.98e-06, "loss": 0.4204, "step": 350 }, { "epoch": 5.2631578947368425, "grad_norm": 4.215816497802734, "learning_rate": 7.980000000000002e-06, "loss": 0.3407, "step": 400 }, { "epoch": 5.2631578947368425, "eval_cer": 290.749124455454, "eval_loss": 0.5503445863723755, "eval_runtime": 41.2087, "eval_samples_per_second": 10.993, "eval_steps_per_second": 2.766, "step": 400 }, { "epoch": 5.921052631578947, "grad_norm": 4.6707634925842285, "learning_rate": 8.98e-06, "loss": 0.2667, "step": 450 }, { "epoch": 6.578947368421053, "grad_norm": 3.476262331008911, "learning_rate": 9.980000000000001e-06, "loss": 0.1763, "step": 500 }, { "epoch": 6.578947368421053, "eval_cer": 274.81848466729303, "eval_loss": 0.5615935921669006, "eval_runtime": 40.95, "eval_samples_per_second": 11.062, "eval_steps_per_second": 2.784, "step": 500 }, { "epoch": 7.2368421052631575, "grad_norm": 2.525753974914551, "learning_rate": 9.94842105263158e-06, "loss": 0.1409, "step": 550 }, { "epoch": 7.894736842105263, "grad_norm": 2.9115219116210938, "learning_rate": 9.895789473684212e-06, "loss": 0.1026, "step": 600 }, { "epoch": 7.894736842105263, "eval_cer": 261.57854275219955, "eval_loss": 0.586604654788971, "eval_runtime": 40.7596, "eval_samples_per_second": 11.114, "eval_steps_per_second": 2.797, "step": 600 }, { "epoch": 8.552631578947368, "grad_norm": 2.416334390640259, "learning_rate": 9.843157894736843e-06, "loss": 0.0628, "step": 650 }, { "epoch": 9.210526315789474, "grad_norm": 1.3677974939346313, "learning_rate": 9.790526315789475e-06, "loss": 0.0485, "step": 700 }, { "epoch": 9.210526315789474, "eval_cer": 269.804390535577, "eval_loss": 0.6309823393821716, "eval_runtime": 40.7742, "eval_samples_per_second": 11.11, "eval_steps_per_second": 2.796, "step": 700 }, { "epoch": 9.868421052631579, "grad_norm": 2.056694984436035, "learning_rate": 9.737894736842107e-06, "loss": 0.0351, "step": 750 }, { "epoch": 10.526315789473685, "grad_norm": 1.416398525238037, "learning_rate": 9.685263157894738e-06, "loss": 0.0245, "step": 800 }, { "epoch": 10.526315789473685, "eval_cer": 275.9033057145298, "eval_loss": 0.6414592862129211, "eval_runtime": 40.752, "eval_samples_per_second": 11.116, "eval_steps_per_second": 2.797, "step": 800 }, { "epoch": 11.18421052631579, "grad_norm": 1.9776651859283447, "learning_rate": 9.63263157894737e-06, "loss": 0.0223, "step": 850 }, { "epoch": 11.842105263157894, "grad_norm": 2.234473705291748, "learning_rate": 9.58e-06, "loss": 0.017, "step": 900 }, { "epoch": 11.842105263157894, "eval_cer": 250.7132484838131, "eval_loss": 0.6816866397857666, "eval_runtime": 40.3529, "eval_samples_per_second": 11.226, "eval_steps_per_second": 2.825, "step": 900 }, { "epoch": 12.5, "grad_norm": 1.4661343097686768, "learning_rate": 9.527368421052631e-06, "loss": 0.0149, "step": 950 }, { "epoch": 13.157894736842104, "grad_norm": 3.28507137298584, "learning_rate": 9.474736842105265e-06, "loss": 0.0157, "step": 1000 }, { "epoch": 13.157894736842104, "eval_cer": 240.63380883232256, "eval_loss": 0.7029738426208496, "eval_runtime": 40.3273, "eval_samples_per_second": 11.233, "eval_steps_per_second": 2.827, "step": 1000 } ], "logging_steps": 50, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 132, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.376956379086848e+19, "train_batch_size": 24, "trial_name": null, "trial_params": null }