{
  "best_metric": 2.027944564819336,
  "best_model_checkpoint": "./checkpoint-700",
  "epoch": 100.0,
  "global_step": 700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.43,
      "learning_rate": 5.2499999999999994e-08,
      "loss": 3.5252,
      "step": 10
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.275e-07,
      "loss": 3.509,
      "step": 20
    },
    {
      "epoch": 4.29,
      "learning_rate": 2.025e-07,
      "loss": 3.481,
      "step": 30
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.775e-07,
      "loss": 3.4536,
      "step": 40
    },
    {
      "epoch": 7.14,
      "learning_rate": 2.86875e-07,
      "loss": 3.3407,
      "step": 50
    },
    {
      "epoch": 8.57,
      "learning_rate": 2.6812500000000003e-07,
      "loss": 3.1837,
      "step": 60
    },
    {
      "epoch": 10.0,
      "learning_rate": 2.49375e-07,
      "loss": 3.1471,
      "step": 70
    },
    {
      "epoch": 11.43,
      "learning_rate": 2.3062500000000002e-07,
      "loss": 3.0484,
      "step": 80
    },
    {
      "epoch": 12.86,
      "learning_rate": 2.11875e-07,
      "loss": 2.9713,
      "step": 90
    },
    {
      "epoch": 14.29,
      "learning_rate": 1.93125e-07,
      "loss": 2.9393,
      "step": 100
    },
    {
      "epoch": 14.29,
      "eval_loss": 2.902493715286255,
      "eval_runtime": 131.7731,
      "eval_samples_per_second": 3.885,
      "eval_steps_per_second": 0.121,
      "eval_wer": 369.26452784503635,
      "step": 100
    },
    {
      "epoch": 15.71,
      "learning_rate": 1.74375e-07,
      "loss": 2.8874,
      "step": 110
    },
    {
      "epoch": 17.14,
      "learning_rate": 1.5562500000000002e-07,
      "loss": 2.8192,
      "step": 120
    },
    {
      "epoch": 18.57,
      "learning_rate": 1.3687499999999999e-07,
      "loss": 2.8025,
      "step": 130
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.18125e-07,
      "loss": 2.7738,
      "step": 140
    },
    {
      "epoch": 21.43,
      "learning_rate": 9.937499999999999e-08,
      "loss": 2.7684,
      "step": 150
    },
    {
      "epoch": 22.86,
      "learning_rate": 8.0625e-08,
      "loss": 2.7294,
      "step": 160
    },
    {
      "epoch": 24.29,
      "learning_rate": 6.187499999999999e-08,
      "loss": 2.7318,
      "step": 170
    },
    {
      "epoch": 25.71,
      "learning_rate": 4.3125e-08,
      "loss": 2.7239,
      "step": 180
    },
    {
      "epoch": 27.14,
      "learning_rate": 2.4374999999999998e-08,
      "loss": 2.6988,
      "step": 190
    },
    {
      "epoch": 28.57,
      "learning_rate": 5.625e-09,
      "loss": 2.7124,
      "step": 200
    },
    {
      "epoch": 28.57,
      "eval_loss": 2.716507911682129,
      "eval_runtime": 138.682,
      "eval_samples_per_second": 3.692,
      "eval_steps_per_second": 0.115,
      "eval_wer": 407.92221549636804,
      "step": 200
    },
    {
      "epoch": 30.0,
      "learning_rate": 1.9108695652173912e-07,
      "loss": 2.6955,
      "step": 210
    },
    {
      "epoch": 31.43,
      "learning_rate": 1.8456521739130434e-07,
      "loss": 2.6743,
      "step": 220
    },
    {
      "epoch": 32.86,
      "learning_rate": 1.7804347826086957e-07,
      "loss": 2.6255,
      "step": 230
    },
    {
      "epoch": 34.29,
      "learning_rate": 1.715217391304348e-07,
      "loss": 2.6113,
      "step": 240
    },
    {
      "epoch": 35.71,
      "learning_rate": 1.65e-07,
      "loss": 2.5752,
      "step": 250
    },
    {
      "epoch": 37.14,
      "learning_rate": 1.584782608695652e-07,
      "loss": 2.5542,
      "step": 260
    },
    {
      "epoch": 38.57,
      "learning_rate": 1.5195652173913042e-07,
      "loss": 2.514,
      "step": 270
    },
    {
      "epoch": 40.0,
      "learning_rate": 1.4543478260869565e-07,
      "loss": 2.5026,
      "step": 280
    },
    {
      "epoch": 41.43,
      "learning_rate": 1.3891304347826087e-07,
      "loss": 2.4645,
      "step": 290
    },
    {
      "epoch": 42.86,
      "learning_rate": 1.323913043478261e-07,
      "loss": 2.4773,
      "step": 300
    },
    {
      "epoch": 42.86,
      "eval_loss": 2.4649124145507812,
      "eval_runtime": 248.4635,
      "eval_samples_per_second": 2.061,
      "eval_steps_per_second": 0.064,
      "eval_wer": 449.50060532687655,
      "step": 300
    },
    {
      "epoch": 44.29,
      "learning_rate": 1.258695652173913e-07,
      "loss": 2.428,
      "step": 310
    },
    {
      "epoch": 45.71,
      "learning_rate": 1.193478260869565e-07,
      "loss": 2.4309,
      "step": 320
    },
    {
      "epoch": 47.14,
      "learning_rate": 1.1282608695652174e-07,
      "loss": 2.395,
      "step": 330
    },
    {
      "epoch": 48.57,
      "learning_rate": 1.0630434782608696e-07,
      "loss": 2.3836,
      "step": 340
    },
    {
      "epoch": 50.0,
      "learning_rate": 9.978260869565218e-08,
      "loss": 2.3801,
      "step": 350
    },
    {
      "epoch": 51.43,
      "learning_rate": 9.326086956521739e-08,
      "loss": 2.3529,
      "step": 360
    },
    {
      "epoch": 52.86,
      "learning_rate": 8.673913043478261e-08,
      "loss": 2.3635,
      "step": 370
    },
    {
      "epoch": 54.29,
      "learning_rate": 8.021739130434783e-08,
      "loss": 2.3338,
      "step": 380
    },
    {
      "epoch": 55.71,
      "learning_rate": 7.369565217391304e-08,
      "loss": 2.3501,
      "step": 390
    },
    {
      "epoch": 57.14,
      "learning_rate": 6.717391304347826e-08,
      "loss": 2.3142,
      "step": 400
    },
    {
      "epoch": 57.14,
      "eval_loss": 2.346592664718628,
      "eval_runtime": 217.0162,
      "eval_samples_per_second": 2.359,
      "eval_steps_per_second": 0.074,
      "eval_wer": 473.600181598063,
      "step": 400
    },
    {
      "epoch": 58.57,
      "learning_rate": 6.065217391304348e-08,
      "loss": 2.3374,
      "step": 410
    },
    {
      "epoch": 60.0,
      "learning_rate": 5.4130434782608695e-08,
      "loss": 2.2977,
      "step": 420
    },
    {
      "epoch": 61.43,
      "learning_rate": 4.760869565217391e-08,
      "loss": 2.3,
      "step": 430
    },
    {
      "epoch": 62.86,
      "learning_rate": 4.1086956521739124e-08,
      "loss": 2.2979,
      "step": 440
    },
    {
      "epoch": 64.29,
      "learning_rate": 3.4565217391304345e-08,
      "loss": 2.2983,
      "step": 450
    },
    {
      "epoch": 65.71,
      "learning_rate": 2.8043478260869563e-08,
      "loss": 2.3046,
      "step": 460
    },
    {
      "epoch": 67.14,
      "learning_rate": 2.152173913043478e-08,
      "loss": 2.2772,
      "step": 470
    },
    {
      "epoch": 68.57,
      "learning_rate": 1.5e-08,
      "loss": 2.2866,
      "step": 480
    },
    {
      "epoch": 70.0,
      "learning_rate": 8.478260869565216e-09,
      "loss": 2.2822,
      "step": 490
    },
    {
      "epoch": 71.43,
      "learning_rate": 1.9565217391304347e-09,
      "loss": 2.2942,
      "step": 500
    },
    {
      "epoch": 71.43,
      "eval_loss": 2.310106039047241,
      "eval_runtime": 207.4467,
      "eval_samples_per_second": 2.468,
      "eval_steps_per_second": 0.077,
      "eval_wer": 485.9866828087167,
      "step": 500
    },
    {
      "epoch": 72.86,
      "learning_rate": 2.895e-07,
      "loss": 2.2567,
      "step": 510
    },
    {
      "epoch": 74.29,
      "learning_rate": 2.745e-07,
      "loss": 2.2426,
      "step": 520
    },
    {
      "epoch": 75.71,
      "learning_rate": 2.5949999999999996e-07,
      "loss": 2.2056,
      "step": 530
    },
    {
      "epoch": 77.14,
      "learning_rate": 2.445e-07,
      "loss": 2.2007,
      "step": 540
    },
    {
      "epoch": 78.57,
      "learning_rate": 2.2949999999999998e-07,
      "loss": 2.1627,
      "step": 550
    },
    {
      "epoch": 80.0,
      "learning_rate": 2.1449999999999997e-07,
      "loss": 2.1338,
      "step": 560
    },
    {
      "epoch": 81.43,
      "learning_rate": 1.995e-07,
      "loss": 2.1042,
      "step": 570
    },
    {
      "epoch": 82.86,
      "learning_rate": 1.845e-07,
      "loss": 2.1054,
      "step": 580
    },
    {
      "epoch": 84.29,
      "learning_rate": 1.6949999999999998e-07,
      "loss": 2.0796,
      "step": 590
    },
    {
      "epoch": 85.71,
      "learning_rate": 1.545e-07,
      "loss": 2.0644,
      "step": 600
    },
    {
      "epoch": 85.71,
      "eval_loss": 2.092550039291382,
      "eval_runtime": 188.6334,
      "eval_samples_per_second": 2.714,
      "eval_steps_per_second": 0.085,
      "eval_wer": 491.48002421307507,
      "step": 600
    },
    {
      "epoch": 87.14,
      "learning_rate": 1.395e-07,
      "loss": 2.0308,
      "step": 610
    },
    {
      "epoch": 88.57,
      "learning_rate": 1.2449999999999998e-07,
      "loss": 2.0388,
      "step": 620
    },
    {
      "epoch": 90.0,
      "learning_rate": 1.095e-07,
      "loss": 2.0281,
      "step": 630
    },
    {
      "epoch": 91.43,
      "learning_rate": 9.449999999999999e-08,
      "loss": 2.0146,
      "step": 640
    },
    {
      "epoch": 92.86,
      "learning_rate": 7.95e-08,
      "loss": 2.0052,
      "step": 650
    },
    {
      "epoch": 94.29,
      "learning_rate": 6.45e-08,
      "loss": 1.998,
      "step": 660
    },
    {
      "epoch": 95.71,
      "learning_rate": 4.95e-08,
      "loss": 1.9969,
      "step": 670
    },
    {
      "epoch": 97.14,
      "learning_rate": 3.45e-08,
      "loss": 1.9842,
      "step": 680
    },
    {
      "epoch": 98.57,
      "learning_rate": 1.95e-08,
      "loss": 1.9933,
      "step": 690
    },
    {
      "epoch": 100.0,
      "learning_rate": 4.5e-09,
      "loss": 1.9799,
      "step": 700
    },
    {
      "epoch": 100.0,
      "eval_loss": 2.027944564819336,
      "eval_runtime": 284.3457,
      "eval_samples_per_second": 1.801,
      "eval_steps_per_second": 0.056,
      "eval_wer": 497.22306295399517,
      "step": 700
    },
    {
      "epoch": 100.0,
      "step": 700,
      "total_flos": 1.061073948672e+18,
      "train_loss": 0.5946499960763114,
      "train_runtime": 764.6061,
      "train_samples_per_second": 58.592,
      "train_steps_per_second": 0.916
    }
  ],
  "max_steps": 700,
  "num_train_epochs": 100,
  "total_flos": 1.061073948672e+18,
  "trial_name": null,
  "trial_params": null
}