| { |
| "best_metric": 43.67604267701261, |
| "best_model_checkpoint": "./checkpoint-5000", |
| "epoch": 399.0025, |
| "eval_steps": 1000, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 5e-09, |
| "loss": 0.3934, |
| "step": 25 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1e-08, |
| "loss": 0.3953, |
| "step": 50 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 1.5e-08, |
| "loss": 0.392, |
| "step": 75 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 2e-08, |
| "loss": 0.3899, |
| "step": 100 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 2.5e-08, |
| "loss": 0.3868, |
| "step": 125 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 3e-08, |
| "loss": 0.3835, |
| "step": 150 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 3.4999999999999996e-08, |
| "loss": 0.3789, |
| "step": 175 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 4e-08, |
| "loss": 0.377, |
| "step": 200 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 4.5e-08, |
| "loss": 0.3706, |
| "step": 225 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 5e-08, |
| "loss": 0.3669, |
| "step": 250 |
| }, |
| { |
| "epoch": 21.0, |
| "learning_rate": 5.5e-08, |
| "loss": 0.3616, |
| "step": 275 |
| }, |
| { |
| "epoch": 23.0, |
| "learning_rate": 6e-08, |
| "loss": 0.3557, |
| "step": 300 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 6.5e-08, |
| "loss": 0.3527, |
| "step": 325 |
| }, |
| { |
| "epoch": 27.0, |
| "learning_rate": 6.999999999999999e-08, |
| "loss": 0.3482, |
| "step": 350 |
| }, |
| { |
| "epoch": 29.0, |
| "learning_rate": 7.5e-08, |
| "loss": 0.3453, |
| "step": 375 |
| }, |
| { |
| "epoch": 31.0, |
| "learning_rate": 8e-08, |
| "loss": 0.3423, |
| "step": 400 |
| }, |
| { |
| "epoch": 33.0, |
| "learning_rate": 8.5e-08, |
| "loss": 0.3398, |
| "step": 425 |
| }, |
| { |
| "epoch": 35.0, |
| "learning_rate": 9e-08, |
| "loss": 0.3336, |
| "step": 450 |
| }, |
| { |
| "epoch": 37.0, |
| "learning_rate": 9.499999999999999e-08, |
| "loss": 0.3316, |
| "step": 475 |
| }, |
| { |
| "epoch": 39.0, |
| "learning_rate": 1e-07, |
| "loss": 0.3282, |
| "step": 500 |
| }, |
| { |
| "epoch": 41.0, |
| "learning_rate": 9.944444444444444e-08, |
| "loss": 0.3281, |
| "step": 525 |
| }, |
| { |
| "epoch": 43.0, |
| "learning_rate": 9.888888888888889e-08, |
| "loss": 0.3235, |
| "step": 550 |
| }, |
| { |
| "epoch": 45.0, |
| "learning_rate": 9.833333333333333e-08, |
| "loss": 0.3191, |
| "step": 575 |
| }, |
| { |
| "epoch": 47.0, |
| "learning_rate": 9.777777777777778e-08, |
| "loss": 0.3169, |
| "step": 600 |
| }, |
| { |
| "epoch": 49.0, |
| "learning_rate": 9.722222222222221e-08, |
| "loss": 0.3145, |
| "step": 625 |
| }, |
| { |
| "epoch": 51.0, |
| "learning_rate": 9.666666666666666e-08, |
| "loss": 0.3132, |
| "step": 650 |
| }, |
| { |
| "epoch": 53.0, |
| "learning_rate": 9.611111111111111e-08, |
| "loss": 0.3098, |
| "step": 675 |
| }, |
| { |
| "epoch": 55.0, |
| "learning_rate": 9.555555555555556e-08, |
| "loss": 0.3081, |
| "step": 700 |
| }, |
| { |
| "epoch": 57.0, |
| "learning_rate": 9.499999999999999e-08, |
| "loss": 0.3071, |
| "step": 725 |
| }, |
| { |
| "epoch": 59.0, |
| "learning_rate": 9.444444444444444e-08, |
| "loss": 0.3049, |
| "step": 750 |
| }, |
| { |
| "epoch": 61.0, |
| "learning_rate": 9.388888888888889e-08, |
| "loss": 0.3055, |
| "step": 775 |
| }, |
| { |
| "epoch": 63.0, |
| "learning_rate": 9.333333333333334e-08, |
| "loss": 0.3025, |
| "step": 800 |
| }, |
| { |
| "epoch": 65.0, |
| "learning_rate": 9.277777777777778e-08, |
| "loss": 0.299, |
| "step": 825 |
| }, |
| { |
| "epoch": 67.0, |
| "learning_rate": 9.222222222222222e-08, |
| "loss": 0.2982, |
| "step": 850 |
| }, |
| { |
| "epoch": 69.0, |
| "learning_rate": 9.166666666666665e-08, |
| "loss": 0.2941, |
| "step": 875 |
| }, |
| { |
| "epoch": 71.0, |
| "learning_rate": 9.11111111111111e-08, |
| "loss": 0.2956, |
| "step": 900 |
| }, |
| { |
| "epoch": 73.0, |
| "learning_rate": 9.055555555555555e-08, |
| "loss": 0.296, |
| "step": 925 |
| }, |
| { |
| "epoch": 75.0, |
| "learning_rate": 9e-08, |
| "loss": 0.2913, |
| "step": 950 |
| }, |
| { |
| "epoch": 77.0, |
| "learning_rate": 8.944444444444445e-08, |
| "loss": 0.2905, |
| "step": 975 |
| }, |
| { |
| "epoch": 79.0, |
| "learning_rate": 8.888888888888888e-08, |
| "loss": 0.2889, |
| "step": 1000 |
| }, |
| { |
| "epoch": 79.0, |
| "eval_loss": 0.27302461862564087, |
| "eval_runtime": 254.0636, |
| "eval_samples_per_second": 3.491, |
| "eval_steps_per_second": 0.11, |
| "eval_wer": 45.024248302618815, |
| "step": 1000 |
| }, |
| { |
| "epoch": 81.0, |
| "learning_rate": 8.833333333333333e-08, |
| "loss": 0.2881, |
| "step": 1025 |
| }, |
| { |
| "epoch": 83.0, |
| "learning_rate": 8.777777777777778e-08, |
| "loss": 0.2864, |
| "step": 1050 |
| }, |
| { |
| "epoch": 85.0, |
| "learning_rate": 8.722222222222221e-08, |
| "loss": 0.2846, |
| "step": 1075 |
| }, |
| { |
| "epoch": 87.0, |
| "learning_rate": 8.666666666666666e-08, |
| "loss": 0.2832, |
| "step": 1100 |
| }, |
| { |
| "epoch": 89.0, |
| "learning_rate": 8.611111111111111e-08, |
| "loss": 0.2821, |
| "step": 1125 |
| }, |
| { |
| "epoch": 91.0, |
| "learning_rate": 8.555555555555555e-08, |
| "loss": 0.2809, |
| "step": 1150 |
| }, |
| { |
| "epoch": 93.0, |
| "learning_rate": 8.5e-08, |
| "loss": 0.2802, |
| "step": 1175 |
| }, |
| { |
| "epoch": 95.0, |
| "learning_rate": 8.444444444444444e-08, |
| "loss": 0.2809, |
| "step": 1200 |
| }, |
| { |
| "epoch": 97.0, |
| "learning_rate": 8.388888888888889e-08, |
| "loss": 0.2774, |
| "step": 1225 |
| }, |
| { |
| "epoch": 99.0, |
| "learning_rate": 8.333333333333334e-08, |
| "loss": 0.2772, |
| "step": 1250 |
| }, |
| { |
| "epoch": 101.0, |
| "learning_rate": 8.277777777777777e-08, |
| "loss": 0.2761, |
| "step": 1275 |
| }, |
| { |
| "epoch": 103.0, |
| "learning_rate": 8.222222222222221e-08, |
| "loss": 0.2739, |
| "step": 1300 |
| }, |
| { |
| "epoch": 105.0, |
| "learning_rate": 8.166666666666666e-08, |
| "loss": 0.2738, |
| "step": 1325 |
| }, |
| { |
| "epoch": 107.0, |
| "learning_rate": 8.11111111111111e-08, |
| "loss": 0.2719, |
| "step": 1350 |
| }, |
| { |
| "epoch": 109.0, |
| "learning_rate": 8.055555555555555e-08, |
| "loss": 0.2725, |
| "step": 1375 |
| }, |
| { |
| "epoch": 111.0, |
| "learning_rate": 8e-08, |
| "loss": 0.2704, |
| "step": 1400 |
| }, |
| { |
| "epoch": 113.0, |
| "learning_rate": 7.944444444444444e-08, |
| "loss": 0.2699, |
| "step": 1425 |
| }, |
| { |
| "epoch": 115.0, |
| "learning_rate": 7.888888888888889e-08, |
| "loss": 0.2698, |
| "step": 1450 |
| }, |
| { |
| "epoch": 117.0, |
| "learning_rate": 7.833333333333333e-08, |
| "loss": 0.2675, |
| "step": 1475 |
| }, |
| { |
| "epoch": 119.0, |
| "learning_rate": 7.777777777777778e-08, |
| "loss": 0.2676, |
| "step": 1500 |
| }, |
| { |
| "epoch": 121.0, |
| "learning_rate": 7.722222222222222e-08, |
| "loss": 0.2664, |
| "step": 1525 |
| }, |
| { |
| "epoch": 123.0, |
| "learning_rate": 7.666666666666666e-08, |
| "loss": 0.2638, |
| "step": 1550 |
| }, |
| { |
| "epoch": 125.0, |
| "learning_rate": 7.61111111111111e-08, |
| "loss": 0.2645, |
| "step": 1575 |
| }, |
| { |
| "epoch": 127.0, |
| "learning_rate": 7.555555555555555e-08, |
| "loss": 0.2631, |
| "step": 1600 |
| }, |
| { |
| "epoch": 129.0, |
| "learning_rate": 7.5e-08, |
| "loss": 0.263, |
| "step": 1625 |
| }, |
| { |
| "epoch": 131.0, |
| "learning_rate": 7.444444444444444e-08, |
| "loss": 0.2617, |
| "step": 1650 |
| }, |
| { |
| "epoch": 133.0, |
| "learning_rate": 7.388888888888889e-08, |
| "loss": 0.2608, |
| "step": 1675 |
| }, |
| { |
| "epoch": 135.0, |
| "learning_rate": 7.333333333333333e-08, |
| "loss": 0.2592, |
| "step": 1700 |
| }, |
| { |
| "epoch": 137.0, |
| "learning_rate": 7.277777777777778e-08, |
| "loss": 0.2582, |
| "step": 1725 |
| }, |
| { |
| "epoch": 139.0, |
| "learning_rate": 7.222222222222221e-08, |
| "loss": 0.2577, |
| "step": 1750 |
| }, |
| { |
| "epoch": 141.0, |
| "learning_rate": 7.166666666666666e-08, |
| "loss": 0.2568, |
| "step": 1775 |
| }, |
| { |
| "epoch": 143.0, |
| "learning_rate": 7.111111111111111e-08, |
| "loss": 0.2576, |
| "step": 1800 |
| }, |
| { |
| "epoch": 145.0, |
| "learning_rate": 7.055555555555556e-08, |
| "loss": 0.2562, |
| "step": 1825 |
| }, |
| { |
| "epoch": 147.0, |
| "learning_rate": 6.999999999999999e-08, |
| "loss": 0.2552, |
| "step": 1850 |
| }, |
| { |
| "epoch": 149.0, |
| "learning_rate": 6.944444444444444e-08, |
| "loss": 0.2566, |
| "step": 1875 |
| }, |
| { |
| "epoch": 151.0, |
| "learning_rate": 6.888888888888889e-08, |
| "loss": 0.253, |
| "step": 1900 |
| }, |
| { |
| "epoch": 153.0, |
| "learning_rate": 6.833333333333334e-08, |
| "loss": 0.2518, |
| "step": 1925 |
| }, |
| { |
| "epoch": 155.0, |
| "learning_rate": 6.777777777777778e-08, |
| "loss": 0.2521, |
| "step": 1950 |
| }, |
| { |
| "epoch": 157.0, |
| "learning_rate": 6.722222222222222e-08, |
| "loss": 0.2508, |
| "step": 1975 |
| }, |
| { |
| "epoch": 159.0, |
| "learning_rate": 6.666666666666665e-08, |
| "loss": 0.2527, |
| "step": 2000 |
| }, |
| { |
| "epoch": 159.0, |
| "eval_loss": 0.2593269646167755, |
| "eval_runtime": 240.0382, |
| "eval_samples_per_second": 3.695, |
| "eval_steps_per_second": 0.117, |
| "eval_wer": 44.46168768186227, |
| "step": 2000 |
| }, |
| { |
| "epoch": 161.0, |
| "learning_rate": 6.61111111111111e-08, |
| "loss": 0.2506, |
| "step": 2025 |
| }, |
| { |
| "epoch": 163.0, |
| "learning_rate": 6.555555555555555e-08, |
| "loss": 0.2492, |
| "step": 2050 |
| }, |
| { |
| "epoch": 165.0, |
| "learning_rate": 6.5e-08, |
| "loss": 0.2494, |
| "step": 2075 |
| }, |
| { |
| "epoch": 167.0, |
| "learning_rate": 6.444444444444445e-08, |
| "loss": 0.2481, |
| "step": 2100 |
| }, |
| { |
| "epoch": 169.0, |
| "learning_rate": 6.388888888888888e-08, |
| "loss": 0.2478, |
| "step": 2125 |
| }, |
| { |
| "epoch": 171.0, |
| "learning_rate": 6.333333333333333e-08, |
| "loss": 0.2472, |
| "step": 2150 |
| }, |
| { |
| "epoch": 173.0, |
| "learning_rate": 6.277777777777778e-08, |
| "loss": 0.2468, |
| "step": 2175 |
| }, |
| { |
| "epoch": 175.0, |
| "learning_rate": 6.222222222222221e-08, |
| "loss": 0.2475, |
| "step": 2200 |
| }, |
| { |
| "epoch": 177.0, |
| "learning_rate": 6.166666666666666e-08, |
| "loss": 0.2463, |
| "step": 2225 |
| }, |
| { |
| "epoch": 179.0, |
| "learning_rate": 6.111111111111111e-08, |
| "loss": 0.2447, |
| "step": 2250 |
| }, |
| { |
| "epoch": 181.0, |
| "learning_rate": 6.055555555555555e-08, |
| "loss": 0.2441, |
| "step": 2275 |
| }, |
| { |
| "epoch": 183.0, |
| "learning_rate": 6e-08, |
| "loss": 0.243, |
| "step": 2300 |
| }, |
| { |
| "epoch": 185.0, |
| "learning_rate": 5.944444444444444e-08, |
| "loss": 0.2428, |
| "step": 2325 |
| }, |
| { |
| "epoch": 187.0, |
| "learning_rate": 5.888888888888889e-08, |
| "loss": 0.2431, |
| "step": 2350 |
| }, |
| { |
| "epoch": 189.0, |
| "learning_rate": 5.833333333333333e-08, |
| "loss": 0.2417, |
| "step": 2375 |
| }, |
| { |
| "epoch": 191.0, |
| "learning_rate": 5.777777777777777e-08, |
| "loss": 0.2413, |
| "step": 2400 |
| }, |
| { |
| "epoch": 193.0, |
| "learning_rate": 5.7222222222222216e-08, |
| "loss": 0.2425, |
| "step": 2425 |
| }, |
| { |
| "epoch": 195.0, |
| "learning_rate": 5.6666666666666665e-08, |
| "loss": 0.2403, |
| "step": 2450 |
| }, |
| { |
| "epoch": 197.0, |
| "learning_rate": 5.6111111111111106e-08, |
| "loss": 0.2405, |
| "step": 2475 |
| }, |
| { |
| "epoch": 199.0, |
| "learning_rate": 5.5555555555555555e-08, |
| "loss": 0.2406, |
| "step": 2500 |
| }, |
| { |
| "epoch": 201.0, |
| "learning_rate": 5.5e-08, |
| "loss": 0.2387, |
| "step": 2525 |
| }, |
| { |
| "epoch": 203.0, |
| "learning_rate": 5.444444444444444e-08, |
| "loss": 0.2382, |
| "step": 2550 |
| }, |
| { |
| "epoch": 205.0, |
| "learning_rate": 5.3888888888888886e-08, |
| "loss": 0.2373, |
| "step": 2575 |
| }, |
| { |
| "epoch": 207.0, |
| "learning_rate": 5.333333333333333e-08, |
| "loss": 0.2377, |
| "step": 2600 |
| }, |
| { |
| "epoch": 209.0, |
| "learning_rate": 5.2777777777777776e-08, |
| "loss": 0.2379, |
| "step": 2625 |
| }, |
| { |
| "epoch": 211.0, |
| "learning_rate": 5.2222222222222224e-08, |
| "loss": 0.2356, |
| "step": 2650 |
| }, |
| { |
| "epoch": 213.0, |
| "learning_rate": 5.166666666666667e-08, |
| "loss": 0.2367, |
| "step": 2675 |
| }, |
| { |
| "epoch": 215.0, |
| "learning_rate": 5.111111111111111e-08, |
| "loss": 0.2355, |
| "step": 2700 |
| }, |
| { |
| "epoch": 217.0, |
| "learning_rate": 5.055555555555555e-08, |
| "loss": 0.2346, |
| "step": 2725 |
| }, |
| { |
| "epoch": 219.0, |
| "learning_rate": 5e-08, |
| "loss": 0.2341, |
| "step": 2750 |
| }, |
| { |
| "epoch": 221.0, |
| "learning_rate": 4.9444444444444446e-08, |
| "loss": 0.2343, |
| "step": 2775 |
| }, |
| { |
| "epoch": 223.0, |
| "learning_rate": 4.888888888888889e-08, |
| "loss": 0.2343, |
| "step": 2800 |
| }, |
| { |
| "epoch": 225.0, |
| "learning_rate": 4.833333333333333e-08, |
| "loss": 0.234, |
| "step": 2825 |
| }, |
| { |
| "epoch": 227.0, |
| "learning_rate": 4.777777777777778e-08, |
| "loss": 0.2321, |
| "step": 2850 |
| }, |
| { |
| "epoch": 229.0, |
| "learning_rate": 4.722222222222222e-08, |
| "loss": 0.2324, |
| "step": 2875 |
| }, |
| { |
| "epoch": 231.0, |
| "learning_rate": 4.666666666666667e-08, |
| "loss": 0.2322, |
| "step": 2900 |
| }, |
| { |
| "epoch": 233.0, |
| "learning_rate": 4.611111111111111e-08, |
| "loss": 0.2322, |
| "step": 2925 |
| }, |
| { |
| "epoch": 235.0, |
| "learning_rate": 4.555555555555555e-08, |
| "loss": 0.2316, |
| "step": 2950 |
| }, |
| { |
| "epoch": 237.0, |
| "learning_rate": 4.5e-08, |
| "loss": 0.2313, |
| "step": 2975 |
| }, |
| { |
| "epoch": 239.0, |
| "learning_rate": 4.444444444444444e-08, |
| "loss": 0.2306, |
| "step": 3000 |
| }, |
| { |
| "epoch": 239.0, |
| "eval_loss": 0.2538779079914093, |
| "eval_runtime": 204.9012, |
| "eval_samples_per_second": 4.329, |
| "eval_steps_per_second": 0.137, |
| "eval_wer": 44.06159068865179, |
| "step": 3000 |
| }, |
| { |
| "epoch": 241.0, |
| "learning_rate": 4.388888888888889e-08, |
| "loss": 0.2297, |
| "step": 3025 |
| }, |
| { |
| "epoch": 243.0, |
| "learning_rate": 4.333333333333333e-08, |
| "loss": 0.2298, |
| "step": 3050 |
| }, |
| { |
| "epoch": 245.0, |
| "learning_rate": 4.277777777777777e-08, |
| "loss": 0.2299, |
| "step": 3075 |
| }, |
| { |
| "epoch": 247.0, |
| "learning_rate": 4.222222222222222e-08, |
| "loss": 0.2291, |
| "step": 3100 |
| }, |
| { |
| "epoch": 249.0, |
| "learning_rate": 4.166666666666667e-08, |
| "loss": 0.229, |
| "step": 3125 |
| }, |
| { |
| "epoch": 251.0, |
| "learning_rate": 4.1111111111111104e-08, |
| "loss": 0.2289, |
| "step": 3150 |
| }, |
| { |
| "epoch": 253.0, |
| "learning_rate": 4.055555555555555e-08, |
| "loss": 0.2276, |
| "step": 3175 |
| }, |
| { |
| "epoch": 255.0, |
| "learning_rate": 4e-08, |
| "loss": 0.227, |
| "step": 3200 |
| }, |
| { |
| "epoch": 257.0, |
| "learning_rate": 3.944444444444444e-08, |
| "loss": 0.2274, |
| "step": 3225 |
| }, |
| { |
| "epoch": 259.0, |
| "learning_rate": 3.888888888888889e-08, |
| "loss": 0.2258, |
| "step": 3250 |
| }, |
| { |
| "epoch": 261.0, |
| "learning_rate": 3.833333333333333e-08, |
| "loss": 0.2274, |
| "step": 3275 |
| }, |
| { |
| "epoch": 263.0, |
| "learning_rate": 3.7777777777777774e-08, |
| "loss": 0.2259, |
| "step": 3300 |
| }, |
| { |
| "epoch": 265.0, |
| "learning_rate": 3.722222222222222e-08, |
| "loss": 0.2263, |
| "step": 3325 |
| }, |
| { |
| "epoch": 267.0, |
| "learning_rate": 3.6666666666666664e-08, |
| "loss": 0.2269, |
| "step": 3350 |
| }, |
| { |
| "epoch": 269.0, |
| "learning_rate": 3.6111111111111106e-08, |
| "loss": 0.2258, |
| "step": 3375 |
| }, |
| { |
| "epoch": 271.0, |
| "learning_rate": 3.5555555555555554e-08, |
| "loss": 0.2238, |
| "step": 3400 |
| }, |
| { |
| "epoch": 273.0, |
| "learning_rate": 3.4999999999999996e-08, |
| "loss": 0.2268, |
| "step": 3425 |
| }, |
| { |
| "epoch": 275.0, |
| "learning_rate": 3.4444444444444444e-08, |
| "loss": 0.2261, |
| "step": 3450 |
| }, |
| { |
| "epoch": 277.0, |
| "learning_rate": 3.388888888888889e-08, |
| "loss": 0.2256, |
| "step": 3475 |
| }, |
| { |
| "epoch": 279.0, |
| "learning_rate": 3.333333333333333e-08, |
| "loss": 0.2244, |
| "step": 3500 |
| }, |
| { |
| "epoch": 281.0, |
| "learning_rate": 3.2777777777777776e-08, |
| "loss": 0.2238, |
| "step": 3525 |
| }, |
| { |
| "epoch": 283.0, |
| "learning_rate": 3.2222222222222224e-08, |
| "loss": 0.224, |
| "step": 3550 |
| }, |
| { |
| "epoch": 285.0, |
| "learning_rate": 3.1666666666666666e-08, |
| "loss": 0.223, |
| "step": 3575 |
| }, |
| { |
| "epoch": 287.0, |
| "learning_rate": 3.111111111111111e-08, |
| "loss": 0.2227, |
| "step": 3600 |
| }, |
| { |
| "epoch": 289.0, |
| "learning_rate": 3.0555555555555556e-08, |
| "loss": 0.2226, |
| "step": 3625 |
| }, |
| { |
| "epoch": 291.0, |
| "learning_rate": 3e-08, |
| "loss": 0.2227, |
| "step": 3650 |
| }, |
| { |
| "epoch": 293.0, |
| "learning_rate": 2.9444444444444446e-08, |
| "loss": 0.223, |
| "step": 3675 |
| }, |
| { |
| "epoch": 295.0, |
| "learning_rate": 2.8888888888888884e-08, |
| "loss": 0.2226, |
| "step": 3700 |
| }, |
| { |
| "epoch": 297.0, |
| "learning_rate": 2.8333333333333332e-08, |
| "loss": 0.222, |
| "step": 3725 |
| }, |
| { |
| "epoch": 299.0, |
| "learning_rate": 2.7777777777777777e-08, |
| "loss": 0.223, |
| "step": 3750 |
| }, |
| { |
| "epoch": 301.0, |
| "learning_rate": 2.722222222222222e-08, |
| "loss": 0.222, |
| "step": 3775 |
| }, |
| { |
| "epoch": 303.0, |
| "learning_rate": 2.6666666666666664e-08, |
| "loss": 0.2205, |
| "step": 3800 |
| }, |
| { |
| "epoch": 305.0, |
| "learning_rate": 2.6111111111111112e-08, |
| "loss": 0.22, |
| "step": 3825 |
| }, |
| { |
| "epoch": 307.0, |
| "learning_rate": 2.5555555555555554e-08, |
| "loss": 0.2215, |
| "step": 3850 |
| }, |
| { |
| "epoch": 309.0, |
| "learning_rate": 2.5e-08, |
| "loss": 0.2208, |
| "step": 3875 |
| }, |
| { |
| "epoch": 311.0, |
| "learning_rate": 2.4444444444444444e-08, |
| "loss": 0.2205, |
| "step": 3900 |
| }, |
| { |
| "epoch": 313.0, |
| "learning_rate": 2.388888888888889e-08, |
| "loss": 0.2201, |
| "step": 3925 |
| }, |
| { |
| "epoch": 315.0, |
| "learning_rate": 2.3333333333333334e-08, |
| "loss": 0.22, |
| "step": 3950 |
| }, |
| { |
| "epoch": 317.0, |
| "learning_rate": 2.2777777777777775e-08, |
| "loss": 0.2204, |
| "step": 3975 |
| }, |
| { |
| "epoch": 319.0, |
| "learning_rate": 2.222222222222222e-08, |
| "loss": 0.2191, |
| "step": 4000 |
| }, |
| { |
| "epoch": 319.0, |
| "eval_loss": 0.2515379786491394, |
| "eval_runtime": 204.8512, |
| "eval_samples_per_second": 4.33, |
| "eval_steps_per_second": 0.137, |
| "eval_wer": 43.736663433559656, |
| "step": 4000 |
| }, |
| { |
| "epoch": 321.0, |
| "learning_rate": 2.1666666666666665e-08, |
| "loss": 0.2207, |
| "step": 4025 |
| }, |
| { |
| "epoch": 323.0, |
| "learning_rate": 2.111111111111111e-08, |
| "loss": 0.2188, |
| "step": 4050 |
| }, |
| { |
| "epoch": 325.0, |
| "learning_rate": 2.0555555555555552e-08, |
| "loss": 0.2191, |
| "step": 4075 |
| }, |
| { |
| "epoch": 327.0, |
| "learning_rate": 2e-08, |
| "loss": 0.2192, |
| "step": 4100 |
| }, |
| { |
| "epoch": 329.0, |
| "learning_rate": 1.9466666666666666e-08, |
| "loss": 0.2188, |
| "step": 4125 |
| }, |
| { |
| "epoch": 331.0, |
| "learning_rate": 1.891111111111111e-08, |
| "loss": 0.2185, |
| "step": 4150 |
| }, |
| { |
| "epoch": 333.0, |
| "learning_rate": 1.8355555555555556e-08, |
| "loss": 0.2201, |
| "step": 4175 |
| }, |
| { |
| "epoch": 335.0, |
| "learning_rate": 1.7799999999999997e-08, |
| "loss": 0.2177, |
| "step": 4200 |
| }, |
| { |
| "epoch": 337.0, |
| "learning_rate": 1.7244444444444446e-08, |
| "loss": 0.2186, |
| "step": 4225 |
| }, |
| { |
| "epoch": 339.0, |
| "learning_rate": 1.6688888888888887e-08, |
| "loss": 0.2176, |
| "step": 4250 |
| }, |
| { |
| "epoch": 341.0, |
| "learning_rate": 1.6133333333333332e-08, |
| "loss": 0.2184, |
| "step": 4275 |
| }, |
| { |
| "epoch": 343.0, |
| "learning_rate": 1.5577777777777777e-08, |
| "loss": 0.2189, |
| "step": 4300 |
| }, |
| { |
| "epoch": 345.0, |
| "learning_rate": 1.5022222222222222e-08, |
| "loss": 0.2179, |
| "step": 4325 |
| }, |
| { |
| "epoch": 347.0, |
| "learning_rate": 1.4466666666666666e-08, |
| "loss": 0.2173, |
| "step": 4350 |
| }, |
| { |
| "epoch": 349.0, |
| "learning_rate": 1.3911111111111109e-08, |
| "loss": 0.2169, |
| "step": 4375 |
| }, |
| { |
| "epoch": 351.0, |
| "learning_rate": 1.3355555555555555e-08, |
| "loss": 0.2172, |
| "step": 4400 |
| }, |
| { |
| "epoch": 353.0, |
| "learning_rate": 1.28e-08, |
| "loss": 0.2168, |
| "step": 4425 |
| }, |
| { |
| "epoch": 355.0, |
| "learning_rate": 1.2244444444444444e-08, |
| "loss": 0.217, |
| "step": 4450 |
| }, |
| { |
| "epoch": 357.0, |
| "learning_rate": 1.1688888888888889e-08, |
| "loss": 0.2185, |
| "step": 4475 |
| }, |
| { |
| "epoch": 359.0, |
| "learning_rate": 1.1133333333333334e-08, |
| "loss": 0.2172, |
| "step": 4500 |
| }, |
| { |
| "epoch": 361.0, |
| "learning_rate": 1.0577777777777777e-08, |
| "loss": 0.2173, |
| "step": 4525 |
| }, |
| { |
| "epoch": 363.0, |
| "learning_rate": 1.0022222222222222e-08, |
| "loss": 0.2176, |
| "step": 4550 |
| }, |
| { |
| "epoch": 365.0, |
| "learning_rate": 9.466666666666665e-09, |
| "loss": 0.2158, |
| "step": 4575 |
| }, |
| { |
| "epoch": 367.0, |
| "learning_rate": 8.91111111111111e-09, |
| "loss": 0.2158, |
| "step": 4600 |
| }, |
| { |
| "epoch": 369.0, |
| "learning_rate": 8.355555555555555e-09, |
| "loss": 0.2167, |
| "step": 4625 |
| }, |
| { |
| "epoch": 371.0, |
| "learning_rate": 7.8e-09, |
| "loss": 0.2172, |
| "step": 4650 |
| }, |
| { |
| "epoch": 373.0, |
| "learning_rate": 7.2444444444444445e-09, |
| "loss": 0.2161, |
| "step": 4675 |
| }, |
| { |
| "epoch": 375.0, |
| "learning_rate": 6.688888888888889e-09, |
| "loss": 0.2155, |
| "step": 4700 |
| }, |
| { |
| "epoch": 377.0, |
| "learning_rate": 6.133333333333333e-09, |
| "loss": 0.2166, |
| "step": 4725 |
| }, |
| { |
| "epoch": 379.0, |
| "learning_rate": 5.577777777777778e-09, |
| "loss": 0.2168, |
| "step": 4750 |
| }, |
| { |
| "epoch": 381.0, |
| "learning_rate": 5.022222222222222e-09, |
| "loss": 0.2172, |
| "step": 4775 |
| }, |
| { |
| "epoch": 383.0, |
| "learning_rate": 4.466666666666666e-09, |
| "loss": 0.2149, |
| "step": 4800 |
| }, |
| { |
| "epoch": 385.0, |
| "learning_rate": 3.911111111111111e-09, |
| "loss": 0.2161, |
| "step": 4825 |
| }, |
| { |
| "epoch": 387.0, |
| "learning_rate": 3.3555555555555553e-09, |
| "loss": 0.2164, |
| "step": 4850 |
| }, |
| { |
| "epoch": 389.0, |
| "learning_rate": 2.8e-09, |
| "loss": 0.2162, |
| "step": 4875 |
| }, |
| { |
| "epoch": 391.0, |
| "learning_rate": 2.2444444444444444e-09, |
| "loss": 0.2157, |
| "step": 4900 |
| }, |
| { |
| "epoch": 393.0, |
| "learning_rate": 1.6888888888888886e-09, |
| "loss": 0.2178, |
| "step": 4925 |
| }, |
| { |
| "epoch": 395.0, |
| "learning_rate": 1.1333333333333333e-09, |
| "loss": 0.2158, |
| "step": 4950 |
| }, |
| { |
| "epoch": 397.0, |
| "learning_rate": 5.777777777777777e-10, |
| "loss": 0.2157, |
| "step": 4975 |
| }, |
| { |
| "epoch": 399.0, |
| "learning_rate": 2.2222222222222222e-11, |
| "loss": 0.2164, |
| "step": 5000 |
| }, |
| { |
| "epoch": 399.0, |
| "eval_loss": 0.25085458159446716, |
| "eval_runtime": 204.438, |
| "eval_samples_per_second": 4.339, |
| "eval_steps_per_second": 0.137, |
| "eval_wer": 43.67604267701261, |
| "step": 5000 |
| }, |
| { |
| "epoch": 399.0, |
| "step": 5000, |
| "total_flos": 2.0184430804992e+19, |
| "train_loss": 0.2566693991661072, |
| "train_runtime": 24404.6676, |
| "train_samples_per_second": 13.112, |
| "train_steps_per_second": 0.205 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "total_flos": 2.0184430804992e+19, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|