| { | |
| "best_metric": 31.733787724874663, | |
| "best_model_checkpoint": "./LMT2\\checkpoint-4000", | |
| "epoch": 0.062114694783918505, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.2000000000000006e-07, | |
| "loss": 4.0064, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 9.200000000000001e-07, | |
| "loss": 3.2941, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.42e-06, | |
| "loss": 2.3704, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.9200000000000003e-06, | |
| "loss": 1.3666, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.42e-06, | |
| "loss": 0.9502, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.92e-06, | |
| "loss": 0.8216, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.4200000000000007e-06, | |
| "loss": 0.7506, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.920000000000001e-06, | |
| "loss": 0.7069, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.42e-06, | |
| "loss": 0.6066, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.92e-06, | |
| "loss": 0.5391, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.420000000000001e-06, | |
| "loss": 0.4676, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.92e-06, | |
| "loss": 0.3422, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.42e-06, | |
| "loss": 0.3114, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.92e-06, | |
| "loss": 0.2746, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.420000000000001e-06, | |
| "loss": 0.2893, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.92e-06, | |
| "loss": 0.2989, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.42e-06, | |
| "loss": 0.2677, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.920000000000001e-06, | |
| "loss": 0.3219, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.42e-06, | |
| "loss": 0.3024, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.920000000000002e-06, | |
| "loss": 0.2838, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.940000000000001e-06, | |
| "loss": 0.2785, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.86857142857143e-06, | |
| "loss": 0.2667, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.797142857142858e-06, | |
| "loss": 0.296, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.725714285714287e-06, | |
| "loss": 0.2913, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.654285714285716e-06, | |
| "loss": 0.3022, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.582857142857143e-06, | |
| "loss": 0.2617, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.511428571428572e-06, | |
| "loss": 0.2747, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.440000000000001e-06, | |
| "loss": 0.289, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.368571428571428e-06, | |
| "loss": 0.2664, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.297142857142857e-06, | |
| "loss": 0.2704, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.225714285714286e-06, | |
| "loss": 0.2659, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.154285714285715e-06, | |
| "loss": 0.2679, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.082857142857143e-06, | |
| "loss": 0.2833, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.011428571428572e-06, | |
| "loss": 0.2651, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.94e-06, | |
| "loss": 0.2936, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.86857142857143e-06, | |
| "loss": 0.2829, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.797142857142857e-06, | |
| "loss": 0.2706, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.725714285714286e-06, | |
| "loss": 0.2938, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.654285714285715e-06, | |
| "loss": 0.2975, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.582857142857144e-06, | |
| "loss": 0.254, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 0.3774094581604004, | |
| "eval_runtime": 6578.4668, | |
| "eval_samples_per_second": 2.489, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 38.74037421764918, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.511428571428571e-06, | |
| "loss": 0.276, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.44e-06, | |
| "loss": 0.2429, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.36857142857143e-06, | |
| "loss": 0.2897, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.297142857142859e-06, | |
| "loss": 0.25, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.225714285714288e-06, | |
| "loss": 0.2531, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.154285714285715e-06, | |
| "loss": 0.2671, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.082857142857144e-06, | |
| "loss": 0.2584, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.011428571428573e-06, | |
| "loss": 0.2791, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.94e-06, | |
| "loss": 0.2686, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.86857142857143e-06, | |
| "loss": 0.2511, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.797142857142858e-06, | |
| "loss": 0.2821, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.725714285714286e-06, | |
| "loss": 0.2645, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.654285714285715e-06, | |
| "loss": 0.2608, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.5828571428571444e-06, | |
| "loss": 0.2745, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.511428571428572e-06, | |
| "loss": 0.2865, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.440000000000001e-06, | |
| "loss": 0.2767, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.36857142857143e-06, | |
| "loss": 0.2775, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.297142857142858e-06, | |
| "loss": 0.2997, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.225714285714286e-06, | |
| "loss": 0.2723, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.154285714285715e-06, | |
| "loss": 0.2512, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.082857142857143e-06, | |
| "loss": 0.2803, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.011428571428572e-06, | |
| "loss": 0.2905, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.9400000000000005e-06, | |
| "loss": 0.2778, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6.868571428571429e-06, | |
| "loss": 0.2503, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.797142857142858e-06, | |
| "loss": 0.2909, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.725714285714287e-06, | |
| "loss": 0.2437, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.654285714285716e-06, | |
| "loss": 0.2407, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.582857142857143e-06, | |
| "loss": 0.2376, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.511428571428572e-06, | |
| "loss": 0.2798, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.440000000000001e-06, | |
| "loss": 0.2616, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.368571428571429e-06, | |
| "loss": 0.2834, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.297142857142857e-06, | |
| "loss": 0.26, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.225714285714286e-06, | |
| "loss": 0.2766, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.1542857142857145e-06, | |
| "loss": 0.2877, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.0828571428571435e-06, | |
| "loss": 0.2387, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.011428571428572e-06, | |
| "loss": 0.2561, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.94e-06, | |
| "loss": 0.2661, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.868571428571429e-06, | |
| "loss": 0.2718, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.797142857142858e-06, | |
| "loss": 0.2819, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.725714285714287e-06, | |
| "loss": 0.2612, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 0.35927361249923706, | |
| "eval_runtime": 5393.364, | |
| "eval_samples_per_second": 3.036, | |
| "eval_steps_per_second": 0.38, | |
| "eval_wer": 34.29825998623718, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.654285714285714e-06, | |
| "loss": 0.2286, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.582857142857143e-06, | |
| "loss": 0.2463, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.511428571428572e-06, | |
| "loss": 0.268, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.4400000000000004e-06, | |
| "loss": 0.2583, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.368571428571429e-06, | |
| "loss": 0.2887, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.297142857142858e-06, | |
| "loss": 0.2491, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.225714285714286e-06, | |
| "loss": 0.235, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.154285714285715e-06, | |
| "loss": 0.2838, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.082857142857144e-06, | |
| "loss": 0.253, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.011428571428571e-06, | |
| "loss": 0.2565, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.94e-06, | |
| "loss": 0.2459, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.868571428571429e-06, | |
| "loss": 0.2722, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.797142857142857e-06, | |
| "loss": 0.2533, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.725714285714286e-06, | |
| "loss": 0.2543, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.6542857142857145e-06, | |
| "loss": 0.255, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.5828571428571435e-06, | |
| "loss": 0.2366, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.511428571428572e-06, | |
| "loss": 0.2674, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.440000000000001e-06, | |
| "loss": 0.2538, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.368571428571429e-06, | |
| "loss": 0.2612, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.297142857142858e-06, | |
| "loss": 0.2386, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.225714285714286e-06, | |
| "loss": 0.2496, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.154285714285714e-06, | |
| "loss": 0.273, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.082857142857143e-06, | |
| "loss": 0.2424, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.011428571428571e-06, | |
| "loss": 0.2719, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.94e-06, | |
| "loss": 0.2584, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.8685714285714286e-06, | |
| "loss": 0.27, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.7971428571428576e-06, | |
| "loss": 0.2698, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.7257142857142857e-06, | |
| "loss": 0.2584, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.6542857142857148e-06, | |
| "loss": 0.2359, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.582857142857143e-06, | |
| "loss": 0.2294, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.511428571428572e-06, | |
| "loss": 0.267, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.44e-06, | |
| "loss": 0.233, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.3685714285714287e-06, | |
| "loss": 0.256, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.2971428571428577e-06, | |
| "loss": 0.2619, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.225714285714286e-06, | |
| "loss": 0.2632, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.154285714285715e-06, | |
| "loss": 0.2671, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.082857142857143e-06, | |
| "loss": 0.2621, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.0114285714285716e-06, | |
| "loss": 0.2423, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.9400000000000002e-06, | |
| "loss": 0.2379, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.868571428571429e-06, | |
| "loss": 0.237, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 0.34778067469596863, | |
| "eval_runtime": 4475.7535, | |
| "eval_samples_per_second": 3.658, | |
| "eval_steps_per_second": 0.457, | |
| "eval_wer": 32.0188747255628, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.797142857142857e-06, | |
| "loss": 0.2485, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.725714285714286e-06, | |
| "loss": 0.2483, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.654285714285714e-06, | |
| "loss": 0.2453, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.582857142857143e-06, | |
| "loss": 0.2582, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.5114285714285718e-06, | |
| "loss": 0.2556, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.4400000000000004e-06, | |
| "loss": 0.2425, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.3685714285714285e-06, | |
| "loss": 0.2826, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.297142857142857e-06, | |
| "loss": 0.2603, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.2257142857142857e-06, | |
| "loss": 0.2592, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.1542857142857147e-06, | |
| "loss": 0.2603, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.0828571428571433e-06, | |
| "loss": 0.2402, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.0114285714285715e-06, | |
| "loss": 0.2401, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.94e-06, | |
| "loss": 0.2606, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.8685714285714289e-06, | |
| "loss": 0.2367, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.7971428571428572e-06, | |
| "loss": 0.2352, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.7257142857142858e-06, | |
| "loss": 0.2322, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.6542857142857144e-06, | |
| "loss": 0.2629, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.582857142857143e-06, | |
| "loss": 0.2329, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.5114285714285714e-06, | |
| "loss": 0.2731, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.44e-06, | |
| "loss": 0.2523, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.3685714285714286e-06, | |
| "loss": 0.2294, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.2971428571428574e-06, | |
| "loss": 0.2583, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.2257142857142857e-06, | |
| "loss": 0.2407, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.1542857142857143e-06, | |
| "loss": 0.2425, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.082857142857143e-06, | |
| "loss": 0.2302, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0114285714285715e-06, | |
| "loss": 0.2302, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.400000000000001e-07, | |
| "loss": 0.2472, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 8.685714285714286e-07, | |
| "loss": 0.261, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.971428571428572e-07, | |
| "loss": 0.2217, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.257142857142857e-07, | |
| "loss": 0.2451, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 6.542857142857144e-07, | |
| "loss": 0.2395, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 5.82857142857143e-07, | |
| "loss": 0.2501, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 5.114285714285714e-07, | |
| "loss": 0.2365, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.4e-07, | |
| "loss": 0.2612, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.685714285714286e-07, | |
| "loss": 0.2492, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.9714285714285715e-07, | |
| "loss": 0.2571, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.2571428571428574e-07, | |
| "loss": 0.2442, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.542857142857143e-07, | |
| "loss": 0.2513, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 8.285714285714285e-08, | |
| "loss": 0.2402, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.142857142857143e-08, | |
| "loss": 0.2618, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 0.34070348739624023, | |
| "eval_runtime": 4483.294, | |
| "eval_samples_per_second": 3.652, | |
| "eval_steps_per_second": 0.457, | |
| "eval_wer": 31.733787724874663, | |
| "step": 4000 | |
| } | |
| ], | |
| "max_steps": 4000, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.846946562048e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |