| { |
| "best_metric": 0.4557079945799458, |
| "best_model_checkpoint": "whisper-tiny-danish-2/checkpoint-12000", |
| "epoch": 3.00555, |
| "eval_steps": 1000, |
| "global_step": 12000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001, |
| "grad_norm": 22.681325912475586, |
| "learning_rate": 2.35e-06, |
| "loss": 4.0513, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 7.509825706481934, |
| "learning_rate": 4.85e-06, |
| "loss": 2.4332, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 7.44932746887207, |
| "learning_rate": 7.35e-06, |
| "loss": 1.8256, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 6.986607551574707, |
| "learning_rate": 9.85e-06, |
| "loss": 1.6391, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 7.076284408569336, |
| "learning_rate": 1.235e-05, |
| "loss": 1.5296, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 6.501603603363037, |
| "learning_rate": 1.485e-05, |
| "loss": 1.4288, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 6.999312400817871, |
| "learning_rate": 1.7349999999999998e-05, |
| "loss": 1.3068, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 6.261101722717285, |
| "learning_rate": 1.985e-05, |
| "loss": 1.2373, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 6.493436813354492, |
| "learning_rate": 2.235e-05, |
| "loss": 1.2242, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.082971096038818, |
| "learning_rate": 2.485e-05, |
| "loss": 1.1575, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 6.2460174560546875, |
| "learning_rate": 2.7350000000000004e-05, |
| "loss": 1.093, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 5.896036148071289, |
| "learning_rate": 2.985e-05, |
| "loss": 1.0961, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 5.539938926696777, |
| "learning_rate": 3.235e-05, |
| "loss": 1.0036, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 5.470737934112549, |
| "learning_rate": 3.485e-05, |
| "loss": 0.974, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 5.766882419586182, |
| "learning_rate": 3.735e-05, |
| "loss": 0.9315, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 5.046978950500488, |
| "learning_rate": 3.9850000000000006e-05, |
| "loss": 0.9166, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 5.6074957847595215, |
| "learning_rate": 4.235e-05, |
| "loss": 0.9883, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 5.528851509094238, |
| "learning_rate": 4.4850000000000006e-05, |
| "loss": 0.9578, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 5.047084331512451, |
| "learning_rate": 4.735e-05, |
| "loss": 0.8865, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.150805473327637, |
| "learning_rate": 4.9850000000000006e-05, |
| "loss": 0.8565, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 1.2942239046096802, |
| "eval_runtime": 216.6252, |
| "eval_samples_per_second": 4.653, |
| "eval_steps_per_second": 0.148, |
| "eval_wer": 0.6471036585365854, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 4.584412097930908, |
| "learning_rate": 4.9952040816326534e-05, |
| "loss": 0.8296, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 5.257778167724609, |
| "learning_rate": 4.9901020408163266e-05, |
| "loss": 0.802, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 5.010299205780029, |
| "learning_rate": 4.9850000000000006e-05, |
| "loss": 0.7911, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 5.242851734161377, |
| "learning_rate": 4.979897959183674e-05, |
| "loss": 0.8318, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 5.349483489990234, |
| "learning_rate": 4.974795918367347e-05, |
| "loss": 0.8327, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 6.035111427307129, |
| "learning_rate": 4.96969387755102e-05, |
| "loss": 0.8355, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 5.254024982452393, |
| "learning_rate": 4.964591836734694e-05, |
| "loss": 0.8851, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 6.512954235076904, |
| "learning_rate": 4.959489795918368e-05, |
| "loss": 0.8718, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 4.468319416046143, |
| "learning_rate": 4.954387755102041e-05, |
| "loss": 0.815, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.5422492027282715, |
| "learning_rate": 4.9492857142857146e-05, |
| "loss": 0.8086, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 4.861804008483887, |
| "learning_rate": 4.944183673469388e-05, |
| "loss": 0.7848, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 5.152141571044922, |
| "learning_rate": 4.939081632653062e-05, |
| "loss": 0.7382, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 4.768085479736328, |
| "learning_rate": 4.933979591836735e-05, |
| "loss": 0.7786, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 4.68101167678833, |
| "learning_rate": 4.928877551020408e-05, |
| "loss": 0.733, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 4.635968208312988, |
| "learning_rate": 4.9237755102040814e-05, |
| "loss": 0.7032, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 5.221863269805908, |
| "learning_rate": 4.918673469387755e-05, |
| "loss": 0.703, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 5.017695426940918, |
| "learning_rate": 4.913571428571429e-05, |
| "loss": 0.6421, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 4.761963367462158, |
| "learning_rate": 4.9084693877551025e-05, |
| "loss": 0.703, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 4.619095325469971, |
| "learning_rate": 4.903367346938776e-05, |
| "loss": 0.7374, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 4.652743816375732, |
| "learning_rate": 4.8982653061224496e-05, |
| "loss": 0.7434, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.0815221071243286, |
| "eval_runtime": 235.89, |
| "eval_samples_per_second": 4.273, |
| "eval_steps_per_second": 0.136, |
| "eval_wer": 0.5818512872628726, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 5.1539177894592285, |
| "learning_rate": 4.893163265306123e-05, |
| "loss": 0.7661, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 4.277270793914795, |
| "learning_rate": 4.888061224489796e-05, |
| "loss": 0.6908, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 4.588935852050781, |
| "learning_rate": 4.882959183673469e-05, |
| "loss": 0.6411, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 4.606882572174072, |
| "learning_rate": 4.877857142857143e-05, |
| "loss": 0.6492, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 4.498349189758301, |
| "learning_rate": 4.8727551020408165e-05, |
| "loss": 0.6592, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 4.668141841888428, |
| "learning_rate": 4.8676530612244904e-05, |
| "loss": 0.6865, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 4.357521057128906, |
| "learning_rate": 4.8625510204081636e-05, |
| "loss": 0.676, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 4.414557933807373, |
| "learning_rate": 4.857448979591837e-05, |
| "loss": 0.6496, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 4.414867877960205, |
| "learning_rate": 4.852346938775511e-05, |
| "loss": 0.6679, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 4.020086765289307, |
| "learning_rate": 4.847244897959184e-05, |
| "loss": 0.6364, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 5.012465476989746, |
| "learning_rate": 4.842142857142857e-05, |
| "loss": 0.6624, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 4.224608421325684, |
| "learning_rate": 4.8370408163265305e-05, |
| "loss": 0.6609, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 4.476141929626465, |
| "learning_rate": 4.8319387755102044e-05, |
| "loss": 0.6402, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 4.985313892364502, |
| "learning_rate": 4.8268367346938776e-05, |
| "loss": 0.6706, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 4.675138473510742, |
| "learning_rate": 4.8217346938775515e-05, |
| "loss": 0.7041, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 5.3597846031188965, |
| "learning_rate": 4.816632653061225e-05, |
| "loss": 0.6993, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 4.724060535430908, |
| "learning_rate": 4.811530612244898e-05, |
| "loss": 0.6681, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 4.375901222229004, |
| "learning_rate": 4.806428571428572e-05, |
| "loss": 0.6205, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 4.6154279708862305, |
| "learning_rate": 4.801326530612245e-05, |
| "loss": 0.5887, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.766662120819092, |
| "learning_rate": 4.7962244897959184e-05, |
| "loss": 0.6056, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 1.0121757984161377, |
| "eval_runtime": 218.7463, |
| "eval_samples_per_second": 4.608, |
| "eval_steps_per_second": 0.146, |
| "eval_wer": 0.5472984417344173, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 4.1083550453186035, |
| "learning_rate": 4.791122448979592e-05, |
| "loss": 0.5938, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 4.648180961608887, |
| "learning_rate": 4.7860204081632655e-05, |
| "loss": 0.6083, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 4.021754264831543, |
| "learning_rate": 4.780918367346939e-05, |
| "loss": 0.6132, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 3.82786226272583, |
| "learning_rate": 4.775816326530613e-05, |
| "loss": 0.6348, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 4.385377407073975, |
| "learning_rate": 4.770714285714286e-05, |
| "loss": 0.602, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 5.215423107147217, |
| "learning_rate": 4.76561224489796e-05, |
| "loss": 0.6135, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 4.4256486892700195, |
| "learning_rate": 4.760510204081633e-05, |
| "loss": 0.6353, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 4.338476181030273, |
| "learning_rate": 4.755408163265306e-05, |
| "loss": 0.6114, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 4.410732269287109, |
| "learning_rate": 4.7503061224489795e-05, |
| "loss": 0.612, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 4.397231578826904, |
| "learning_rate": 4.7452040816326534e-05, |
| "loss": 0.5984, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 4.233676433563232, |
| "learning_rate": 4.740102040816327e-05, |
| "loss": 0.6419, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 4.81524658203125, |
| "learning_rate": 4.735e-05, |
| "loss": 0.6158, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 4.236979961395264, |
| "learning_rate": 4.729897959183674e-05, |
| "loss": 0.6041, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 4.120030403137207, |
| "learning_rate": 4.724795918367347e-05, |
| "loss": 0.6378, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 3.8795013427734375, |
| "learning_rate": 4.719693877551021e-05, |
| "loss": 0.6577, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 4.2965087890625, |
| "learning_rate": 4.714591836734694e-05, |
| "loss": 0.6341, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 4.946217060089111, |
| "learning_rate": 4.7094897959183674e-05, |
| "loss": 0.6217, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 4.450223445892334, |
| "learning_rate": 4.7043877551020407e-05, |
| "loss": 0.6383, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.00085, |
| "grad_norm": 5.3936591148376465, |
| "learning_rate": 4.6992857142857146e-05, |
| "loss": 0.6186, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.00185, |
| "grad_norm": 4.123908042907715, |
| "learning_rate": 4.694183673469388e-05, |
| "loss": 0.6141, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.00185, |
| "eval_loss": 0.9478016495704651, |
| "eval_runtime": 215.0961, |
| "eval_samples_per_second": 4.686, |
| "eval_steps_per_second": 0.149, |
| "eval_wer": 0.5331554878048781, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.00285, |
| "grad_norm": 4.490115642547607, |
| "learning_rate": 4.689081632653061e-05, |
| "loss": 0.6373, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.00385, |
| "grad_norm": 4.364250659942627, |
| "learning_rate": 4.683979591836735e-05, |
| "loss": 0.641, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.00485, |
| "grad_norm": 4.241596221923828, |
| "learning_rate": 4.678877551020409e-05, |
| "loss": 0.6284, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.00585, |
| "grad_norm": 4.362817764282227, |
| "learning_rate": 4.673775510204082e-05, |
| "loss": 0.6346, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.00685, |
| "grad_norm": 3.9235153198242188, |
| "learning_rate": 4.668673469387755e-05, |
| "loss": 0.6007, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.00785, |
| "grad_norm": 3.7977848052978516, |
| "learning_rate": 4.6635714285714286e-05, |
| "loss": 0.5891, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.00885, |
| "grad_norm": 4.0569305419921875, |
| "learning_rate": 4.6584693877551025e-05, |
| "loss": 0.6123, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.00985, |
| "grad_norm": 4.0435872077941895, |
| "learning_rate": 4.653367346938776e-05, |
| "loss": 0.5948, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.01085, |
| "grad_norm": 3.7464935779571533, |
| "learning_rate": 4.648265306122449e-05, |
| "loss": 0.5665, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.01185, |
| "grad_norm": 3.99239182472229, |
| "learning_rate": 4.643163265306122e-05, |
| "loss": 0.6124, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.01285, |
| "grad_norm": 3.6230008602142334, |
| "learning_rate": 4.638061224489796e-05, |
| "loss": 0.5632, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.01385, |
| "grad_norm": 3.5754306316375732, |
| "learning_rate": 4.63295918367347e-05, |
| "loss": 0.5425, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.01485, |
| "grad_norm": 3.7989814281463623, |
| "learning_rate": 4.627857142857143e-05, |
| "loss": 0.5388, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.01585, |
| "grad_norm": 3.778059959411621, |
| "learning_rate": 4.6227551020408165e-05, |
| "loss": 0.5297, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.01685, |
| "grad_norm": 3.987022876739502, |
| "learning_rate": 4.61765306122449e-05, |
| "loss": 0.58, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.01785, |
| "grad_norm": 3.883904218673706, |
| "learning_rate": 4.6125510204081636e-05, |
| "loss": 0.5964, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.01885, |
| "grad_norm": 4.473369598388672, |
| "learning_rate": 4.607448979591837e-05, |
| "loss": 0.5512, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.01985, |
| "grad_norm": 4.040229797363281, |
| "learning_rate": 4.60234693877551e-05, |
| "loss": 0.5292, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.02085, |
| "grad_norm": 4.538361072540283, |
| "learning_rate": 4.597244897959183e-05, |
| "loss": 0.5196, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.02185, |
| "grad_norm": 4.212509632110596, |
| "learning_rate": 4.592142857142858e-05, |
| "loss": 0.5085, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.02185, |
| "eval_loss": 0.9282792806625366, |
| "eval_runtime": 224.1163, |
| "eval_samples_per_second": 4.498, |
| "eval_steps_per_second": 0.143, |
| "eval_wer": 0.5289634146341463, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.02285, |
| "grad_norm": 3.8673441410064697, |
| "learning_rate": 4.587040816326531e-05, |
| "loss": 0.4988, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.02385, |
| "grad_norm": 3.70070743560791, |
| "learning_rate": 4.5819387755102044e-05, |
| "loss": 0.5424, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.02485, |
| "grad_norm": 4.379025459289551, |
| "learning_rate": 4.5768367346938776e-05, |
| "loss": 0.5425, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.02585, |
| "grad_norm": 4.310212135314941, |
| "learning_rate": 4.5717346938775515e-05, |
| "loss": 0.5466, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.02685, |
| "grad_norm": 4.193725109100342, |
| "learning_rate": 4.566632653061225e-05, |
| "loss": 0.5949, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.02785, |
| "grad_norm": 4.096522808074951, |
| "learning_rate": 4.561530612244898e-05, |
| "loss": 0.6003, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.02885, |
| "grad_norm": 4.387059211730957, |
| "learning_rate": 4.556428571428571e-05, |
| "loss": 0.5672, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.02985, |
| "grad_norm": 4.410869598388672, |
| "learning_rate": 4.551326530612245e-05, |
| "loss": 0.5512, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.03085, |
| "grad_norm": 4.141435623168945, |
| "learning_rate": 4.546224489795919e-05, |
| "loss": 0.5534, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.03185, |
| "grad_norm": 3.6259546279907227, |
| "learning_rate": 4.541122448979592e-05, |
| "loss": 0.4962, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.03285, |
| "grad_norm": 4.343920707702637, |
| "learning_rate": 4.5360204081632655e-05, |
| "loss": 0.5411, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.03385, |
| "grad_norm": 4.238042831420898, |
| "learning_rate": 4.530918367346939e-05, |
| "loss": 0.5217, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.03485, |
| "grad_norm": 4.237799167633057, |
| "learning_rate": 4.525816326530613e-05, |
| "loss": 0.4973, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.03585, |
| "grad_norm": 4.474476337432861, |
| "learning_rate": 4.520714285714286e-05, |
| "loss": 0.4967, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.03685, |
| "grad_norm": 3.6932973861694336, |
| "learning_rate": 4.515612244897959e-05, |
| "loss": 0.4524, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.03785, |
| "grad_norm": 4.185140132904053, |
| "learning_rate": 4.5105102040816324e-05, |
| "loss": 0.4851, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.03885, |
| "grad_norm": 4.114623069763184, |
| "learning_rate": 4.505408163265306e-05, |
| "loss": 0.5371, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.03985, |
| "grad_norm": 3.961071729660034, |
| "learning_rate": 4.5003061224489795e-05, |
| "loss": 0.5361, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.04085, |
| "grad_norm": 3.9876036643981934, |
| "learning_rate": 4.4952040816326534e-05, |
| "loss": 0.5576, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.04185, |
| "grad_norm": 4.096221446990967, |
| "learning_rate": 4.490102040816327e-05, |
| "loss": 0.5108, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.04185, |
| "eval_loss": 0.8966282606124878, |
| "eval_runtime": 214.6844, |
| "eval_samples_per_second": 4.695, |
| "eval_steps_per_second": 0.149, |
| "eval_wer": 0.4938177506775068, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.04285, |
| "grad_norm": 3.3264214992523193, |
| "learning_rate": 4.4850000000000006e-05, |
| "loss": 0.4692, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.04385, |
| "grad_norm": 4.144850730895996, |
| "learning_rate": 4.479897959183674e-05, |
| "loss": 0.4676, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.04485, |
| "grad_norm": 4.427196502685547, |
| "learning_rate": 4.474795918367347e-05, |
| "loss": 0.4796, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.04585, |
| "grad_norm": 3.8419079780578613, |
| "learning_rate": 4.46969387755102e-05, |
| "loss": 0.5092, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.04685, |
| "grad_norm": 4.1315436363220215, |
| "learning_rate": 4.464591836734694e-05, |
| "loss": 0.4992, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.04785, |
| "grad_norm": 3.6058404445648193, |
| "learning_rate": 4.4594897959183674e-05, |
| "loss": 0.4844, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.04885, |
| "grad_norm": 3.776262044906616, |
| "learning_rate": 4.454387755102041e-05, |
| "loss": 0.4886, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.04985, |
| "grad_norm": 3.4971370697021484, |
| "learning_rate": 4.4492857142857146e-05, |
| "loss": 0.4755, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.05085, |
| "grad_norm": 4.299288272857666, |
| "learning_rate": 4.444183673469388e-05, |
| "loss": 0.4826, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.05185, |
| "grad_norm": 3.4071223735809326, |
| "learning_rate": 4.439081632653062e-05, |
| "loss": 0.5001, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.05285, |
| "grad_norm": 3.8092288970947266, |
| "learning_rate": 4.433979591836735e-05, |
| "loss": 0.4797, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.05385, |
| "grad_norm": 4.502504348754883, |
| "learning_rate": 4.428877551020408e-05, |
| "loss": 0.4928, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.05485, |
| "grad_norm": 4.641124725341797, |
| "learning_rate": 4.4237755102040814e-05, |
| "loss": 0.5373, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.05585, |
| "grad_norm": 4.549639701843262, |
| "learning_rate": 4.4186734693877554e-05, |
| "loss": 0.529, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.05685, |
| "grad_norm": 3.3781280517578125, |
| "learning_rate": 4.4135714285714286e-05, |
| "loss": 0.5129, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.05785, |
| "grad_norm": 4.276547431945801, |
| "learning_rate": 4.408469387755102e-05, |
| "loss": 0.478, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.05885, |
| "grad_norm": 4.88979959487915, |
| "learning_rate": 4.403367346938776e-05, |
| "loss": 0.4405, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.05985, |
| "grad_norm": 5.3919267654418945, |
| "learning_rate": 4.3982653061224497e-05, |
| "loss": 0.4544, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.06085, |
| "grad_norm": 3.975532293319702, |
| "learning_rate": 4.393163265306123e-05, |
| "loss": 0.4517, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.06185, |
| "grad_norm": 3.94978404045105, |
| "learning_rate": 4.388061224489796e-05, |
| "loss": 0.4604, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.06185, |
| "eval_loss": 0.8832055926322937, |
| "eval_runtime": 217.1866, |
| "eval_samples_per_second": 4.641, |
| "eval_steps_per_second": 0.147, |
| "eval_wer": 0.49817920054200543, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.06285, |
| "grad_norm": 3.804979085922241, |
| "learning_rate": 4.3829591836734694e-05, |
| "loss": 0.4659, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.06385, |
| "grad_norm": 4.515665054321289, |
| "learning_rate": 4.377857142857143e-05, |
| "loss": 0.4879, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.06485, |
| "grad_norm": 4.103418827056885, |
| "learning_rate": 4.3727551020408165e-05, |
| "loss": 0.4608, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.06585, |
| "grad_norm": 3.9994966983795166, |
| "learning_rate": 4.36765306122449e-05, |
| "loss": 0.4712, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.06685, |
| "grad_norm": 3.973745107650757, |
| "learning_rate": 4.362551020408163e-05, |
| "loss": 0.4873, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.06785, |
| "grad_norm": 3.7433559894561768, |
| "learning_rate": 4.357448979591837e-05, |
| "loss": 0.4735, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.06885, |
| "grad_norm": 3.822484254837036, |
| "learning_rate": 4.352346938775511e-05, |
| "loss": 0.4712, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.06985, |
| "grad_norm": 3.4630181789398193, |
| "learning_rate": 4.347244897959184e-05, |
| "loss": 0.4583, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.07085, |
| "grad_norm": 4.550868034362793, |
| "learning_rate": 4.342142857142857e-05, |
| "loss": 0.493, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.07185, |
| "grad_norm": 4.130419731140137, |
| "learning_rate": 4.337142857142857e-05, |
| "loss": 0.4815, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.07285, |
| "grad_norm": 3.5420877933502197, |
| "learning_rate": 4.332040816326531e-05, |
| "loss": 0.4735, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.07385, |
| "grad_norm": 4.168927192687988, |
| "learning_rate": 4.326938775510204e-05, |
| "loss": 0.4919, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.07485, |
| "grad_norm": 3.9668235778808594, |
| "learning_rate": 4.3218367346938775e-05, |
| "loss": 0.5107, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.07585, |
| "grad_norm": 4.459245681762695, |
| "learning_rate": 4.316734693877551e-05, |
| "loss": 0.5011, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.07685, |
| "grad_norm": 4.179884910583496, |
| "learning_rate": 4.311632653061225e-05, |
| "loss": 0.4806, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.07785, |
| "grad_norm": 5.189551830291748, |
| "learning_rate": 4.3065306122448986e-05, |
| "loss": 0.4973, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.0007, |
| "grad_norm": 5.872171401977539, |
| "learning_rate": 4.301428571428572e-05, |
| "loss": 0.4958, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.0017, |
| "grad_norm": 3.948779582977295, |
| "learning_rate": 4.296326530612245e-05, |
| "loss": 0.4801, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.0027, |
| "grad_norm": 3.7434325218200684, |
| "learning_rate": 4.291224489795918e-05, |
| "loss": 0.5009, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.0037, |
| "grad_norm": 5.404001235961914, |
| "learning_rate": 4.286122448979592e-05, |
| "loss": 0.5144, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.0037, |
| "eval_loss": 0.8577666878700256, |
| "eval_runtime": 221.0367, |
| "eval_samples_per_second": 4.56, |
| "eval_steps_per_second": 0.145, |
| "eval_wer": 0.4876778455284553, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.0047, |
| "grad_norm": 3.783438205718994, |
| "learning_rate": 4.2810204081632654e-05, |
| "loss": 0.4987, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.0057, |
| "grad_norm": 3.9520227909088135, |
| "learning_rate": 4.275918367346939e-05, |
| "loss": 0.5081, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.0067, |
| "grad_norm": 3.9701645374298096, |
| "learning_rate": 4.2708163265306126e-05, |
| "loss": 0.4799, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.0077, |
| "grad_norm": 3.5120835304260254, |
| "learning_rate": 4.265714285714286e-05, |
| "loss": 0.4676, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.0087, |
| "grad_norm": 4.348593235015869, |
| "learning_rate": 4.26061224489796e-05, |
| "loss": 0.4853, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.0097, |
| "grad_norm": 3.6884608268737793, |
| "learning_rate": 4.255510204081633e-05, |
| "loss": 0.4803, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.0107, |
| "grad_norm": 3.584364414215088, |
| "learning_rate": 4.250408163265306e-05, |
| "loss": 0.4561, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.0117, |
| "grad_norm": 3.6865909099578857, |
| "learning_rate": 4.24530612244898e-05, |
| "loss": 0.4871, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.0127, |
| "grad_norm": 3.9009077548980713, |
| "learning_rate": 4.2402040816326533e-05, |
| "loss": 0.4599, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.0137, |
| "grad_norm": 3.513470411300659, |
| "learning_rate": 4.2351020408163266e-05, |
| "loss": 0.4316, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.0147, |
| "grad_norm": 3.8565685749053955, |
| "learning_rate": 4.23e-05, |
| "loss": 0.4379, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.0157, |
| "grad_norm": 3.2444798946380615, |
| "learning_rate": 4.224897959183674e-05, |
| "loss": 0.4287, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.0167, |
| "grad_norm": 3.9673781394958496, |
| "learning_rate": 4.219795918367347e-05, |
| "loss": 0.4613, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.0177, |
| "grad_norm": 8.936363220214844, |
| "learning_rate": 4.214693877551021e-05, |
| "loss": 0.4869, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.0187, |
| "grad_norm": 3.6102094650268555, |
| "learning_rate": 4.209591836734694e-05, |
| "loss": 0.4523, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.0197, |
| "grad_norm": 3.8444738388061523, |
| "learning_rate": 4.2044897959183673e-05, |
| "loss": 0.4334, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.0207, |
| "grad_norm": 3.0468149185180664, |
| "learning_rate": 4.199387755102041e-05, |
| "loss": 0.4219, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.0217, |
| "grad_norm": 3.563493251800537, |
| "learning_rate": 4.1942857142857145e-05, |
| "loss": 0.413, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.0227, |
| "grad_norm": 3.6925594806671143, |
| "learning_rate": 4.189183673469388e-05, |
| "loss": 0.4028, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.0237, |
| "grad_norm": 3.872044086456299, |
| "learning_rate": 4.184081632653061e-05, |
| "loss": 0.4411, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.0237, |
| "eval_loss": 0.8616846203804016, |
| "eval_runtime": 215.4703, |
| "eval_samples_per_second": 4.678, |
| "eval_steps_per_second": 0.149, |
| "eval_wer": 0.47925135501355015, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.0247, |
| "grad_norm": 3.9783222675323486, |
| "learning_rate": 4.178979591836735e-05, |
| "loss": 0.44, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.0257, |
| "grad_norm": 5.306482315063477, |
| "learning_rate": 4.173877551020408e-05, |
| "loss": 0.4384, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.0267, |
| "grad_norm": 4.326815128326416, |
| "learning_rate": 4.168775510204082e-05, |
| "loss": 0.4873, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.0277, |
| "grad_norm": 3.9018495082855225, |
| "learning_rate": 4.163673469387755e-05, |
| "loss": 0.4974, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.0287, |
| "grad_norm": 3.4675261974334717, |
| "learning_rate": 4.158571428571429e-05, |
| "loss": 0.4705, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.0297, |
| "grad_norm": 4.265820026397705, |
| "learning_rate": 4.1534693877551024e-05, |
| "loss": 0.4473, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.0307, |
| "grad_norm": 3.4740707874298096, |
| "learning_rate": 4.1483673469387756e-05, |
| "loss": 0.4679, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.0317, |
| "grad_norm": 3.339444637298584, |
| "learning_rate": 4.143265306122449e-05, |
| "loss": 0.4065, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.0327, |
| "grad_norm": 3.825657606124878, |
| "learning_rate": 4.138163265306123e-05, |
| "loss": 0.4436, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.0337, |
| "grad_norm": 3.392925262451172, |
| "learning_rate": 4.133061224489796e-05, |
| "loss": 0.4285, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.0347, |
| "grad_norm": 3.815762519836426, |
| "learning_rate": 4.127959183673469e-05, |
| "loss": 0.4123, |
| "step": 9550 |
| }, |
| { |
| "epoch": 2.0357, |
| "grad_norm": 4.090697765350342, |
| "learning_rate": 4.122857142857143e-05, |
| "loss": 0.408, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.0367, |
| "grad_norm": 3.2577061653137207, |
| "learning_rate": 4.1177551020408164e-05, |
| "loss": 0.3751, |
| "step": 9650 |
| }, |
| { |
| "epoch": 2.0377, |
| "grad_norm": 3.391096591949463, |
| "learning_rate": 4.11265306122449e-05, |
| "loss": 0.3885, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.0387, |
| "grad_norm": 3.540929079055786, |
| "learning_rate": 4.1075510204081636e-05, |
| "loss": 0.4434, |
| "step": 9750 |
| }, |
| { |
| "epoch": 2.0397, |
| "grad_norm": 4.215907096862793, |
| "learning_rate": 4.102448979591837e-05, |
| "loss": 0.436, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.0407, |
| "grad_norm": 4.207083225250244, |
| "learning_rate": 4.09734693877551e-05, |
| "loss": 0.4699, |
| "step": 9850 |
| }, |
| { |
| "epoch": 2.0417, |
| "grad_norm": 3.9734325408935547, |
| "learning_rate": 4.092244897959184e-05, |
| "loss": 0.4301, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.0427, |
| "grad_norm": 3.023761510848999, |
| "learning_rate": 4.087142857142857e-05, |
| "loss": 0.3889, |
| "step": 9950 |
| }, |
| { |
| "epoch": 2.0437, |
| "grad_norm": 3.4388654232025146, |
| "learning_rate": 4.0820408163265304e-05, |
| "loss": 0.3835, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.0437, |
| "eval_loss": 0.8521081805229187, |
| "eval_runtime": 215.0513, |
| "eval_samples_per_second": 4.687, |
| "eval_steps_per_second": 0.149, |
| "eval_wer": 0.47412771002710025, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.0447, |
| "grad_norm": 3.153373956680298, |
| "learning_rate": 4.076938775510204e-05, |
| "loss": 0.3933, |
| "step": 10050 |
| }, |
| { |
| "epoch": 2.0457, |
| "grad_norm": 4.276921272277832, |
| "learning_rate": 4.071836734693878e-05, |
| "loss": 0.42, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.0467, |
| "grad_norm": 3.5044806003570557, |
| "learning_rate": 4.0667346938775515e-05, |
| "loss": 0.4131, |
| "step": 10150 |
| }, |
| { |
| "epoch": 2.0477, |
| "grad_norm": 3.862910032272339, |
| "learning_rate": 4.061632653061225e-05, |
| "loss": 0.4076, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.0487, |
| "grad_norm": 3.593726396560669, |
| "learning_rate": 4.056530612244898e-05, |
| "loss": 0.397, |
| "step": 10250 |
| }, |
| { |
| "epoch": 2.0497, |
| "grad_norm": 3.8984551429748535, |
| "learning_rate": 4.051428571428572e-05, |
| "loss": 0.398, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.0507, |
| "grad_norm": 2.999417781829834, |
| "learning_rate": 4.046326530612245e-05, |
| "loss": 0.3986, |
| "step": 10350 |
| }, |
| { |
| "epoch": 2.0517, |
| "grad_norm": 3.8147029876708984, |
| "learning_rate": 4.041224489795918e-05, |
| "loss": 0.4151, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.0527, |
| "grad_norm": 3.7409307956695557, |
| "learning_rate": 4.0361224489795915e-05, |
| "loss": 0.4006, |
| "step": 10450 |
| }, |
| { |
| "epoch": 2.0537, |
| "grad_norm": 4.3039445877075195, |
| "learning_rate": 4.0310204081632655e-05, |
| "loss": 0.3995, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.0547, |
| "grad_norm": 3.9257168769836426, |
| "learning_rate": 4.0259183673469394e-05, |
| "loss": 0.4485, |
| "step": 10550 |
| }, |
| { |
| "epoch": 2.0557, |
| "grad_norm": 3.795719623565674, |
| "learning_rate": 4.0208163265306126e-05, |
| "loss": 0.4438, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.0567, |
| "grad_norm": 4.891661643981934, |
| "learning_rate": 4.015714285714286e-05, |
| "loss": 0.4374, |
| "step": 10650 |
| }, |
| { |
| "epoch": 2.0577, |
| "grad_norm": 4.732306003570557, |
| "learning_rate": 4.010612244897959e-05, |
| "loss": 0.4005, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.0587, |
| "grad_norm": 4.265634536743164, |
| "learning_rate": 4.005510204081633e-05, |
| "loss": 0.3726, |
| "step": 10750 |
| }, |
| { |
| "epoch": 2.0597, |
| "grad_norm": 4.142653942108154, |
| "learning_rate": 4.000408163265306e-05, |
| "loss": 0.3763, |
| "step": 10800 |
| }, |
| { |
| "epoch": 2.0607, |
| "grad_norm": 3.3085813522338867, |
| "learning_rate": 3.9953061224489795e-05, |
| "loss": 0.3716, |
| "step": 10850 |
| }, |
| { |
| "epoch": 2.0617, |
| "grad_norm": 4.671994686126709, |
| "learning_rate": 3.990204081632653e-05, |
| "loss": 0.3767, |
| "step": 10900 |
| }, |
| { |
| "epoch": 2.0627, |
| "grad_norm": 3.506270408630371, |
| "learning_rate": 3.985102040816327e-05, |
| "loss": 0.3868, |
| "step": 10950 |
| }, |
| { |
| "epoch": 2.0637, |
| "grad_norm": 3.7705044746398926, |
| "learning_rate": 3.9800000000000005e-05, |
| "loss": 0.4116, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.0637, |
| "eval_loss": 0.8389872908592224, |
| "eval_runtime": 222.6276, |
| "eval_samples_per_second": 4.528, |
| "eval_steps_per_second": 0.144, |
| "eval_wer": 0.48407859078590787, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.0647, |
| "grad_norm": 3.551223039627075, |
| "learning_rate": 3.974897959183674e-05, |
| "loss": 0.3901, |
| "step": 11050 |
| }, |
| { |
| "epoch": 2.0657, |
| "grad_norm": 4.152121543884277, |
| "learning_rate": 3.969795918367347e-05, |
| "loss": 0.3837, |
| "step": 11100 |
| }, |
| { |
| "epoch": 2.0667, |
| "grad_norm": 3.815230369567871, |
| "learning_rate": 3.964693877551021e-05, |
| "loss": 0.4107, |
| "step": 11150 |
| }, |
| { |
| "epoch": 2.0677, |
| "grad_norm": 3.5564286708831787, |
| "learning_rate": 3.959693877551021e-05, |
| "loss": 0.3975, |
| "step": 11200 |
| }, |
| { |
| "epoch": 2.0687, |
| "grad_norm": 3.2500646114349365, |
| "learning_rate": 3.954591836734694e-05, |
| "loss": 0.3993, |
| "step": 11250 |
| }, |
| { |
| "epoch": 2.0697, |
| "grad_norm": 3.974126100540161, |
| "learning_rate": 3.949489795918367e-05, |
| "loss": 0.3784, |
| "step": 11300 |
| }, |
| { |
| "epoch": 2.0707, |
| "grad_norm": 4.25160026550293, |
| "learning_rate": 3.944387755102041e-05, |
| "loss": 0.4069, |
| "step": 11350 |
| }, |
| { |
| "epoch": 2.0717, |
| "grad_norm": 3.363373279571533, |
| "learning_rate": 3.9392857142857144e-05, |
| "loss": 0.4105, |
| "step": 11400 |
| }, |
| { |
| "epoch": 2.0727, |
| "grad_norm": 3.961094379425049, |
| "learning_rate": 3.934183673469388e-05, |
| "loss": 0.3961, |
| "step": 11450 |
| }, |
| { |
| "epoch": 2.0737, |
| "grad_norm": 3.97780704498291, |
| "learning_rate": 3.9290816326530615e-05, |
| "loss": 0.4065, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.0747, |
| "grad_norm": 4.831082344055176, |
| "learning_rate": 3.923979591836735e-05, |
| "loss": 0.4255, |
| "step": 11550 |
| }, |
| { |
| "epoch": 2.0757, |
| "grad_norm": 3.660353183746338, |
| "learning_rate": 3.918877551020409e-05, |
| "loss": 0.4315, |
| "step": 11600 |
| }, |
| { |
| "epoch": 2.0767, |
| "grad_norm": 3.697075843811035, |
| "learning_rate": 3.913775510204082e-05, |
| "loss": 0.3948, |
| "step": 11650 |
| }, |
| { |
| "epoch": 2.0777, |
| "grad_norm": 5.073598384857178, |
| "learning_rate": 3.908673469387755e-05, |
| "loss": 0.4224, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.00055, |
| "grad_norm": 4.665965557098389, |
| "learning_rate": 3.9035714285714284e-05, |
| "loss": 0.4212, |
| "step": 11750 |
| }, |
| { |
| "epoch": 3.00155, |
| "grad_norm": 5.204474925994873, |
| "learning_rate": 3.898469387755102e-05, |
| "loss": 0.4048, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.00255, |
| "grad_norm": 3.746649742126465, |
| "learning_rate": 3.8933673469387755e-05, |
| "loss": 0.4175, |
| "step": 11850 |
| }, |
| { |
| "epoch": 3.00355, |
| "grad_norm": 3.8436436653137207, |
| "learning_rate": 3.8882653061224495e-05, |
| "loss": 0.4425, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.00455, |
| "grad_norm": 3.9119129180908203, |
| "learning_rate": 3.883163265306123e-05, |
| "loss": 0.4227, |
| "step": 11950 |
| }, |
| { |
| "epoch": 3.00555, |
| "grad_norm": 4.373188018798828, |
| "learning_rate": 3.878061224489796e-05, |
| "loss": 0.4312, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.00555, |
| "eval_loss": 0.8273130655288696, |
| "eval_runtime": 214.8756, |
| "eval_samples_per_second": 4.691, |
| "eval_steps_per_second": 0.149, |
| "eval_wer": 0.4557079945799458, |
| "step": 12000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 50000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "total_flos": 1.8907302968866898e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|