| { |
| "best_global_step": 19000, |
| "best_metric": 0.1748135060855909, |
| "best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v8/checkpoint-19000", |
| "epoch": 32.0, |
| "eval_steps": 1000, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.32, |
| "grad_norm": 19.961477279663086, |
| "learning_rate": 1.97e-06, |
| "loss": 3.217, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 16.765470504760742, |
| "learning_rate": 3.97e-06, |
| "loss": 1.6255, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 16.053678512573242, |
| "learning_rate": 5.9700000000000004e-06, |
| "loss": 1.1789, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 14.488689422607422, |
| "learning_rate": 7.970000000000002e-06, |
| "loss": 0.9348, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 12.85074234008789, |
| "learning_rate": 9.970000000000001e-06, |
| "loss": 0.8116, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 0.6225642561912537, |
| "eval_runtime": 205.3808, |
| "eval_samples_per_second": 6.086, |
| "eval_steps_per_second": 1.524, |
| "eval_wer": 0.47516686297605026, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 14.117347717285156, |
| "learning_rate": 1.1970000000000002e-05, |
| "loss": 0.757, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 11.961180686950684, |
| "learning_rate": 1.3970000000000002e-05, |
| "loss": 0.618, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 11.46937370300293, |
| "learning_rate": 1.597e-05, |
| "loss": 0.5826, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 10.750535011291504, |
| "learning_rate": 1.7970000000000002e-05, |
| "loss": 0.5615, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 12.116422653198242, |
| "learning_rate": 1.9970000000000004e-05, |
| "loss": 0.4683, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.2, |
| "eval_loss": 0.41659122705459595, |
| "eval_runtime": 206.2639, |
| "eval_samples_per_second": 6.06, |
| "eval_steps_per_second": 1.517, |
| "eval_wer": 0.41647035728307813, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 9.442646980285645, |
| "learning_rate": 1.978111111111111e-05, |
| "loss": 0.4231, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 13.404372215270996, |
| "learning_rate": 1.955888888888889e-05, |
| "loss": 0.4156, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 9.753326416015625, |
| "learning_rate": 1.9336666666666667e-05, |
| "loss": 0.3613, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 10.980928421020508, |
| "learning_rate": 1.9114444444444447e-05, |
| "loss": 0.3039, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 11.34424114227295, |
| "learning_rate": 1.8892222222222223e-05, |
| "loss": 0.3047, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.8, |
| "eval_loss": 0.34299781918525696, |
| "eval_runtime": 205.0374, |
| "eval_samples_per_second": 6.096, |
| "eval_steps_per_second": 1.527, |
| "eval_wer": 0.3496270121711818, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 7.82305383682251, |
| "learning_rate": 1.8670000000000003e-05, |
| "loss": 0.2757, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.44, |
| "grad_norm": 7.7784576416015625, |
| "learning_rate": 1.844777777777778e-05, |
| "loss": 0.2217, |
| "step": 3400 |
| }, |
| { |
| "epoch": 5.76, |
| "grad_norm": 12.481864929199219, |
| "learning_rate": 1.8225555555555555e-05, |
| "loss": 0.2279, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.08, |
| "grad_norm": 6.796186447143555, |
| "learning_rate": 1.8003333333333334e-05, |
| "loss": 0.2183, |
| "step": 3800 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 5.8489460945129395, |
| "learning_rate": 1.7781111111111114e-05, |
| "loss": 0.1679, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.4, |
| "eval_loss": 0.31804612278938293, |
| "eval_runtime": 201.9885, |
| "eval_samples_per_second": 6.188, |
| "eval_steps_per_second": 1.55, |
| "eval_wer": 0.24676089517078917, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.72, |
| "grad_norm": 8.320049285888672, |
| "learning_rate": 1.755888888888889e-05, |
| "loss": 0.1735, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.04, |
| "grad_norm": 6.053928375244141, |
| "learning_rate": 1.733777777777778e-05, |
| "loss": 0.1708, |
| "step": 4400 |
| }, |
| { |
| "epoch": 7.36, |
| "grad_norm": 13.38781452178955, |
| "learning_rate": 1.7115555555555557e-05, |
| "loss": 0.1268, |
| "step": 4600 |
| }, |
| { |
| "epoch": 7.68, |
| "grad_norm": 7.869670391082764, |
| "learning_rate": 1.6893333333333336e-05, |
| "loss": 0.1355, |
| "step": 4800 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 6.039541721343994, |
| "learning_rate": 1.6671111111111113e-05, |
| "loss": 0.1405, |
| "step": 5000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.28736841678619385, |
| "eval_runtime": 201.5675, |
| "eval_samples_per_second": 6.201, |
| "eval_steps_per_second": 1.553, |
| "eval_wer": 0.21181782489202983, |
| "step": 5000 |
| }, |
| { |
| "epoch": 8.32, |
| "grad_norm": 4.737486362457275, |
| "learning_rate": 1.644888888888889e-05, |
| "loss": 0.1042, |
| "step": 5200 |
| }, |
| { |
| "epoch": 8.64, |
| "grad_norm": 8.452003479003906, |
| "learning_rate": 1.6226666666666668e-05, |
| "loss": 0.1113, |
| "step": 5400 |
| }, |
| { |
| "epoch": 8.96, |
| "grad_norm": 4.064589500427246, |
| "learning_rate": 1.6004444444444444e-05, |
| "loss": 0.1094, |
| "step": 5600 |
| }, |
| { |
| "epoch": 9.28, |
| "grad_norm": 7.312466621398926, |
| "learning_rate": 1.5782222222222224e-05, |
| "loss": 0.0854, |
| "step": 5800 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 5.71759033203125, |
| "learning_rate": 1.556e-05, |
| "loss": 0.0856, |
| "step": 6000 |
| }, |
| { |
| "epoch": 9.6, |
| "eval_loss": 0.2818545699119568, |
| "eval_runtime": 205.5208, |
| "eval_samples_per_second": 6.082, |
| "eval_steps_per_second": 1.523, |
| "eval_wer": 0.22781703965449548, |
| "step": 6000 |
| }, |
| { |
| "epoch": 9.92, |
| "grad_norm": 5.981679439544678, |
| "learning_rate": 1.533777777777778e-05, |
| "loss": 0.0895, |
| "step": 6200 |
| }, |
| { |
| "epoch": 10.24, |
| "grad_norm": 4.288158893585205, |
| "learning_rate": 1.5115555555555557e-05, |
| "loss": 0.0744, |
| "step": 6400 |
| }, |
| { |
| "epoch": 10.56, |
| "grad_norm": 4.479767799377441, |
| "learning_rate": 1.4893333333333335e-05, |
| "loss": 0.0721, |
| "step": 6600 |
| }, |
| { |
| "epoch": 10.88, |
| "grad_norm": 3.508542776107788, |
| "learning_rate": 1.4671111111111111e-05, |
| "loss": 0.0732, |
| "step": 6800 |
| }, |
| { |
| "epoch": 11.2, |
| "grad_norm": 5.090618133544922, |
| "learning_rate": 1.444888888888889e-05, |
| "loss": 0.066, |
| "step": 7000 |
| }, |
| { |
| "epoch": 11.2, |
| "eval_loss": 0.27581551671028137, |
| "eval_runtime": 200.6769, |
| "eval_samples_per_second": 6.229, |
| "eval_steps_per_second": 1.56, |
| "eval_wer": 0.2033765213977228, |
| "step": 7000 |
| }, |
| { |
| "epoch": 11.52, |
| "grad_norm": 4.929802417755127, |
| "learning_rate": 1.4227777777777778e-05, |
| "loss": 0.0617, |
| "step": 7200 |
| }, |
| { |
| "epoch": 11.84, |
| "grad_norm": 4.811327934265137, |
| "learning_rate": 1.4005555555555556e-05, |
| "loss": 0.0618, |
| "step": 7400 |
| }, |
| { |
| "epoch": 12.16, |
| "grad_norm": 3.7202506065368652, |
| "learning_rate": 1.3783333333333336e-05, |
| "loss": 0.0582, |
| "step": 7600 |
| }, |
| { |
| "epoch": 12.48, |
| "grad_norm": 3.58890962600708, |
| "learning_rate": 1.3561111111111112e-05, |
| "loss": 0.0524, |
| "step": 7800 |
| }, |
| { |
| "epoch": 12.8, |
| "grad_norm": 2.8380696773529053, |
| "learning_rate": 1.333888888888889e-05, |
| "loss": 0.0534, |
| "step": 8000 |
| }, |
| { |
| "epoch": 12.8, |
| "eval_loss": 0.2711934447288513, |
| "eval_runtime": 205.6866, |
| "eval_samples_per_second": 6.077, |
| "eval_steps_per_second": 1.522, |
| "eval_wer": 0.22340007852375343, |
| "step": 8000 |
| }, |
| { |
| "epoch": 13.12, |
| "grad_norm": 5.2906670570373535, |
| "learning_rate": 1.311666666666667e-05, |
| "loss": 0.0547, |
| "step": 8200 |
| }, |
| { |
| "epoch": 13.44, |
| "grad_norm": 4.201717853546143, |
| "learning_rate": 1.2894444444444445e-05, |
| "loss": 0.0482, |
| "step": 8400 |
| }, |
| { |
| "epoch": 13.76, |
| "grad_norm": 6.0169477462768555, |
| "learning_rate": 1.2672222222222223e-05, |
| "loss": 0.0513, |
| "step": 8600 |
| }, |
| { |
| "epoch": 14.08, |
| "grad_norm": 2.1733462810516357, |
| "learning_rate": 1.2450000000000003e-05, |
| "loss": 0.0474, |
| "step": 8800 |
| }, |
| { |
| "epoch": 14.4, |
| "grad_norm": 4.900807857513428, |
| "learning_rate": 1.2227777777777779e-05, |
| "loss": 0.044, |
| "step": 9000 |
| }, |
| { |
| "epoch": 14.4, |
| "eval_loss": 0.2689875662326813, |
| "eval_runtime": 205.0191, |
| "eval_samples_per_second": 6.097, |
| "eval_steps_per_second": 1.527, |
| "eval_wer": 0.2009226541028661, |
| "step": 9000 |
| }, |
| { |
| "epoch": 14.72, |
| "grad_norm": 2.0183374881744385, |
| "learning_rate": 1.2005555555555557e-05, |
| "loss": 0.0439, |
| "step": 9200 |
| }, |
| { |
| "epoch": 15.04, |
| "grad_norm": 4.9282450675964355, |
| "learning_rate": 1.1784444444444444e-05, |
| "loss": 0.0436, |
| "step": 9400 |
| }, |
| { |
| "epoch": 15.36, |
| "grad_norm": 3.2207448482513428, |
| "learning_rate": 1.1562222222222224e-05, |
| "loss": 0.0372, |
| "step": 9600 |
| }, |
| { |
| "epoch": 15.68, |
| "grad_norm": 5.770218849182129, |
| "learning_rate": 1.134e-05, |
| "loss": 0.0388, |
| "step": 9800 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 9.450571060180664, |
| "learning_rate": 1.1117777777777778e-05, |
| "loss": 0.0393, |
| "step": 10000 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.2668628692626953, |
| "eval_runtime": 205.3305, |
| "eval_samples_per_second": 6.088, |
| "eval_steps_per_second": 1.524, |
| "eval_wer": 0.1930702787593247, |
| "step": 10000 |
| }, |
| { |
| "epoch": 16.32, |
| "grad_norm": 6.021556854248047, |
| "learning_rate": 1.0895555555555557e-05, |
| "loss": 0.0328, |
| "step": 10200 |
| }, |
| { |
| "epoch": 16.64, |
| "grad_norm": 2.8971662521362305, |
| "learning_rate": 1.0673333333333333e-05, |
| "loss": 0.0342, |
| "step": 10400 |
| }, |
| { |
| "epoch": 16.96, |
| "grad_norm": 3.1022391319274902, |
| "learning_rate": 1.0451111111111111e-05, |
| "loss": 0.0377, |
| "step": 10600 |
| }, |
| { |
| "epoch": 17.28, |
| "grad_norm": 1.2703841924667358, |
| "learning_rate": 1.022888888888889e-05, |
| "loss": 0.0348, |
| "step": 10800 |
| }, |
| { |
| "epoch": 17.6, |
| "grad_norm": 4.3228373527526855, |
| "learning_rate": 1.0006666666666667e-05, |
| "loss": 0.0344, |
| "step": 11000 |
| }, |
| { |
| "epoch": 17.6, |
| "eval_loss": 0.26008835434913635, |
| "eval_runtime": 202.8929, |
| "eval_samples_per_second": 6.161, |
| "eval_steps_per_second": 1.543, |
| "eval_wer": 0.18865331762858265, |
| "step": 11000 |
| }, |
| { |
| "epoch": 17.92, |
| "grad_norm": 5.954049587249756, |
| "learning_rate": 9.784444444444445e-06, |
| "loss": 0.034, |
| "step": 11200 |
| }, |
| { |
| "epoch": 18.24, |
| "grad_norm": 2.9373772144317627, |
| "learning_rate": 9.562222222222223e-06, |
| "loss": 0.0321, |
| "step": 11400 |
| }, |
| { |
| "epoch": 18.56, |
| "grad_norm": 3.066429615020752, |
| "learning_rate": 9.340000000000002e-06, |
| "loss": 0.0299, |
| "step": 11600 |
| }, |
| { |
| "epoch": 18.88, |
| "grad_norm": 2.1500790119171143, |
| "learning_rate": 9.117777777777778e-06, |
| "loss": 0.0294, |
| "step": 11800 |
| }, |
| { |
| "epoch": 19.2, |
| "grad_norm": 2.651730537414551, |
| "learning_rate": 8.895555555555556e-06, |
| "loss": 0.0292, |
| "step": 12000 |
| }, |
| { |
| "epoch": 19.2, |
| "eval_loss": 0.2627313733100891, |
| "eval_runtime": 202.5129, |
| "eval_samples_per_second": 6.172, |
| "eval_steps_per_second": 1.546, |
| "eval_wer": 0.18089909697683548, |
| "step": 12000 |
| }, |
| { |
| "epoch": 19.52, |
| "grad_norm": 2.2529680728912354, |
| "learning_rate": 8.673333333333334e-06, |
| "loss": 0.0284, |
| "step": 12200 |
| }, |
| { |
| "epoch": 19.84, |
| "grad_norm": 4.965338230133057, |
| "learning_rate": 8.451111111111112e-06, |
| "loss": 0.0275, |
| "step": 12400 |
| }, |
| { |
| "epoch": 20.16, |
| "grad_norm": 4.967452526092529, |
| "learning_rate": 8.22888888888889e-06, |
| "loss": 0.0287, |
| "step": 12600 |
| }, |
| { |
| "epoch": 20.48, |
| "grad_norm": 4.872837543487549, |
| "learning_rate": 8.006666666666667e-06, |
| "loss": 0.0264, |
| "step": 12800 |
| }, |
| { |
| "epoch": 20.8, |
| "grad_norm": 5.586236000061035, |
| "learning_rate": 7.784444444444445e-06, |
| "loss": 0.0272, |
| "step": 13000 |
| }, |
| { |
| "epoch": 20.8, |
| "eval_loss": 0.25965991616249084, |
| "eval_runtime": 203.2854, |
| "eval_samples_per_second": 6.149, |
| "eval_steps_per_second": 1.54, |
| "eval_wer": 0.18315665488810365, |
| "step": 13000 |
| }, |
| { |
| "epoch": 21.12, |
| "grad_norm": 2.6139557361602783, |
| "learning_rate": 7.562222222222223e-06, |
| "loss": 0.0251, |
| "step": 13200 |
| }, |
| { |
| "epoch": 21.44, |
| "grad_norm": 6.1768341064453125, |
| "learning_rate": 7.340000000000001e-06, |
| "loss": 0.025, |
| "step": 13400 |
| }, |
| { |
| "epoch": 21.76, |
| "grad_norm": 3.9618000984191895, |
| "learning_rate": 7.117777777777778e-06, |
| "loss": 0.0257, |
| "step": 13600 |
| }, |
| { |
| "epoch": 22.08, |
| "grad_norm": 2.3093647956848145, |
| "learning_rate": 6.8955555555555565e-06, |
| "loss": 0.0241, |
| "step": 13800 |
| }, |
| { |
| "epoch": 22.4, |
| "grad_norm": 1.8838666677474976, |
| "learning_rate": 6.674444444444445e-06, |
| "loss": 0.0231, |
| "step": 14000 |
| }, |
| { |
| "epoch": 22.4, |
| "eval_loss": 0.25562289357185364, |
| "eval_runtime": 205.1594, |
| "eval_samples_per_second": 6.093, |
| "eval_steps_per_second": 1.526, |
| "eval_wer": 0.18138987043580684, |
| "step": 14000 |
| }, |
| { |
| "epoch": 22.72, |
| "grad_norm": 6.57139253616333, |
| "learning_rate": 6.452222222222223e-06, |
| "loss": 0.0237, |
| "step": 14200 |
| }, |
| { |
| "epoch": 23.04, |
| "grad_norm": 6.151878833770752, |
| "learning_rate": 6.2300000000000005e-06, |
| "loss": 0.0231, |
| "step": 14400 |
| }, |
| { |
| "epoch": 23.36, |
| "grad_norm": 3.6745595932006836, |
| "learning_rate": 6.007777777777778e-06, |
| "loss": 0.0226, |
| "step": 14600 |
| }, |
| { |
| "epoch": 23.68, |
| "grad_norm": 1.7786234617233276, |
| "learning_rate": 5.785555555555556e-06, |
| "loss": 0.0217, |
| "step": 14800 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 4.625872611999512, |
| "learning_rate": 5.563333333333334e-06, |
| "loss": 0.0223, |
| "step": 15000 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.2562410533428192, |
| "eval_runtime": 202.4663, |
| "eval_samples_per_second": 6.174, |
| "eval_steps_per_second": 1.546, |
| "eval_wer": 0.1827640361209266, |
| "step": 15000 |
| }, |
| { |
| "epoch": 24.32, |
| "grad_norm": 2.113445520401001, |
| "learning_rate": 5.341111111111111e-06, |
| "loss": 0.0205, |
| "step": 15200 |
| }, |
| { |
| "epoch": 24.64, |
| "grad_norm": 3.8710196018218994, |
| "learning_rate": 5.118888888888889e-06, |
| "loss": 0.0216, |
| "step": 15400 |
| }, |
| { |
| "epoch": 24.96, |
| "grad_norm": 1.624074935913086, |
| "learning_rate": 4.896666666666667e-06, |
| "loss": 0.0221, |
| "step": 15600 |
| }, |
| { |
| "epoch": 25.28, |
| "grad_norm": 11.679327964782715, |
| "learning_rate": 4.6744444444444445e-06, |
| "loss": 0.0192, |
| "step": 15800 |
| }, |
| { |
| "epoch": 25.6, |
| "grad_norm": 5.923542499542236, |
| "learning_rate": 4.452222222222223e-06, |
| "loss": 0.0192, |
| "step": 16000 |
| }, |
| { |
| "epoch": 25.6, |
| "eval_loss": 0.25337618589401245, |
| "eval_runtime": 203.6663, |
| "eval_samples_per_second": 6.137, |
| "eval_steps_per_second": 1.537, |
| "eval_wer": 0.18021201413427562, |
| "step": 16000 |
| }, |
| { |
| "epoch": 25.92, |
| "grad_norm": 1.646501064300537, |
| "learning_rate": 4.23e-06, |
| "loss": 0.0198, |
| "step": 16200 |
| }, |
| { |
| "epoch": 26.24, |
| "grad_norm": 3.09792160987854, |
| "learning_rate": 4.008888888888889e-06, |
| "loss": 0.0188, |
| "step": 16400 |
| }, |
| { |
| "epoch": 26.56, |
| "grad_norm": 1.3494817018508911, |
| "learning_rate": 3.7866666666666667e-06, |
| "loss": 0.0158, |
| "step": 16600 |
| }, |
| { |
| "epoch": 26.88, |
| "grad_norm": 2.5502867698669434, |
| "learning_rate": 3.565555555555556e-06, |
| "loss": 0.02, |
| "step": 16800 |
| }, |
| { |
| "epoch": 27.2, |
| "grad_norm": 2.5397415161132812, |
| "learning_rate": 3.3433333333333332e-06, |
| "loss": 0.0167, |
| "step": 17000 |
| }, |
| { |
| "epoch": 27.2, |
| "eval_loss": 0.2511875331401825, |
| "eval_runtime": 201.8937, |
| "eval_samples_per_second": 6.191, |
| "eval_steps_per_second": 1.55, |
| "eval_wer": 0.1762858264625049, |
| "step": 17000 |
| }, |
| { |
| "epoch": 27.52, |
| "grad_norm": 2.679438352584839, |
| "learning_rate": 3.1211111111111115e-06, |
| "loss": 0.0181, |
| "step": 17200 |
| }, |
| { |
| "epoch": 27.84, |
| "grad_norm": 3.130686044692993, |
| "learning_rate": 2.898888888888889e-06, |
| "loss": 0.018, |
| "step": 17400 |
| }, |
| { |
| "epoch": 28.16, |
| "grad_norm": 5.3460774421691895, |
| "learning_rate": 2.6766666666666667e-06, |
| "loss": 0.0178, |
| "step": 17600 |
| }, |
| { |
| "epoch": 28.48, |
| "grad_norm": 3.5381155014038086, |
| "learning_rate": 2.4544444444444446e-06, |
| "loss": 0.0174, |
| "step": 17800 |
| }, |
| { |
| "epoch": 28.8, |
| "grad_norm": 2.955854892730713, |
| "learning_rate": 2.2322222222222224e-06, |
| "loss": 0.0178, |
| "step": 18000 |
| }, |
| { |
| "epoch": 28.8, |
| "eval_loss": 0.24957236647605896, |
| "eval_runtime": 205.824, |
| "eval_samples_per_second": 6.073, |
| "eval_steps_per_second": 1.521, |
| "eval_wer": 0.17942677659992148, |
| "step": 18000 |
| }, |
| { |
| "epoch": 29.12, |
| "grad_norm": 1.8030221462249756, |
| "learning_rate": 2.0100000000000002e-06, |
| "loss": 0.017, |
| "step": 18200 |
| }, |
| { |
| "epoch": 29.44, |
| "grad_norm": 2.740708112716675, |
| "learning_rate": 1.787777777777778e-06, |
| "loss": 0.0161, |
| "step": 18400 |
| }, |
| { |
| "epoch": 29.76, |
| "grad_norm": 2.2259738445281982, |
| "learning_rate": 1.5655555555555557e-06, |
| "loss": 0.0153, |
| "step": 18600 |
| }, |
| { |
| "epoch": 30.08, |
| "grad_norm": 3.472790479660034, |
| "learning_rate": 1.3433333333333335e-06, |
| "loss": 0.0156, |
| "step": 18800 |
| }, |
| { |
| "epoch": 30.4, |
| "grad_norm": 3.7026050090789795, |
| "learning_rate": 1.1211111111111112e-06, |
| "loss": 0.0143, |
| "step": 19000 |
| }, |
| { |
| "epoch": 30.4, |
| "eval_loss": 0.24610121548175812, |
| "eval_runtime": 205.7197, |
| "eval_samples_per_second": 6.076, |
| "eval_steps_per_second": 1.521, |
| "eval_wer": 0.1748135060855909, |
| "step": 19000 |
| }, |
| { |
| "epoch": 30.72, |
| "grad_norm": 0.6519302129745483, |
| "learning_rate": 8.98888888888889e-07, |
| "loss": 0.0145, |
| "step": 19200 |
| }, |
| { |
| "epoch": 31.04, |
| "grad_norm": 2.7453384399414062, |
| "learning_rate": 6.766666666666667e-07, |
| "loss": 0.0159, |
| "step": 19400 |
| }, |
| { |
| "epoch": 31.36, |
| "grad_norm": 5.072993278503418, |
| "learning_rate": 4.544444444444445e-07, |
| "loss": 0.016, |
| "step": 19600 |
| }, |
| { |
| "epoch": 31.68, |
| "grad_norm": 2.2992451190948486, |
| "learning_rate": 2.3222222222222223e-07, |
| "loss": 0.0165, |
| "step": 19800 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 0.8698179721832275, |
| "learning_rate": 1e-08, |
| "loss": 0.0147, |
| "step": 20000 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 0.24500565230846405, |
| "eval_runtime": 201.9819, |
| "eval_samples_per_second": 6.189, |
| "eval_steps_per_second": 1.55, |
| "eval_wer": 0.1762858264625049, |
| "step": 20000 |
| }, |
| { |
| "epoch": 32.0, |
| "step": 20000, |
| "total_flos": 7.87725543407616e+18, |
| "train_loss": 0.17171463202238083, |
| "train_runtime": 42247.0614, |
| "train_samples_per_second": 7.574, |
| "train_steps_per_second": 0.473 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 32, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.87725543407616e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|