| { | |
| "best_metric": 0.24260137975215912, | |
| "best_model_checkpoint": "wave2vec2-xlsr-Persian/checkpoint-240000", | |
| "epoch": 3.0, | |
| "eval_steps": 10000, | |
| "global_step": 255012, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1176415227518705, | |
| "grad_norm": 1.416466236114502, | |
| "learning_rate": 9.646079712769477e-06, | |
| "loss": 3.7691, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.1176415227518705, | |
| "eval_loss": 0.7940966486930847, | |
| "eval_runtime": 534.4277, | |
| "eval_samples_per_second": 16.743, | |
| "eval_steps_per_second": 2.094, | |
| "eval_wer": 0.6079105283797882, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.235283045503741, | |
| "grad_norm": 2.3572049140930176, | |
| "learning_rate": 9.252436892745226e-06, | |
| "loss": 0.8658, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.235283045503741, | |
| "eval_loss": 0.5118501782417297, | |
| "eval_runtime": 536.8454, | |
| "eval_samples_per_second": 16.668, | |
| "eval_steps_per_second": 2.084, | |
| "eval_wer": 0.4575246579700923, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.3529245682556115, | |
| "grad_norm": 2.209596872329712, | |
| "learning_rate": 8.858872809158622e-06, | |
| "loss": 0.6829, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.3529245682556115, | |
| "eval_loss": 0.4285117983818054, | |
| "eval_runtime": 539.6896, | |
| "eval_samples_per_second": 16.58, | |
| "eval_steps_per_second": 2.073, | |
| "eval_wer": 0.40392825696083584, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.470566091007482, | |
| "grad_norm": 2.2252326011657715, | |
| "learning_rate": 8.465348093790845e-06, | |
| "loss": 0.6078, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.470566091007482, | |
| "eval_loss": 0.38793477416038513, | |
| "eval_runtime": 547.0046, | |
| "eval_samples_per_second": 16.358, | |
| "eval_steps_per_second": 2.046, | |
| "eval_wer": 0.3664437028760849, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.5882076137593525, | |
| "grad_norm": 2.7398128509521484, | |
| "learning_rate": 8.071784010204244e-06, | |
| "loss": 0.5561, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.5882076137593525, | |
| "eval_loss": 0.3593791723251343, | |
| "eval_runtime": 551.125, | |
| "eval_samples_per_second": 16.236, | |
| "eval_steps_per_second": 2.03, | |
| "eval_wer": 0.34282162650112097, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.705849136511223, | |
| "grad_norm": 2.960217237472534, | |
| "learning_rate": 7.678259294836465e-06, | |
| "loss": 0.5168, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.705849136511223, | |
| "eval_loss": 0.33377397060394287, | |
| "eval_runtime": 546.4831, | |
| "eval_samples_per_second": 16.374, | |
| "eval_steps_per_second": 2.048, | |
| "eval_wer": 0.3202835389089079, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.8234906592630935, | |
| "grad_norm": 2.531191349029541, | |
| "learning_rate": 7.284734579468687e-06, | |
| "loss": 0.499, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.8234906592630935, | |
| "eval_loss": 0.3143016993999481, | |
| "eval_runtime": 550.3802, | |
| "eval_samples_per_second": 16.258, | |
| "eval_steps_per_second": 2.033, | |
| "eval_wer": 0.30924387157877603, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.941132182014964, | |
| "grad_norm": 2.0980894565582275, | |
| "learning_rate": 6.891131127663261e-06, | |
| "loss": 0.4768, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.941132182014964, | |
| "eval_loss": 0.3023754954338074, | |
| "eval_runtime": 546.9538, | |
| "eval_samples_per_second": 16.36, | |
| "eval_steps_per_second": 2.046, | |
| "eval_wer": 0.29365367113334173, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.0587737047668344, | |
| "grad_norm": 1.7364046573638916, | |
| "learning_rate": 6.497606412295483e-06, | |
| "loss": 0.4529, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 1.0587737047668344, | |
| "eval_loss": 0.29402047395706177, | |
| "eval_runtime": 544.3376, | |
| "eval_samples_per_second": 16.438, | |
| "eval_steps_per_second": 2.056, | |
| "eval_wer": 0.2827878859629002, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 1.176415227518705, | |
| "grad_norm": 3.4313299655914307, | |
| "learning_rate": 6.1040816969277054e-06, | |
| "loss": 0.44, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.176415227518705, | |
| "eval_loss": 0.2909528613090515, | |
| "eval_runtime": 543.2034, | |
| "eval_samples_per_second": 16.473, | |
| "eval_steps_per_second": 2.06, | |
| "eval_wer": 0.27463762217996435, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.2940567502705755, | |
| "grad_norm": 1.1621043682098389, | |
| "learning_rate": 5.710478245122278e-06, | |
| "loss": 0.4264, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.2940567502705755, | |
| "eval_loss": 0.2814837098121643, | |
| "eval_runtime": 547.8907, | |
| "eval_samples_per_second": 16.332, | |
| "eval_steps_per_second": 2.042, | |
| "eval_wer": 0.2683001724023115, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.4116982730224459, | |
| "grad_norm": 2.507716178894043, | |
| "learning_rate": 5.316914161535675e-06, | |
| "loss": 0.4189, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.4116982730224459, | |
| "eval_loss": 0.27175650000572205, | |
| "eval_runtime": 548.1953, | |
| "eval_samples_per_second": 16.323, | |
| "eval_steps_per_second": 2.041, | |
| "eval_wer": 0.26368304611946813, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.5293397957743164, | |
| "grad_norm": 2.3356528282165527, | |
| "learning_rate": 4.923428814386722e-06, | |
| "loss": 0.4052, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 1.5293397957743164, | |
| "eval_loss": 0.2673029899597168, | |
| "eval_runtime": 544.6822, | |
| "eval_samples_per_second": 16.428, | |
| "eval_steps_per_second": 2.054, | |
| "eval_wer": 0.25851467639420195, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 1.646981318526187, | |
| "grad_norm": 2.476557970046997, | |
| "learning_rate": 4.529864730800119e-06, | |
| "loss": 0.4044, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 1.646981318526187, | |
| "eval_loss": 0.26591917872428894, | |
| "eval_runtime": 545.1182, | |
| "eval_samples_per_second": 16.415, | |
| "eval_steps_per_second": 2.053, | |
| "eval_wer": 0.2534720937631799, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 1.7646228412780576, | |
| "grad_norm": 1.8470633029937744, | |
| "learning_rate": 4.136418751869991e-06, | |
| "loss": 0.4046, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 1.7646228412780576, | |
| "eval_loss": 0.2603091299533844, | |
| "eval_runtime": 545.5751, | |
| "eval_samples_per_second": 16.401, | |
| "eval_steps_per_second": 2.051, | |
| "eval_wer": 0.24952459877616556, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 1.8822643640299281, | |
| "grad_norm": 2.6165308952331543, | |
| "learning_rate": 3.742815300064564e-06, | |
| "loss": 0.3944, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.8822643640299281, | |
| "eval_loss": 0.2575734555721283, | |
| "eval_runtime": 545.6155, | |
| "eval_samples_per_second": 16.4, | |
| "eval_steps_per_second": 2.051, | |
| "eval_wer": 0.24606545368445198, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.9999058867817985, | |
| "grad_norm": 2.0350422859191895, | |
| "learning_rate": 3.3492512164779615e-06, | |
| "loss": 0.3876, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.9999058867817985, | |
| "eval_loss": 0.25538763403892517, | |
| "eval_runtime": 543.8449, | |
| "eval_samples_per_second": 16.453, | |
| "eval_steps_per_second": 2.058, | |
| "eval_wer": 0.24299106911630866, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 2.117547409533669, | |
| "grad_norm": 2.234062433242798, | |
| "learning_rate": 2.955726501110184e-06, | |
| "loss": 0.3836, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 2.117547409533669, | |
| "eval_loss": 0.25172147154808044, | |
| "eval_runtime": 543.9926, | |
| "eval_samples_per_second": 16.449, | |
| "eval_steps_per_second": 2.057, | |
| "eval_wer": 0.24226224389377649, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 2.2351889322855394, | |
| "grad_norm": 2.7782626152038574, | |
| "learning_rate": 2.5621624175235817e-06, | |
| "loss": 0.3767, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 2.2351889322855394, | |
| "eval_loss": 0.2503082752227783, | |
| "eval_runtime": 546.7703, | |
| "eval_samples_per_second": 16.365, | |
| "eval_steps_per_second": 2.047, | |
| "eval_wer": 0.23942833465286462, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 2.35283045503741, | |
| "grad_norm": 2.365490198135376, | |
| "learning_rate": 2.1686770703746284e-06, | |
| "loss": 0.3738, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 2.35283045503741, | |
| "eval_loss": 0.24804162979125977, | |
| "eval_runtime": 545.3342, | |
| "eval_samples_per_second": 16.408, | |
| "eval_steps_per_second": 2.052, | |
| "eval_wer": 0.23909166919474062, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 2.4704719777892805, | |
| "grad_norm": 3.105099678039551, | |
| "learning_rate": 1.7751523550068502e-06, | |
| "loss": 0.3707, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 2.4704719777892805, | |
| "eval_loss": 0.24506914615631104, | |
| "eval_runtime": 547.7088, | |
| "eval_samples_per_second": 16.337, | |
| "eval_steps_per_second": 2.043, | |
| "eval_wer": 0.23790409104025928, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 2.588113500541151, | |
| "grad_norm": 0.9898041486740112, | |
| "learning_rate": 1.3815882714202481e-06, | |
| "loss": 0.3649, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 2.588113500541151, | |
| "eval_loss": 0.24370211362838745, | |
| "eval_runtime": 547.4398, | |
| "eval_samples_per_second": 16.345, | |
| "eval_steps_per_second": 2.044, | |
| "eval_wer": 0.23637614780723498, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 2.7057550232930216, | |
| "grad_norm": 4.53593635559082, | |
| "learning_rate": 9.880241878336459e-07, | |
| "loss": 0.369, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 2.7057550232930216, | |
| "eval_loss": 0.24421393871307373, | |
| "eval_runtime": 549.6273, | |
| "eval_samples_per_second": 16.28, | |
| "eval_steps_per_second": 2.036, | |
| "eval_wer": 0.23538094991453876, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 2.8233965460448918, | |
| "grad_norm": 3.995215654373169, | |
| "learning_rate": 5.945388406846922e-07, | |
| "loss": 0.3608, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 2.8233965460448918, | |
| "eval_loss": 0.24260137975215912, | |
| "eval_runtime": 548.4399, | |
| "eval_samples_per_second": 16.315, | |
| "eval_steps_per_second": 2.04, | |
| "eval_wer": 0.23452633759776248, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 2.9410380687967628, | |
| "grad_norm": 1.9774836301803589, | |
| "learning_rate": 2.0097475709808989e-07, | |
| "loss": 0.3644, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 2.9410380687967628, | |
| "eval_loss": 0.24260272085666656, | |
| "eval_runtime": 548.9964, | |
| "eval_samples_per_second": 16.299, | |
| "eval_steps_per_second": 2.038, | |
| "eval_wer": 0.234019489600367, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 255012, | |
| "total_flos": 2.7896646019563717e+20, | |
| "train_loss": 0.5826510110653187, | |
| "train_runtime": 84804.3792, | |
| "train_samples_per_second": 6.014, | |
| "train_steps_per_second": 3.007 | |
| } | |
| ], | |
| "logging_steps": 10000, | |
| "max_steps": 255012, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.7896646019563717e+20, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |