{ "best_global_step": 600, "best_metric": 1.0189228529839884, "best_model_checkpoint": "./SALAMA_NEW99/checkpoint-600", "epoch": 0.9787928221859706, "eval_steps": 600, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01631321370309951, "grad_norm": 0.8075992465019226, "learning_rate": 1.8e-07, "loss": 0.0109, "step": 10 }, { "epoch": 0.03262642740619902, "grad_norm": 1.4262839555740356, "learning_rate": 3.8e-07, "loss": 0.0094, "step": 20 }, { "epoch": 0.048939641109298535, "grad_norm": 2.3042991161346436, "learning_rate": 5.800000000000001e-07, "loss": 0.0093, "step": 30 }, { "epoch": 0.06525285481239804, "grad_norm": 0.546521782875061, "learning_rate": 7.8e-07, "loss": 0.0218, "step": 40 }, { "epoch": 0.08156606851549755, "grad_norm": 2.1409292221069336, "learning_rate": 9.800000000000001e-07, "loss": 0.0077, "step": 50 }, { "epoch": 0.09787928221859707, "grad_norm": 1.9533841609954834, "learning_rate": 1.1800000000000001e-06, "loss": 0.0061, "step": 60 }, { "epoch": 0.11419249592169657, "grad_norm": 1.0335050821304321, "learning_rate": 1.3800000000000001e-06, "loss": 0.0245, "step": 70 }, { "epoch": 0.13050570962479607, "grad_norm": 1.158021330833435, "learning_rate": 1.5800000000000001e-06, "loss": 0.009, "step": 80 }, { "epoch": 0.1468189233278956, "grad_norm": 2.7232327461242676, "learning_rate": 1.7800000000000001e-06, "loss": 0.0197, "step": 90 }, { "epoch": 0.1631321370309951, "grad_norm": 1.2299067974090576, "learning_rate": 1.98e-06, "loss": 0.007, "step": 100 }, { "epoch": 0.17944535073409462, "grad_norm": 2.173428535461426, "learning_rate": 2.1800000000000003e-06, "loss": 0.0098, "step": 110 }, { "epoch": 0.19575856443719414, "grad_norm": 1.4795992374420166, "learning_rate": 2.38e-06, "loss": 0.0098, "step": 120 }, { "epoch": 0.21207177814029363, "grad_norm": 0.48007962107658386, "learning_rate": 2.5800000000000003e-06, "loss": 0.0258, "step": 130 }, { "epoch": 0.22838499184339314, "grad_norm": 0.5283637642860413, "learning_rate": 2.7800000000000005e-06, "loss": 0.0078, "step": 140 }, { "epoch": 0.24469820554649266, "grad_norm": 1.748868465423584, "learning_rate": 2.9800000000000003e-06, "loss": 0.0083, "step": 150 }, { "epoch": 0.26101141924959215, "grad_norm": 2.2862608432769775, "learning_rate": 3.1800000000000005e-06, "loss": 0.012, "step": 160 }, { "epoch": 0.27732463295269166, "grad_norm": 1.9614684581756592, "learning_rate": 3.3800000000000007e-06, "loss": 0.0093, "step": 170 }, { "epoch": 0.2936378466557912, "grad_norm": 0.2269970178604126, "learning_rate": 3.58e-06, "loss": 0.0079, "step": 180 }, { "epoch": 0.3099510603588907, "grad_norm": 3.722224712371826, "learning_rate": 3.7800000000000002e-06, "loss": 0.0123, "step": 190 }, { "epoch": 0.3262642740619902, "grad_norm": 1.0197768211364746, "learning_rate": 3.980000000000001e-06, "loss": 0.0126, "step": 200 }, { "epoch": 0.3425774877650897, "grad_norm": 2.238046884536743, "learning_rate": 4.18e-06, "loss": 0.0155, "step": 210 }, { "epoch": 0.35889070146818924, "grad_norm": 2.2540082931518555, "learning_rate": 4.38e-06, "loss": 0.0064, "step": 220 }, { "epoch": 0.37520391517128876, "grad_norm": 4.272034168243408, "learning_rate": 4.58e-06, "loss": 0.0144, "step": 230 }, { "epoch": 0.3915171288743883, "grad_norm": 1.297784447669983, "learning_rate": 4.78e-06, "loss": 0.0183, "step": 240 }, { "epoch": 0.4078303425774878, "grad_norm": 1.1175105571746826, "learning_rate": 4.980000000000001e-06, "loss": 0.01, "step": 250 }, { "epoch": 0.42414355628058725, "grad_norm": 0.989632248878479, "learning_rate": 5.18e-06, "loss": 0.0185, "step": 260 }, { "epoch": 0.44045676998368677, "grad_norm": 2.2630860805511475, "learning_rate": 5.380000000000001e-06, "loss": 0.004, "step": 270 }, { "epoch": 0.4567699836867863, "grad_norm": 3.4459376335144043, "learning_rate": 5.580000000000001e-06, "loss": 0.0338, "step": 280 }, { "epoch": 0.4730831973898858, "grad_norm": 2.0962679386138916, "learning_rate": 5.78e-06, "loss": 0.0078, "step": 290 }, { "epoch": 0.4893964110929853, "grad_norm": 1.109755277633667, "learning_rate": 5.98e-06, "loss": 0.0133, "step": 300 }, { "epoch": 0.5057096247960848, "grad_norm": 1.8200187683105469, "learning_rate": 6.18e-06, "loss": 0.0128, "step": 310 }, { "epoch": 0.5220228384991843, "grad_norm": 0.8147674798965454, "learning_rate": 6.380000000000001e-06, "loss": 0.0139, "step": 320 }, { "epoch": 0.5383360522022839, "grad_norm": 2.6240575313568115, "learning_rate": 6.5800000000000005e-06, "loss": 0.0153, "step": 330 }, { "epoch": 0.5546492659053833, "grad_norm": 0.49103260040283203, "learning_rate": 6.780000000000001e-06, "loss": 0.0119, "step": 340 }, { "epoch": 0.5709624796084829, "grad_norm": 1.4433389902114868, "learning_rate": 6.98e-06, "loss": 0.0169, "step": 350 }, { "epoch": 0.5872756933115824, "grad_norm": 1.6671804189682007, "learning_rate": 7.180000000000001e-06, "loss": 0.0103, "step": 360 }, { "epoch": 0.6035889070146819, "grad_norm": 1.7344307899475098, "learning_rate": 7.3800000000000005e-06, "loss": 0.0234, "step": 370 }, { "epoch": 0.6199021207177814, "grad_norm": 1.4996588230133057, "learning_rate": 7.58e-06, "loss": 0.0138, "step": 380 }, { "epoch": 0.636215334420881, "grad_norm": 1.5783976316452026, "learning_rate": 7.78e-06, "loss": 0.0219, "step": 390 }, { "epoch": 0.6525285481239804, "grad_norm": 0.5455562472343445, "learning_rate": 7.980000000000002e-06, "loss": 0.0113, "step": 400 }, { "epoch": 0.6688417618270799, "grad_norm": 1.6277507543563843, "learning_rate": 8.18e-06, "loss": 0.018, "step": 410 }, { "epoch": 0.6851549755301795, "grad_norm": 2.473850965499878, "learning_rate": 8.380000000000001e-06, "loss": 0.0224, "step": 420 }, { "epoch": 0.7014681892332789, "grad_norm": 1.4439480304718018, "learning_rate": 8.580000000000001e-06, "loss": 0.0156, "step": 430 }, { "epoch": 0.7177814029363785, "grad_norm": 0.7241881489753723, "learning_rate": 8.78e-06, "loss": 0.0131, "step": 440 }, { "epoch": 0.734094616639478, "grad_norm": 2.1885428428649902, "learning_rate": 8.98e-06, "loss": 0.019, "step": 450 }, { "epoch": 0.7504078303425775, "grad_norm": 2.702345371246338, "learning_rate": 9.180000000000002e-06, "loss": 0.0253, "step": 460 }, { "epoch": 0.766721044045677, "grad_norm": 3.975011110305786, "learning_rate": 9.38e-06, "loss": 0.0107, "step": 470 }, { "epoch": 0.7830342577487766, "grad_norm": 2.3299198150634766, "learning_rate": 9.58e-06, "loss": 0.0212, "step": 480 }, { "epoch": 0.799347471451876, "grad_norm": 1.317580223083496, "learning_rate": 9.780000000000001e-06, "loss": 0.0227, "step": 490 }, { "epoch": 0.8156606851549756, "grad_norm": 2.4023892879486084, "learning_rate": 9.980000000000001e-06, "loss": 0.0365, "step": 500 }, { "epoch": 0.831973898858075, "grad_norm": 2.641399383544922, "learning_rate": 9.932785660941001e-06, "loss": 0.0138, "step": 510 }, { "epoch": 0.8482871125611745, "grad_norm": 2.7177205085754395, "learning_rate": 9.858103061986558e-06, "loss": 0.0145, "step": 520 }, { "epoch": 0.8646003262642741, "grad_norm": 1.9215168952941895, "learning_rate": 9.783420463032114e-06, "loss": 0.0305, "step": 530 }, { "epoch": 0.8809135399673735, "grad_norm": 2.8683998584747314, "learning_rate": 9.708737864077671e-06, "loss": 0.0183, "step": 540 }, { "epoch": 0.8972267536704731, "grad_norm": 1.9078762531280518, "learning_rate": 9.634055265123227e-06, "loss": 0.0235, "step": 550 }, { "epoch": 0.9135399673735726, "grad_norm": 3.7367427349090576, "learning_rate": 9.559372666168784e-06, "loss": 0.0175, "step": 560 }, { "epoch": 0.9298531810766721, "grad_norm": 1.7846318483352661, "learning_rate": 9.48469006721434e-06, "loss": 0.0138, "step": 570 }, { "epoch": 0.9461663947797716, "grad_norm": 3.428025245666504, "learning_rate": 9.410007468259897e-06, "loss": 0.0168, "step": 580 }, { "epoch": 0.9624796084828712, "grad_norm": 2.601677417755127, "learning_rate": 9.335324869305452e-06, "loss": 0.0192, "step": 590 }, { "epoch": 0.9787928221859706, "grad_norm": 3.509207248687744, "learning_rate": 9.26064227035101e-06, "loss": 0.0177, "step": 600 }, { "epoch": 0.9787928221859706, "eval_loss": 0.011165497824549675, "eval_runtime": 1766.1438, "eval_samples_per_second": 2.775, "eval_steps_per_second": 0.347, "eval_wer": 1.0189228529839884, "step": 600 } ], "logging_steps": 10, "max_steps": 1839, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 600, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.540839686144e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }