| { |
| "best_global_step": 1000, |
| "best_metric": 22.084805653710244, |
| "best_model_checkpoint": "./SALAMA_NEWMEDTMUB/checkpoint-1000", |
| "epoch": 0.7312614259597806, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007312614259597806, |
| "grad_norm": 23.786584854125977, |
| "learning_rate": 1.8e-07, |
| "loss": 0.338, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.014625228519195612, |
| "grad_norm": 25.875110626220703, |
| "learning_rate": 3.8e-07, |
| "loss": 0.4003, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.021937842778793418, |
| "grad_norm": 20.586557388305664, |
| "learning_rate": 5.800000000000001e-07, |
| "loss": 0.5275, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.029250457038391225, |
| "grad_norm": 22.56281089782715, |
| "learning_rate": 7.8e-07, |
| "loss": 0.3006, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03656307129798903, |
| "grad_norm": 28.27590560913086, |
| "learning_rate": 9.800000000000001e-07, |
| "loss": 0.2741, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.043875685557586835, |
| "grad_norm": 15.167706489562988, |
| "learning_rate": 1.1800000000000001e-06, |
| "loss": 0.4286, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.051188299817184646, |
| "grad_norm": 17.841941833496094, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 0.3639, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05850091407678245, |
| "grad_norm": 1.7901560068130493, |
| "learning_rate": 1.5800000000000001e-06, |
| "loss": 0.2633, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06581352833638025, |
| "grad_norm": 23.4942569732666, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 0.4025, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07312614259597806, |
| "grad_norm": 15.08558464050293, |
| "learning_rate": 1.98e-06, |
| "loss": 0.6375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08043875685557587, |
| "grad_norm": 8.504720687866211, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 0.2601, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08775137111517367, |
| "grad_norm": 11.795039176940918, |
| "learning_rate": 2.38e-06, |
| "loss": 0.4155, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09506398537477148, |
| "grad_norm": 4.088447570800781, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 0.3254, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10237659963436929, |
| "grad_norm": 9.273443222045898, |
| "learning_rate": 2.7800000000000005e-06, |
| "loss": 0.6213, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10968921389396709, |
| "grad_norm": 10.197525978088379, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 0.2383, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1170018281535649, |
| "grad_norm": 32.452735900878906, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 0.4707, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12431444241316271, |
| "grad_norm": 12.96947193145752, |
| "learning_rate": 3.3800000000000007e-06, |
| "loss": 0.2176, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1316270566727605, |
| "grad_norm": 7.767789840698242, |
| "learning_rate": 3.58e-06, |
| "loss": 0.2673, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.13893967093235832, |
| "grad_norm": 13.503886222839355, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.4111, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.14625228519195613, |
| "grad_norm": 12.849893569946289, |
| "learning_rate": 3.980000000000001e-06, |
| "loss": 0.3538, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15356489945155394, |
| "grad_norm": 20.808692932128906, |
| "learning_rate": 4.18e-06, |
| "loss": 0.3639, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.16087751371115175, |
| "grad_norm": 6.0378007888793945, |
| "learning_rate": 4.38e-06, |
| "loss": 0.514, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16819012797074953, |
| "grad_norm": 10.26975154876709, |
| "learning_rate": 4.58e-06, |
| "loss": 0.3015, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.17550274223034734, |
| "grad_norm": 17.779497146606445, |
| "learning_rate": 4.78e-06, |
| "loss": 0.2099, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.18281535648994515, |
| "grad_norm": 10.017502784729004, |
| "learning_rate": 4.980000000000001e-06, |
| "loss": 0.1943, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19012797074954296, |
| "grad_norm": 23.421777725219727, |
| "learning_rate": 5.18e-06, |
| "loss": 0.2958, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.19744058500914077, |
| "grad_norm": 11.93268871307373, |
| "learning_rate": 5.380000000000001e-06, |
| "loss": 0.3723, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.20475319926873858, |
| "grad_norm": 5.4110612869262695, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 0.2661, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.21206581352833637, |
| "grad_norm": 17.327363967895508, |
| "learning_rate": 5.78e-06, |
| "loss": 0.436, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21937842778793418, |
| "grad_norm": 6.25701379776001, |
| "learning_rate": 5.98e-06, |
| "loss": 0.2156, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.226691042047532, |
| "grad_norm": 13.935582160949707, |
| "learning_rate": 6.18e-06, |
| "loss": 0.598, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2340036563071298, |
| "grad_norm": 6.627614974975586, |
| "learning_rate": 6.380000000000001e-06, |
| "loss": 0.3433, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2413162705667276, |
| "grad_norm": 6.431783676147461, |
| "learning_rate": 6.5800000000000005e-06, |
| "loss": 0.1368, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.24862888482632542, |
| "grad_norm": 23.54817771911621, |
| "learning_rate": 6.780000000000001e-06, |
| "loss": 0.2473, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.25594149908592323, |
| "grad_norm": 8.785611152648926, |
| "learning_rate": 6.98e-06, |
| "loss": 0.2112, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.263254113345521, |
| "grad_norm": 6.616166591644287, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 0.1278, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.27056672760511885, |
| "grad_norm": 3.7033286094665527, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 0.3054, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.27787934186471663, |
| "grad_norm": 13.525383949279785, |
| "learning_rate": 7.58e-06, |
| "loss": 0.4384, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2851919561243144, |
| "grad_norm": 7.937963962554932, |
| "learning_rate": 7.78e-06, |
| "loss": 0.0957, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.29250457038391225, |
| "grad_norm": 16.69182777404785, |
| "learning_rate": 7.980000000000002e-06, |
| "loss": 0.2025, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.29981718464351004, |
| "grad_norm": 8.825018882751465, |
| "learning_rate": 8.18e-06, |
| "loss": 0.3476, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3071297989031079, |
| "grad_norm": 18.566797256469727, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.4193, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.31444241316270566, |
| "grad_norm": 5.76287317276001, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.2035, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3217550274223035, |
| "grad_norm": 8.784618377685547, |
| "learning_rate": 8.78e-06, |
| "loss": 0.1575, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3290676416819013, |
| "grad_norm": 16.30363655090332, |
| "learning_rate": 8.98e-06, |
| "loss": 0.471, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.33638025594149906, |
| "grad_norm": 2.915956735610962, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.182, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3436928702010969, |
| "grad_norm": 3.9583890438079834, |
| "learning_rate": 9.38e-06, |
| "loss": 0.1509, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3510054844606947, |
| "grad_norm": 9.183399200439453, |
| "learning_rate": 9.58e-06, |
| "loss": 0.2354, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3583180987202925, |
| "grad_norm": 10.724503517150879, |
| "learning_rate": 9.780000000000001e-06, |
| "loss": 0.1361, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3656307129798903, |
| "grad_norm": 9.822811126708984, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.237, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.37294332723948814, |
| "grad_norm": 15.446513175964355, |
| "learning_rate": 9.95974955277281e-06, |
| "loss": 0.6261, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3802559414990859, |
| "grad_norm": 5.505027770996094, |
| "learning_rate": 9.915026833631485e-06, |
| "loss": 0.3904, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3875685557586837, |
| "grad_norm": 7.490746021270752, |
| "learning_rate": 9.870304114490162e-06, |
| "loss": 0.3183, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.39488117001828155, |
| "grad_norm": 6.62838888168335, |
| "learning_rate": 9.825581395348838e-06, |
| "loss": 0.3496, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.40219378427787933, |
| "grad_norm": 17.81728744506836, |
| "learning_rate": 9.780858676207515e-06, |
| "loss": 0.3502, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.40950639853747717, |
| "grad_norm": 9.13936710357666, |
| "learning_rate": 9.73613595706619e-06, |
| "loss": 0.5141, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.41681901279707495, |
| "grad_norm": 3.2332348823547363, |
| "learning_rate": 9.691413237924867e-06, |
| "loss": 0.2679, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.42413162705667273, |
| "grad_norm": 8.26561450958252, |
| "learning_rate": 9.646690518783543e-06, |
| "loss": 0.3905, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.43144424131627057, |
| "grad_norm": 6.408196449279785, |
| "learning_rate": 9.601967799642218e-06, |
| "loss": 0.2694, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.43875685557586835, |
| "grad_norm": 12.754035949707031, |
| "learning_rate": 9.557245080500895e-06, |
| "loss": 0.4731, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4460694698354662, |
| "grad_norm": 0.6065673232078552, |
| "learning_rate": 9.512522361359572e-06, |
| "loss": 0.2143, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.453382084095064, |
| "grad_norm": 4.9976582527160645, |
| "learning_rate": 9.467799642218248e-06, |
| "loss": 0.4713, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4606946983546618, |
| "grad_norm": 6.202505588531494, |
| "learning_rate": 9.423076923076923e-06, |
| "loss": 0.1564, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4680073126142596, |
| "grad_norm": 49.993919372558594, |
| "learning_rate": 9.3783542039356e-06, |
| "loss": 0.1725, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4753199268738574, |
| "grad_norm": 6.155549049377441, |
| "learning_rate": 9.333631484794277e-06, |
| "loss": 0.19, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4826325411334552, |
| "grad_norm": 10.210041046142578, |
| "learning_rate": 9.288908765652953e-06, |
| "loss": 0.1782, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.489945155393053, |
| "grad_norm": 3.6081888675689697, |
| "learning_rate": 9.244186046511628e-06, |
| "loss": 0.337, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.49725776965265084, |
| "grad_norm": 12.385890007019043, |
| "learning_rate": 9.199463327370305e-06, |
| "loss": 0.2003, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5045703839122486, |
| "grad_norm": 7.23034143447876, |
| "learning_rate": 9.15474060822898e-06, |
| "loss": 0.1712, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5118829981718465, |
| "grad_norm": 4.523240566253662, |
| "learning_rate": 9.110017889087658e-06, |
| "loss": 0.3199, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5191956124314442, |
| "grad_norm": 10.353597640991211, |
| "learning_rate": 9.065295169946333e-06, |
| "loss": 0.3007, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.526508226691042, |
| "grad_norm": 5.710986137390137, |
| "learning_rate": 9.02057245080501e-06, |
| "loss": 0.148, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5338208409506399, |
| "grad_norm": 8.321440696716309, |
| "learning_rate": 8.975849731663686e-06, |
| "loss": 0.3764, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5411334552102377, |
| "grad_norm": 6.329477787017822, |
| "learning_rate": 8.931127012522363e-06, |
| "loss": 0.1774, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5484460694698354, |
| "grad_norm": 14.216864585876465, |
| "learning_rate": 8.886404293381038e-06, |
| "loss": 0.2678, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5557586837294333, |
| "grad_norm": 6.65015172958374, |
| "learning_rate": 8.841681574239714e-06, |
| "loss": 0.2138, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5630712979890311, |
| "grad_norm": 17.47846221923828, |
| "learning_rate": 8.79695885509839e-06, |
| "loss": 0.2223, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5703839122486288, |
| "grad_norm": 11.677774429321289, |
| "learning_rate": 8.752236135957068e-06, |
| "loss": 0.332, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5776965265082267, |
| "grad_norm": 12.348886489868164, |
| "learning_rate": 8.707513416815743e-06, |
| "loss": 0.2077, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5850091407678245, |
| "grad_norm": 9.304800033569336, |
| "learning_rate": 8.662790697674419e-06, |
| "loss": 0.1048, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5923217550274223, |
| "grad_norm": 6.786717414855957, |
| "learning_rate": 8.618067978533096e-06, |
| "loss": 0.2946, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5996343692870201, |
| "grad_norm": 7.69470739364624, |
| "learning_rate": 8.573345259391773e-06, |
| "loss": 0.2642, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6069469835466179, |
| "grad_norm": 4.253036975860596, |
| "learning_rate": 8.528622540250448e-06, |
| "loss": 0.3286, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6142595978062158, |
| "grad_norm": 8.26134967803955, |
| "learning_rate": 8.483899821109124e-06, |
| "loss": 0.0926, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6215722120658135, |
| "grad_norm": 14.105083465576172, |
| "learning_rate": 8.4391771019678e-06, |
| "loss": 0.2227, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6288848263254113, |
| "grad_norm": 9.787266731262207, |
| "learning_rate": 8.394454382826476e-06, |
| "loss": 0.2463, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6361974405850092, |
| "grad_norm": 7.089901447296143, |
| "learning_rate": 8.349731663685151e-06, |
| "loss": 0.4785, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.643510054844607, |
| "grad_norm": 8.751269340515137, |
| "learning_rate": 8.305008944543829e-06, |
| "loss": 0.4554, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6508226691042047, |
| "grad_norm": 12.375998497009277, |
| "learning_rate": 8.260286225402506e-06, |
| "loss": 0.1926, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6581352833638026, |
| "grad_norm": 2.7685928344726562, |
| "learning_rate": 8.215563506261181e-06, |
| "loss": 0.3606, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6654478976234004, |
| "grad_norm": 1.846977949142456, |
| "learning_rate": 8.170840787119858e-06, |
| "loss": 0.1449, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6727605118829981, |
| "grad_norm": 6.678775310516357, |
| "learning_rate": 8.126118067978534e-06, |
| "loss": 0.1799, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.680073126142596, |
| "grad_norm": 5.570348262786865, |
| "learning_rate": 8.08139534883721e-06, |
| "loss": 0.2333, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6873857404021938, |
| "grad_norm": 7.157234191894531, |
| "learning_rate": 8.036672629695886e-06, |
| "loss": 0.2279, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6946983546617916, |
| "grad_norm": 5.400512218475342, |
| "learning_rate": 7.991949910554563e-06, |
| "loss": 0.1583, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7020109689213894, |
| "grad_norm": 11.138449668884277, |
| "learning_rate": 7.947227191413239e-06, |
| "loss": 0.3318, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7093235831809872, |
| "grad_norm": 6.359451770782471, |
| "learning_rate": 7.902504472271914e-06, |
| "loss": 0.2533, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.716636197440585, |
| "grad_norm": 5.201321601867676, |
| "learning_rate": 7.857781753130591e-06, |
| "loss": 0.2389, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7239488117001828, |
| "grad_norm": 9.445417404174805, |
| "learning_rate": 7.813059033989268e-06, |
| "loss": 0.32, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7312614259597806, |
| "grad_norm": 6.709265232086182, |
| "learning_rate": 7.768336314847944e-06, |
| "loss": 0.4202, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7312614259597806, |
| "eval_loss": 0.23008325695991516, |
| "eval_runtime": 17.9053, |
| "eval_samples_per_second": 3.965, |
| "eval_steps_per_second": 3.965, |
| "eval_wer": 22.084805653710244, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2736, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.08241963008e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|