{ "best_metric": 0.9826419535157399, "best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1335", "epoch": 7.972769953051643, "eval_steps": 500, "global_step": 2128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03755868544600939, "grad_norm": 2.0377416610717773, "learning_rate": 1.4084507042253521e-06, "loss": 3.8687, "step": 10 }, { "epoch": 0.07511737089201878, "grad_norm": 3.055781602859497, "learning_rate": 2.8169014084507042e-06, "loss": 4.1156, "step": 20 }, { "epoch": 0.11267605633802817, "grad_norm": 3.383268356323242, "learning_rate": 4.225352112676057e-06, "loss": 4.0885, "step": 30 }, { "epoch": 0.15023474178403756, "grad_norm": 3.8566606044769287, "learning_rate": 5.6338028169014084e-06, "loss": 3.9316, "step": 40 }, { "epoch": 0.18779342723004694, "grad_norm": 5.065456867218018, "learning_rate": 7.042253521126761e-06, "loss": 3.6474, "step": 50 }, { "epoch": 0.22535211267605634, "grad_norm": 5.89341926574707, "learning_rate": 8.450704225352114e-06, "loss": 3.2124, "step": 60 }, { "epoch": 0.26291079812206575, "grad_norm": 5.9929399490356445, "learning_rate": 9.859154929577466e-06, "loss": 2.756, "step": 70 }, { "epoch": 0.3004694835680751, "grad_norm": 5.689433574676514, "learning_rate": 1.1267605633802817e-05, "loss": 2.4596, "step": 80 }, { "epoch": 0.3380281690140845, "grad_norm": 4.89589262008667, "learning_rate": 1.267605633802817e-05, "loss": 2.2638, "step": 90 }, { "epoch": 0.3755868544600939, "grad_norm": 4.8666839599609375, "learning_rate": 1.4084507042253522e-05, "loss": 2.1166, "step": 100 }, { "epoch": 0.4131455399061033, "grad_norm": 4.466708660125732, "learning_rate": 1.5492957746478876e-05, "loss": 2.0048, "step": 110 }, { "epoch": 0.4507042253521127, "grad_norm": 3.676050901412964, "learning_rate": 1.6901408450704228e-05, "loss": 1.9138, "step": 120 }, { "epoch": 0.48826291079812206, "grad_norm": 2.183825731277466, "learning_rate": 1.830985915492958e-05, "loss": 1.863, "step": 130 }, { "epoch": 0.5258215962441315, "grad_norm": 2.075413465499878, "learning_rate": 1.9718309859154933e-05, "loss": 1.7616, "step": 140 }, { "epoch": 0.5633802816901409, "grad_norm": 0.8534318208694458, "learning_rate": 2.112676056338028e-05, "loss": 1.7185, "step": 150 }, { "epoch": 0.6009389671361502, "grad_norm": 0.9039830565452576, "learning_rate": 2.2535211267605634e-05, "loss": 1.8054, "step": 160 }, { "epoch": 0.6384976525821596, "grad_norm": 1.32124662399292, "learning_rate": 2.3943661971830986e-05, "loss": 1.7367, "step": 170 }, { "epoch": 0.676056338028169, "grad_norm": 1.232069969177246, "learning_rate": 2.535211267605634e-05, "loss": 1.7423, "step": 180 }, { "epoch": 0.7136150234741784, "grad_norm": 1.9570960998535156, "learning_rate": 2.676056338028169e-05, "loss": 1.6132, "step": 190 }, { "epoch": 0.7511737089201878, "grad_norm": 2.4463119506835938, "learning_rate": 2.8169014084507043e-05, "loss": 1.6099, "step": 200 }, { "epoch": 0.7887323943661971, "grad_norm": 6.601908206939697, "learning_rate": 2.9577464788732395e-05, "loss": 1.6043, "step": 210 }, { "epoch": 0.8262910798122066, "grad_norm": 3.225101947784424, "learning_rate": 2.989033942558747e-05, "loss": 1.5621, "step": 220 }, { "epoch": 0.863849765258216, "grad_norm": 3.698263645172119, "learning_rate": 2.9733681462140994e-05, "loss": 1.514, "step": 230 }, { "epoch": 0.9014084507042254, "grad_norm": 5.209756374359131, "learning_rate": 2.9577023498694518e-05, "loss": 1.4532, "step": 240 }, { "epoch": 0.9389671361502347, "grad_norm": 2.1304848194122314, "learning_rate": 2.9420365535248042e-05, "loss": 1.4312, "step": 250 }, { "epoch": 0.9765258215962441, "grad_norm": 4.837350368499756, "learning_rate": 2.926370757180157e-05, "loss": 1.3624, "step": 260 }, { "epoch": 1.0, "eval_accuracy": 0.6546042953809944, "eval_loss": 1.19585382938385, "eval_runtime": 4.9178, "eval_samples_per_second": 1382.328, "eval_steps_per_second": 43.312, "step": 267 }, { "epoch": 1.0112676056338028, "grad_norm": 4.779292106628418, "learning_rate": 2.9107049608355094e-05, "loss": 1.2541, "step": 270 }, { "epoch": 1.0488262910798123, "grad_norm": 3.60760498046875, "learning_rate": 2.8950391644908618e-05, "loss": 1.2271, "step": 280 }, { "epoch": 1.0863849765258216, "grad_norm": 2.3788599967956543, "learning_rate": 2.8793733681462142e-05, "loss": 1.2335, "step": 290 }, { "epoch": 1.123943661971831, "grad_norm": 3.353325843811035, "learning_rate": 2.8637075718015666e-05, "loss": 1.1613, "step": 300 }, { "epoch": 1.1615023474178403, "grad_norm": 4.326411247253418, "learning_rate": 2.8480417754569193e-05, "loss": 1.0754, "step": 310 }, { "epoch": 1.1990610328638498, "grad_norm": 3.1939706802368164, "learning_rate": 2.8323759791122717e-05, "loss": 1.0353, "step": 320 }, { "epoch": 1.236619718309859, "grad_norm": 2.8827011585235596, "learning_rate": 2.816710182767624e-05, "loss": 0.9806, "step": 330 }, { "epoch": 1.2741784037558685, "grad_norm": 3.910698652267456, "learning_rate": 2.8010443864229766e-05, "loss": 1.0813, "step": 340 }, { "epoch": 1.3117370892018778, "grad_norm": 3.5916378498077393, "learning_rate": 2.7853785900783293e-05, "loss": 0.9792, "step": 350 }, { "epoch": 1.3492957746478873, "grad_norm": 2.6981167793273926, "learning_rate": 2.7697127937336817e-05, "loss": 0.9231, "step": 360 }, { "epoch": 1.3868544600938968, "grad_norm": 5.702897071838379, "learning_rate": 2.754046997389034e-05, "loss": 0.9435, "step": 370 }, { "epoch": 1.424413145539906, "grad_norm": 4.622363090515137, "learning_rate": 2.7383812010443865e-05, "loss": 0.8449, "step": 380 }, { "epoch": 1.4619718309859155, "grad_norm": 2.2103636264801025, "learning_rate": 2.7227154046997393e-05, "loss": 0.7713, "step": 390 }, { "epoch": 1.4995305164319248, "grad_norm": 4.545182228088379, "learning_rate": 2.7070496083550917e-05, "loss": 0.7719, "step": 400 }, { "epoch": 1.5370892018779343, "grad_norm": 6.883026599884033, "learning_rate": 2.691383812010444e-05, "loss": 0.7564, "step": 410 }, { "epoch": 1.5746478873239438, "grad_norm": 4.770920276641846, "learning_rate": 2.6757180156657965e-05, "loss": 0.6994, "step": 420 }, { "epoch": 1.612206572769953, "grad_norm": 4.413459300994873, "learning_rate": 2.660052219321149e-05, "loss": 0.6313, "step": 430 }, { "epoch": 1.6497652582159623, "grad_norm": 2.0261390209198, "learning_rate": 2.6443864229765013e-05, "loss": 0.6017, "step": 440 }, { "epoch": 1.6873239436619718, "grad_norm": 5.67121696472168, "learning_rate": 2.6287206266318537e-05, "loss": 0.5792, "step": 450 }, { "epoch": 1.7248826291079813, "grad_norm": 2.573594808578491, "learning_rate": 2.6146214099216712e-05, "loss": 0.545, "step": 460 }, { "epoch": 1.7624413145539906, "grad_norm": 4.145854949951172, "learning_rate": 2.5989556135770236e-05, "loss": 0.4907, "step": 470 }, { "epoch": 1.8, "grad_norm": 1.7418975830078125, "learning_rate": 2.583289817232376e-05, "loss": 0.485, "step": 480 }, { "epoch": 1.8375586854460093, "grad_norm": 4.651867866516113, "learning_rate": 2.5676240208877287e-05, "loss": 0.4572, "step": 490 }, { "epoch": 1.8751173708920188, "grad_norm": 4.849829196929932, "learning_rate": 2.551958224543081e-05, "loss": 0.4864, "step": 500 }, { "epoch": 1.9126760563380283, "grad_norm": 2.631229877471924, "learning_rate": 2.5362924281984335e-05, "loss": 0.4035, "step": 510 }, { "epoch": 1.9502347417840376, "grad_norm": 5.099828243255615, "learning_rate": 2.520626631853786e-05, "loss": 0.3818, "step": 520 }, { "epoch": 1.9877934272300468, "grad_norm": 3.25174617767334, "learning_rate": 2.5049608355091387e-05, "loss": 0.3854, "step": 530 }, { "epoch": 2.0, "eval_accuracy": 0.9733745219182113, "eval_loss": 0.2675245702266693, "eval_runtime": 5.0739, "eval_samples_per_second": 1339.801, "eval_steps_per_second": 41.98, "step": 534 }, { "epoch": 2.0225352112676056, "grad_norm": 4.533545017242432, "learning_rate": 2.489295039164491e-05, "loss": 0.3589, "step": 540 }, { "epoch": 2.060093896713615, "grad_norm": 4.4245991706848145, "learning_rate": 2.4736292428198435e-05, "loss": 0.378, "step": 550 }, { "epoch": 2.0976525821596246, "grad_norm": 5.778880596160889, "learning_rate": 2.457963446475196e-05, "loss": 0.3653, "step": 560 }, { "epoch": 2.1352112676056336, "grad_norm": 3.5573890209198, "learning_rate": 2.4422976501305487e-05, "loss": 0.3107, "step": 570 }, { "epoch": 2.172769953051643, "grad_norm": 3.655824899673462, "learning_rate": 2.426631853785901e-05, "loss": 0.3405, "step": 580 }, { "epoch": 2.2103286384976526, "grad_norm": 2.430022954940796, "learning_rate": 2.4109660574412535e-05, "loss": 0.3298, "step": 590 }, { "epoch": 2.247887323943662, "grad_norm": 2.9207568168640137, "learning_rate": 2.3953002610966055e-05, "loss": 0.3022, "step": 600 }, { "epoch": 2.2854460093896716, "grad_norm": 4.8787007331848145, "learning_rate": 2.3796344647519583e-05, "loss": 0.3991, "step": 610 }, { "epoch": 2.3230046948356806, "grad_norm": 3.0268468856811523, "learning_rate": 2.3639686684073107e-05, "loss": 0.3159, "step": 620 }, { "epoch": 2.36056338028169, "grad_norm": 2.6611557006835938, "learning_rate": 2.348302872062663e-05, "loss": 0.2868, "step": 630 }, { "epoch": 2.3981220657276996, "grad_norm": 2.485551595687866, "learning_rate": 2.3326370757180155e-05, "loss": 0.3032, "step": 640 }, { "epoch": 2.435680751173709, "grad_norm": 4.556153297424316, "learning_rate": 2.316971279373368e-05, "loss": 0.2985, "step": 650 }, { "epoch": 2.473239436619718, "grad_norm": 5.270796298980713, "learning_rate": 2.3013054830287207e-05, "loss": 0.2839, "step": 660 }, { "epoch": 2.5107981220657276, "grad_norm": 3.347005844116211, "learning_rate": 2.285639686684073e-05, "loss": 0.2871, "step": 670 }, { "epoch": 2.548356807511737, "grad_norm": 5.236591815948486, "learning_rate": 2.2699738903394255e-05, "loss": 0.3028, "step": 680 }, { "epoch": 2.5859154929577466, "grad_norm": 2.995059013366699, "learning_rate": 2.254308093994778e-05, "loss": 0.2537, "step": 690 }, { "epoch": 2.6234741784037556, "grad_norm": 2.805640459060669, "learning_rate": 2.2386422976501306e-05, "loss": 0.297, "step": 700 }, { "epoch": 2.661032863849765, "grad_norm": 3.0646071434020996, "learning_rate": 2.222976501305483e-05, "loss": 0.2453, "step": 710 }, { "epoch": 2.6985915492957746, "grad_norm": 3.6719613075256348, "learning_rate": 2.2073107049608354e-05, "loss": 0.2655, "step": 720 }, { "epoch": 2.736150234741784, "grad_norm": 3.2248122692108154, "learning_rate": 2.191644908616188e-05, "loss": 0.2297, "step": 730 }, { "epoch": 2.7737089201877936, "grad_norm": 3.769843578338623, "learning_rate": 2.1759791122715406e-05, "loss": 0.2548, "step": 740 }, { "epoch": 2.8112676056338026, "grad_norm": 3.6679906845092773, "learning_rate": 2.160313315926893e-05, "loss": 0.2836, "step": 750 }, { "epoch": 2.848826291079812, "grad_norm": 1.6924936771392822, "learning_rate": 2.1446475195822454e-05, "loss": 0.2555, "step": 760 }, { "epoch": 2.8863849765258216, "grad_norm": 2.1275901794433594, "learning_rate": 2.1289817232375978e-05, "loss": 0.2334, "step": 770 }, { "epoch": 2.923943661971831, "grad_norm": 6.528135299682617, "learning_rate": 2.1133159268929506e-05, "loss": 0.2544, "step": 780 }, { "epoch": 2.9615023474178406, "grad_norm": 2.4497199058532715, "learning_rate": 2.097650130548303e-05, "loss": 0.2628, "step": 790 }, { "epoch": 2.9990610328638496, "grad_norm": 2.278947591781616, "learning_rate": 2.0819843342036554e-05, "loss": 0.2473, "step": 800 }, { "epoch": 3.0, "eval_accuracy": 0.9767578699617535, "eval_loss": 0.1461225152015686, "eval_runtime": 5.0057, "eval_samples_per_second": 1358.045, "eval_steps_per_second": 42.551, "step": 801 }, { "epoch": 3.0338028169014084, "grad_norm": 3.1185402870178223, "learning_rate": 2.0663185378590078e-05, "loss": 0.2245, "step": 810 }, { "epoch": 3.071361502347418, "grad_norm": 2.456102132797241, "learning_rate": 2.0506527415143602e-05, "loss": 0.2423, "step": 820 }, { "epoch": 3.1089201877934274, "grad_norm": 2.9463231563568115, "learning_rate": 2.034986945169713e-05, "loss": 0.2274, "step": 830 }, { "epoch": 3.1464788732394364, "grad_norm": 3.5940473079681396, "learning_rate": 2.0193211488250653e-05, "loss": 0.2368, "step": 840 }, { "epoch": 3.184037558685446, "grad_norm": 4.721577167510986, "learning_rate": 2.0036553524804177e-05, "loss": 0.2554, "step": 850 }, { "epoch": 3.2215962441314554, "grad_norm": 2.496495485305786, "learning_rate": 1.98798955613577e-05, "loss": 0.2363, "step": 860 }, { "epoch": 3.259154929577465, "grad_norm": 3.0665740966796875, "learning_rate": 1.972323759791123e-05, "loss": 0.2248, "step": 870 }, { "epoch": 3.2967136150234744, "grad_norm": 4.336172580718994, "learning_rate": 1.9566579634464753e-05, "loss": 0.1922, "step": 880 }, { "epoch": 3.3342723004694834, "grad_norm": 4.110763072967529, "learning_rate": 1.9409921671018277e-05, "loss": 0.1965, "step": 890 }, { "epoch": 3.371830985915493, "grad_norm": 1.9457247257232666, "learning_rate": 1.92532637075718e-05, "loss": 0.2258, "step": 900 }, { "epoch": 3.4093896713615024, "grad_norm": 2.719369411468506, "learning_rate": 1.909660574412533e-05, "loss": 0.2184, "step": 910 }, { "epoch": 3.446948356807512, "grad_norm": 3.438279151916504, "learning_rate": 1.8939947780678853e-05, "loss": 0.1964, "step": 920 }, { "epoch": 3.4845070422535214, "grad_norm": 3.2813045978546143, "learning_rate": 1.8783289817232377e-05, "loss": 0.2348, "step": 930 }, { "epoch": 3.5220657276995304, "grad_norm": 4.151478290557861, "learning_rate": 1.86266318537859e-05, "loss": 0.2004, "step": 940 }, { "epoch": 3.55962441314554, "grad_norm": 3.4271771907806396, "learning_rate": 1.8469973890339425e-05, "loss": 0.2039, "step": 950 }, { "epoch": 3.5971830985915494, "grad_norm": 4.0341901779174805, "learning_rate": 1.8313315926892952e-05, "loss": 0.1997, "step": 960 }, { "epoch": 3.6347417840375584, "grad_norm": 4.762091636657715, "learning_rate": 1.8156657963446476e-05, "loss": 0.2153, "step": 970 }, { "epoch": 3.672300469483568, "grad_norm": 3.3214402198791504, "learning_rate": 1.8e-05, "loss": 0.1801, "step": 980 }, { "epoch": 3.7098591549295774, "grad_norm": 3.84503173828125, "learning_rate": 1.7843342036553525e-05, "loss": 0.2106, "step": 990 }, { "epoch": 3.747417840375587, "grad_norm": 3.303781747817993, "learning_rate": 1.7686684073107052e-05, "loss": 0.1965, "step": 1000 }, { "epoch": 3.7849765258215964, "grad_norm": 2.691159248352051, "learning_rate": 1.7530026109660576e-05, "loss": 0.193, "step": 1010 }, { "epoch": 3.8225352112676054, "grad_norm": 4.134768009185791, "learning_rate": 1.73733681462141e-05, "loss": 0.1908, "step": 1020 }, { "epoch": 3.860093896713615, "grad_norm": 2.9195241928100586, "learning_rate": 1.7216710182767624e-05, "loss": 0.1886, "step": 1030 }, { "epoch": 3.8976525821596244, "grad_norm": 3.795133352279663, "learning_rate": 1.706005221932115e-05, "loss": 0.2007, "step": 1040 }, { "epoch": 3.935211267605634, "grad_norm": 3.9436607360839844, "learning_rate": 1.6903394255874676e-05, "loss": 0.1834, "step": 1050 }, { "epoch": 3.9727699530516434, "grad_norm": 3.4115564823150635, "learning_rate": 1.67467362924282e-05, "loss": 0.1997, "step": 1060 }, { "epoch": 4.0, "eval_accuracy": 0.980435422182995, "eval_loss": 0.10877315700054169, "eval_runtime": 4.9191, "eval_samples_per_second": 1381.955, "eval_steps_per_second": 43.3, "step": 1068 }, { "epoch": 4.007511737089202, "grad_norm": 5.121041774749756, "learning_rate": 1.6590078328981724e-05, "loss": 0.1785, "step": 1070 }, { "epoch": 4.045070422535211, "grad_norm": 2.908527374267578, "learning_rate": 1.643342036553525e-05, "loss": 0.1678, "step": 1080 }, { "epoch": 4.08262910798122, "grad_norm": 1.9687402248382568, "learning_rate": 1.6276762402088775e-05, "loss": 0.192, "step": 1090 }, { "epoch": 4.12018779342723, "grad_norm": 2.722937822341919, "learning_rate": 1.61201044386423e-05, "loss": 0.1983, "step": 1100 }, { "epoch": 4.157746478873239, "grad_norm": 2.3741490840911865, "learning_rate": 1.5963446475195823e-05, "loss": 0.2145, "step": 1110 }, { "epoch": 4.195305164319249, "grad_norm": 2.653414011001587, "learning_rate": 1.5806788511749348e-05, "loss": 0.1701, "step": 1120 }, { "epoch": 4.232863849765258, "grad_norm": 3.444087266921997, "learning_rate": 1.5650130548302875e-05, "loss": 0.2047, "step": 1130 }, { "epoch": 4.270422535211267, "grad_norm": 2.024235486984253, "learning_rate": 1.54934725848564e-05, "loss": 0.1817, "step": 1140 }, { "epoch": 4.307981220657277, "grad_norm": 2.742171049118042, "learning_rate": 1.533681462140992e-05, "loss": 0.1723, "step": 1150 }, { "epoch": 4.345539906103286, "grad_norm": 3.3700480461120605, "learning_rate": 1.5180156657963446e-05, "loss": 0.17, "step": 1160 }, { "epoch": 4.383098591549296, "grad_norm": 2.552915573120117, "learning_rate": 1.5023498694516973e-05, "loss": 0.1802, "step": 1170 }, { "epoch": 4.420657276995305, "grad_norm": 3.3317511081695557, "learning_rate": 1.4866840731070497e-05, "loss": 0.1933, "step": 1180 }, { "epoch": 4.458215962441314, "grad_norm": 1.9266548156738281, "learning_rate": 1.4710182767624021e-05, "loss": 0.1739, "step": 1190 }, { "epoch": 4.495774647887324, "grad_norm": 2.1459243297576904, "learning_rate": 1.4553524804177547e-05, "loss": 0.1599, "step": 1200 }, { "epoch": 4.533333333333333, "grad_norm": 3.9314770698547363, "learning_rate": 1.4396866840731071e-05, "loss": 0.1958, "step": 1210 }, { "epoch": 4.570892018779343, "grad_norm": 2.6377363204956055, "learning_rate": 1.4240208877284597e-05, "loss": 0.1604, "step": 1220 }, { "epoch": 4.608450704225352, "grad_norm": 2.810866594314575, "learning_rate": 1.408355091383812e-05, "loss": 0.1495, "step": 1230 }, { "epoch": 4.646009389671361, "grad_norm": 2.2084455490112305, "learning_rate": 1.3926892950391646e-05, "loss": 0.185, "step": 1240 }, { "epoch": 4.683568075117371, "grad_norm": 2.7217283248901367, "learning_rate": 1.377023498694517e-05, "loss": 0.1757, "step": 1250 }, { "epoch": 4.72112676056338, "grad_norm": 3.075267791748047, "learning_rate": 1.3613577023498696e-05, "loss": 0.1814, "step": 1260 }, { "epoch": 4.758685446009389, "grad_norm": 3.2452406883239746, "learning_rate": 1.345691906005222e-05, "loss": 0.1622, "step": 1270 }, { "epoch": 4.796244131455399, "grad_norm": 2.712754487991333, "learning_rate": 1.3300261096605744e-05, "loss": 0.1714, "step": 1280 }, { "epoch": 4.833802816901408, "grad_norm": 1.6795600652694702, "learning_rate": 1.3143603133159269e-05, "loss": 0.1519, "step": 1290 }, { "epoch": 4.871361502347418, "grad_norm": 3.9085493087768555, "learning_rate": 1.2986945169712793e-05, "loss": 0.1758, "step": 1300 }, { "epoch": 4.908920187793427, "grad_norm": 3.529478073120117, "learning_rate": 1.2830287206266318e-05, "loss": 0.1549, "step": 1310 }, { "epoch": 4.946478873239436, "grad_norm": 2.559157609939575, "learning_rate": 1.2673629242819842e-05, "loss": 0.1824, "step": 1320 }, { "epoch": 4.984037558685446, "grad_norm": 2.2350497245788574, "learning_rate": 1.2516971279373368e-05, "loss": 0.1723, "step": 1330 }, { "epoch": 5.0, "eval_accuracy": 0.9826419535157399, "eval_loss": 0.09542840719223022, "eval_runtime": 5.0389, "eval_samples_per_second": 1349.105, "eval_steps_per_second": 42.271, "step": 1335 }, { "epoch": 5.018779342723005, "grad_norm": 2.5073907375335693, "learning_rate": 1.2360313315926892e-05, "loss": 0.1401, "step": 1340 }, { "epoch": 5.056338028169014, "grad_norm": 4.696757793426514, "learning_rate": 1.2203655352480418e-05, "loss": 0.1801, "step": 1350 }, { "epoch": 5.093896713615023, "grad_norm": 1.2180489301681519, "learning_rate": 1.2046997389033942e-05, "loss": 0.1335, "step": 1360 }, { "epoch": 5.131455399061033, "grad_norm": 0.887860119342804, "learning_rate": 1.1890339425587468e-05, "loss": 0.1479, "step": 1370 }, { "epoch": 5.169014084507042, "grad_norm": 3.6347432136535645, "learning_rate": 1.1733681462140992e-05, "loss": 0.1575, "step": 1380 }, { "epoch": 5.206572769953052, "grad_norm": 2.901700496673584, "learning_rate": 1.1577023498694518e-05, "loss": 0.1367, "step": 1390 }, { "epoch": 5.244131455399061, "grad_norm": 2.6395390033721924, "learning_rate": 1.1420365535248042e-05, "loss": 0.144, "step": 1400 }, { "epoch": 5.28169014084507, "grad_norm": 3.923652172088623, "learning_rate": 1.1263707571801567e-05, "loss": 0.1576, "step": 1410 }, { "epoch": 5.31924882629108, "grad_norm": 2.290224313735962, "learning_rate": 1.1107049608355092e-05, "loss": 0.16, "step": 1420 }, { "epoch": 5.356807511737089, "grad_norm": 2.332317590713501, "learning_rate": 1.0950391644908617e-05, "loss": 0.1505, "step": 1430 }, { "epoch": 5.394366197183099, "grad_norm": 3.474155902862549, "learning_rate": 1.0793733681462141e-05, "loss": 0.1828, "step": 1440 }, { "epoch": 5.431924882629108, "grad_norm": 2.5219180583953857, "learning_rate": 1.0637075718015665e-05, "loss": 0.1563, "step": 1450 }, { "epoch": 5.469483568075117, "grad_norm": 4.863851547241211, "learning_rate": 1.0480417754569191e-05, "loss": 0.1308, "step": 1460 }, { "epoch": 5.507042253521127, "grad_norm": 4.817688941955566, "learning_rate": 1.0323759791122715e-05, "loss": 0.1757, "step": 1470 }, { "epoch": 5.544600938967136, "grad_norm": 3.194732189178467, "learning_rate": 1.0167101827676241e-05, "loss": 0.1577, "step": 1480 }, { "epoch": 5.582159624413146, "grad_norm": 3.6605474948883057, "learning_rate": 1.0010443864229765e-05, "loss": 0.2044, "step": 1490 }, { "epoch": 5.619718309859155, "grad_norm": 2.427701473236084, "learning_rate": 9.853785900783291e-06, "loss": 0.1574, "step": 1500 }, { "epoch": 5.657276995305164, "grad_norm": 2.8025519847869873, "learning_rate": 9.697127937336815e-06, "loss": 0.188, "step": 1510 }, { "epoch": 5.694835680751174, "grad_norm": 2.042407989501953, "learning_rate": 9.54046997389034e-06, "loss": 0.1639, "step": 1520 }, { "epoch": 5.732394366197183, "grad_norm": 4.5383477210998535, "learning_rate": 9.383812010443865e-06, "loss": 0.1641, "step": 1530 }, { "epoch": 5.769953051643192, "grad_norm": 2.919588804244995, "learning_rate": 9.22715404699739e-06, "loss": 0.1374, "step": 1540 }, { "epoch": 5.807511737089202, "grad_norm": 2.4344029426574707, "learning_rate": 9.070496083550915e-06, "loss": 0.1711, "step": 1550 }, { "epoch": 5.845070422535211, "grad_norm": 1.5614906549453735, "learning_rate": 8.913838120104439e-06, "loss": 0.1624, "step": 1560 }, { "epoch": 5.882629107981221, "grad_norm": 3.0189967155456543, "learning_rate": 8.757180156657963e-06, "loss": 0.1691, "step": 1570 }, { "epoch": 5.92018779342723, "grad_norm": 2.44000506401062, "learning_rate": 8.600522193211488e-06, "loss": 0.1513, "step": 1580 }, { "epoch": 5.957746478873239, "grad_norm": 2.4327423572540283, "learning_rate": 8.443864229765013e-06, "loss": 0.1538, "step": 1590 }, { "epoch": 5.995305164319249, "grad_norm": 2.1192240715026855, "learning_rate": 8.287206266318538e-06, "loss": 0.1442, "step": 1600 }, { "epoch": 6.0, "eval_accuracy": 0.981318034716093, "eval_loss": 0.09270217269659042, "eval_runtime": 4.8524, "eval_samples_per_second": 1400.961, "eval_steps_per_second": 43.896, "step": 1602 }, { "epoch": 6.030046948356808, "grad_norm": 1.8678548336029053, "learning_rate": 8.130548302872062e-06, "loss": 0.1328, "step": 1610 }, { "epoch": 6.067605633802817, "grad_norm": 3.0712783336639404, "learning_rate": 7.973890339425586e-06, "loss": 0.1543, "step": 1620 }, { "epoch": 6.105164319248826, "grad_norm": 4.49588680267334, "learning_rate": 7.817232375979112e-06, "loss": 0.1452, "step": 1630 }, { "epoch": 6.142723004694836, "grad_norm": 3.9594759941101074, "learning_rate": 7.660574412532636e-06, "loss": 0.1513, "step": 1640 }, { "epoch": 6.180281690140845, "grad_norm": 2.528153657913208, "learning_rate": 7.503916449086162e-06, "loss": 0.1589, "step": 1650 }, { "epoch": 6.217840375586855, "grad_norm": 2.159458875656128, "learning_rate": 7.347258485639687e-06, "loss": 0.1443, "step": 1660 }, { "epoch": 6.255399061032864, "grad_norm": 2.098022222518921, "learning_rate": 7.190600522193212e-06, "loss": 0.1564, "step": 1670 }, { "epoch": 6.292957746478873, "grad_norm": 1.993698239326477, "learning_rate": 7.033942558746737e-06, "loss": 0.1401, "step": 1680 }, { "epoch": 6.330516431924883, "grad_norm": 2.2639145851135254, "learning_rate": 6.877284595300262e-06, "loss": 0.1452, "step": 1690 }, { "epoch": 6.368075117370892, "grad_norm": 2.5003936290740967, "learning_rate": 6.720626631853786e-06, "loss": 0.1439, "step": 1700 }, { "epoch": 6.405633802816902, "grad_norm": 2.0841052532196045, "learning_rate": 6.563968668407311e-06, "loss": 0.1438, "step": 1710 }, { "epoch": 6.443192488262911, "grad_norm": 3.550182819366455, "learning_rate": 6.4073107049608355e-06, "loss": 0.1433, "step": 1720 }, { "epoch": 6.48075117370892, "grad_norm": 1.4857251644134521, "learning_rate": 6.2506527415143605e-06, "loss": 0.1404, "step": 1730 }, { "epoch": 6.51830985915493, "grad_norm": 3.503309726715088, "learning_rate": 6.093994778067885e-06, "loss": 0.1493, "step": 1740 }, { "epoch": 6.555868544600939, "grad_norm": 3.59545636177063, "learning_rate": 5.93733681462141e-06, "loss": 0.1563, "step": 1750 }, { "epoch": 6.593427230046949, "grad_norm": 2.879582405090332, "learning_rate": 5.780678851174934e-06, "loss": 0.122, "step": 1760 }, { "epoch": 6.630985915492958, "grad_norm": 1.7240543365478516, "learning_rate": 5.624020887728459e-06, "loss": 0.1404, "step": 1770 }, { "epoch": 6.668544600938967, "grad_norm": 3.0438528060913086, "learning_rate": 5.467362924281984e-06, "loss": 0.1432, "step": 1780 }, { "epoch": 6.706103286384977, "grad_norm": 2.496366024017334, "learning_rate": 5.310704960835509e-06, "loss": 0.1277, "step": 1790 }, { "epoch": 6.743661971830986, "grad_norm": 1.7166277170181274, "learning_rate": 5.154046997389034e-06, "loss": 0.143, "step": 1800 }, { "epoch": 6.781220657276995, "grad_norm": 2.4547784328460693, "learning_rate": 4.997389033942559e-06, "loss": 0.1198, "step": 1810 }, { "epoch": 6.818779342723005, "grad_norm": 2.604220390319824, "learning_rate": 4.840731070496084e-06, "loss": 0.1705, "step": 1820 }, { "epoch": 6.856338028169014, "grad_norm": 2.7237601280212402, "learning_rate": 4.684073107049609e-06, "loss": 0.1506, "step": 1830 }, { "epoch": 6.893896713615024, "grad_norm": 2.638058662414551, "learning_rate": 4.527415143603134e-06, "loss": 0.154, "step": 1840 }, { "epoch": 6.931455399061033, "grad_norm": 3.8382205963134766, "learning_rate": 4.3707571801566586e-06, "loss": 0.1553, "step": 1850 }, { "epoch": 6.969014084507043, "grad_norm": 2.071164131164551, "learning_rate": 4.2140992167101835e-06, "loss": 0.1397, "step": 1860 }, { "epoch": 7.0, "eval_accuracy": 0.9811709326272433, "eval_loss": 0.08920056372880936, "eval_runtime": 4.9166, "eval_samples_per_second": 1382.662, "eval_steps_per_second": 43.323, "step": 1869 }, { "epoch": 7.003755868544601, "grad_norm": 2.5346381664276123, "learning_rate": 4.0574412532637075e-06, "loss": 0.1296, "step": 1870 }, { "epoch": 7.041314553990611, "grad_norm": 2.575307846069336, "learning_rate": 3.9007832898172325e-06, "loss": 0.1389, "step": 1880 }, { "epoch": 7.07887323943662, "grad_norm": 2.0408527851104736, "learning_rate": 3.7441253263707574e-06, "loss": 0.1521, "step": 1890 }, { "epoch": 7.1164319248826295, "grad_norm": 3.2742061614990234, "learning_rate": 3.5874673629242823e-06, "loss": 0.1342, "step": 1900 }, { "epoch": 7.153990610328639, "grad_norm": 1.4502960443496704, "learning_rate": 3.4308093994778068e-06, "loss": 0.1204, "step": 1910 }, { "epoch": 7.191549295774648, "grad_norm": 3.7600743770599365, "learning_rate": 3.2741514360313317e-06, "loss": 0.1431, "step": 1920 }, { "epoch": 7.229107981220658, "grad_norm": 2.7332417964935303, "learning_rate": 3.1174934725848566e-06, "loss": 0.1281, "step": 1930 }, { "epoch": 7.266666666666667, "grad_norm": 2.6618921756744385, "learning_rate": 2.960835509138381e-06, "loss": 0.141, "step": 1940 }, { "epoch": 7.304225352112676, "grad_norm": 3.625688314437866, "learning_rate": 2.804177545691906e-06, "loss": 0.1455, "step": 1950 }, { "epoch": 7.341784037558686, "grad_norm": 2.0667765140533447, "learning_rate": 2.647519582245431e-06, "loss": 0.1359, "step": 1960 }, { "epoch": 7.379342723004695, "grad_norm": 2.369652509689331, "learning_rate": 2.490861618798956e-06, "loss": 0.1295, "step": 1970 }, { "epoch": 7.416901408450705, "grad_norm": 3.836838722229004, "learning_rate": 2.3342036553524807e-06, "loss": 0.1489, "step": 1980 }, { "epoch": 7.454460093896714, "grad_norm": 3.3261311054229736, "learning_rate": 2.1775456919060052e-06, "loss": 0.1289, "step": 1990 }, { "epoch": 7.492018779342723, "grad_norm": 2.6514954566955566, "learning_rate": 2.0208877284595297e-06, "loss": 0.1185, "step": 2000 }, { "epoch": 7.529577464788733, "grad_norm": 2.1017005443573, "learning_rate": 1.8642297650130548e-06, "loss": 0.1472, "step": 2010 }, { "epoch": 7.567136150234742, "grad_norm": 2.5104258060455322, "learning_rate": 1.7075718015665795e-06, "loss": 0.1467, "step": 2020 }, { "epoch": 7.6046948356807516, "grad_norm": 1.7915935516357422, "learning_rate": 1.5509138381201045e-06, "loss": 0.1212, "step": 2030 }, { "epoch": 7.642253521126761, "grad_norm": 2.4937989711761475, "learning_rate": 1.3942558746736294e-06, "loss": 0.1395, "step": 2040 }, { "epoch": 7.67981220657277, "grad_norm": 2.758594274520874, "learning_rate": 1.237597911227154e-06, "loss": 0.1361, "step": 2050 }, { "epoch": 7.71737089201878, "grad_norm": 2.291672468185425, "learning_rate": 1.0809399477806788e-06, "loss": 0.1182, "step": 2060 }, { "epoch": 7.754929577464789, "grad_norm": 1.944736361503601, "learning_rate": 9.242819843342037e-07, "loss": 0.1307, "step": 2070 }, { "epoch": 7.792488262910798, "grad_norm": 1.448411226272583, "learning_rate": 7.676240208877285e-07, "loss": 0.1407, "step": 2080 }, { "epoch": 7.830046948356808, "grad_norm": 3.276000499725342, "learning_rate": 6.109660574412533e-07, "loss": 0.1361, "step": 2090 }, { "epoch": 7.867605633802817, "grad_norm": 3.627788543701172, "learning_rate": 4.5430809399477806e-07, "loss": 0.131, "step": 2100 }, { "epoch": 7.905164319248827, "grad_norm": 1.2533661127090454, "learning_rate": 2.9765013054830287e-07, "loss": 0.1245, "step": 2110 }, { "epoch": 7.942723004694836, "grad_norm": 1.472484827041626, "learning_rate": 1.409921671018277e-07, "loss": 0.1368, "step": 2120 }, { "epoch": 7.972769953051643, "eval_accuracy": 0.9811709326272433, "eval_loss": 0.08957477658987045, "eval_runtime": 5.418, "eval_samples_per_second": 1254.697, "eval_steps_per_second": 39.313, "step": 2128 }, { "epoch": 7.972769953051643, "step": 2128, "total_flos": 3.767900833756416e+18, "train_loss": 0.5178930132572812, "train_runtime": 756.2923, "train_samples_per_second": 540.468, "train_steps_per_second": 2.814 } ], "logging_steps": 10, "max_steps": 2128, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.767900833756416e+18, "train_batch_size": 48, "trial_name": null, "trial_params": null }