| { | |
| "best_metric": 0.9826419535157399, | |
| "best_model_checkpoint": "wav2vec2-base-ft-keyword-spotting/checkpoint-1335", | |
| "epoch": 7.972769953051643, | |
| "eval_steps": 500, | |
| "global_step": 2128, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03755868544600939, | |
| "grad_norm": 2.0377416610717773, | |
| "learning_rate": 1.4084507042253521e-06, | |
| "loss": 3.8687, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07511737089201878, | |
| "grad_norm": 3.055781602859497, | |
| "learning_rate": 2.8169014084507042e-06, | |
| "loss": 4.1156, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11267605633802817, | |
| "grad_norm": 3.383268356323242, | |
| "learning_rate": 4.225352112676057e-06, | |
| "loss": 4.0885, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.15023474178403756, | |
| "grad_norm": 3.8566606044769287, | |
| "learning_rate": 5.6338028169014084e-06, | |
| "loss": 3.9316, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.18779342723004694, | |
| "grad_norm": 5.065456867218018, | |
| "learning_rate": 7.042253521126761e-06, | |
| "loss": 3.6474, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22535211267605634, | |
| "grad_norm": 5.89341926574707, | |
| "learning_rate": 8.450704225352114e-06, | |
| "loss": 3.2124, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26291079812206575, | |
| "grad_norm": 5.9929399490356445, | |
| "learning_rate": 9.859154929577466e-06, | |
| "loss": 2.756, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3004694835680751, | |
| "grad_norm": 5.689433574676514, | |
| "learning_rate": 1.1267605633802817e-05, | |
| "loss": 2.4596, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3380281690140845, | |
| "grad_norm": 4.89589262008667, | |
| "learning_rate": 1.267605633802817e-05, | |
| "loss": 2.2638, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3755868544600939, | |
| "grad_norm": 4.8666839599609375, | |
| "learning_rate": 1.4084507042253522e-05, | |
| "loss": 2.1166, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4131455399061033, | |
| "grad_norm": 4.466708660125732, | |
| "learning_rate": 1.5492957746478876e-05, | |
| "loss": 2.0048, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4507042253521127, | |
| "grad_norm": 3.676050901412964, | |
| "learning_rate": 1.6901408450704228e-05, | |
| "loss": 1.9138, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.48826291079812206, | |
| "grad_norm": 2.183825731277466, | |
| "learning_rate": 1.830985915492958e-05, | |
| "loss": 1.863, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5258215962441315, | |
| "grad_norm": 2.075413465499878, | |
| "learning_rate": 1.9718309859154933e-05, | |
| "loss": 1.7616, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5633802816901409, | |
| "grad_norm": 0.8534318208694458, | |
| "learning_rate": 2.112676056338028e-05, | |
| "loss": 1.7185, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6009389671361502, | |
| "grad_norm": 0.9039830565452576, | |
| "learning_rate": 2.2535211267605634e-05, | |
| "loss": 1.8054, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6384976525821596, | |
| "grad_norm": 1.32124662399292, | |
| "learning_rate": 2.3943661971830986e-05, | |
| "loss": 1.7367, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.676056338028169, | |
| "grad_norm": 1.232069969177246, | |
| "learning_rate": 2.535211267605634e-05, | |
| "loss": 1.7423, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7136150234741784, | |
| "grad_norm": 1.9570960998535156, | |
| "learning_rate": 2.676056338028169e-05, | |
| "loss": 1.6132, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7511737089201878, | |
| "grad_norm": 2.4463119506835938, | |
| "learning_rate": 2.8169014084507043e-05, | |
| "loss": 1.6099, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7887323943661971, | |
| "grad_norm": 6.601908206939697, | |
| "learning_rate": 2.9577464788732395e-05, | |
| "loss": 1.6043, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8262910798122066, | |
| "grad_norm": 3.225101947784424, | |
| "learning_rate": 2.989033942558747e-05, | |
| "loss": 1.5621, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.863849765258216, | |
| "grad_norm": 3.698263645172119, | |
| "learning_rate": 2.9733681462140994e-05, | |
| "loss": 1.514, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9014084507042254, | |
| "grad_norm": 5.209756374359131, | |
| "learning_rate": 2.9577023498694518e-05, | |
| "loss": 1.4532, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9389671361502347, | |
| "grad_norm": 2.1304848194122314, | |
| "learning_rate": 2.9420365535248042e-05, | |
| "loss": 1.4312, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.9765258215962441, | |
| "grad_norm": 4.837350368499756, | |
| "learning_rate": 2.926370757180157e-05, | |
| "loss": 1.3624, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6546042953809944, | |
| "eval_loss": 1.19585382938385, | |
| "eval_runtime": 4.9178, | |
| "eval_samples_per_second": 1382.328, | |
| "eval_steps_per_second": 43.312, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.0112676056338028, | |
| "grad_norm": 4.779292106628418, | |
| "learning_rate": 2.9107049608355094e-05, | |
| "loss": 1.2541, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.0488262910798123, | |
| "grad_norm": 3.60760498046875, | |
| "learning_rate": 2.8950391644908618e-05, | |
| "loss": 1.2271, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.0863849765258216, | |
| "grad_norm": 2.3788599967956543, | |
| "learning_rate": 2.8793733681462142e-05, | |
| "loss": 1.2335, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.123943661971831, | |
| "grad_norm": 3.353325843811035, | |
| "learning_rate": 2.8637075718015666e-05, | |
| "loss": 1.1613, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.1615023474178403, | |
| "grad_norm": 4.326411247253418, | |
| "learning_rate": 2.8480417754569193e-05, | |
| "loss": 1.0754, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.1990610328638498, | |
| "grad_norm": 3.1939706802368164, | |
| "learning_rate": 2.8323759791122717e-05, | |
| "loss": 1.0353, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.236619718309859, | |
| "grad_norm": 2.8827011585235596, | |
| "learning_rate": 2.816710182767624e-05, | |
| "loss": 0.9806, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.2741784037558685, | |
| "grad_norm": 3.910698652267456, | |
| "learning_rate": 2.8010443864229766e-05, | |
| "loss": 1.0813, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3117370892018778, | |
| "grad_norm": 3.5916378498077393, | |
| "learning_rate": 2.7853785900783293e-05, | |
| "loss": 0.9792, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.3492957746478873, | |
| "grad_norm": 2.6981167793273926, | |
| "learning_rate": 2.7697127937336817e-05, | |
| "loss": 0.9231, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.3868544600938968, | |
| "grad_norm": 5.702897071838379, | |
| "learning_rate": 2.754046997389034e-05, | |
| "loss": 0.9435, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.424413145539906, | |
| "grad_norm": 4.622363090515137, | |
| "learning_rate": 2.7383812010443865e-05, | |
| "loss": 0.8449, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.4619718309859155, | |
| "grad_norm": 2.2103636264801025, | |
| "learning_rate": 2.7227154046997393e-05, | |
| "loss": 0.7713, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.4995305164319248, | |
| "grad_norm": 4.545182228088379, | |
| "learning_rate": 2.7070496083550917e-05, | |
| "loss": 0.7719, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.5370892018779343, | |
| "grad_norm": 6.883026599884033, | |
| "learning_rate": 2.691383812010444e-05, | |
| "loss": 0.7564, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.5746478873239438, | |
| "grad_norm": 4.770920276641846, | |
| "learning_rate": 2.6757180156657965e-05, | |
| "loss": 0.6994, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.612206572769953, | |
| "grad_norm": 4.413459300994873, | |
| "learning_rate": 2.660052219321149e-05, | |
| "loss": 0.6313, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.6497652582159623, | |
| "grad_norm": 2.0261390209198, | |
| "learning_rate": 2.6443864229765013e-05, | |
| "loss": 0.6017, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.6873239436619718, | |
| "grad_norm": 5.67121696472168, | |
| "learning_rate": 2.6287206266318537e-05, | |
| "loss": 0.5792, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.7248826291079813, | |
| "grad_norm": 2.573594808578491, | |
| "learning_rate": 2.6146214099216712e-05, | |
| "loss": 0.545, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.7624413145539906, | |
| "grad_norm": 4.145854949951172, | |
| "learning_rate": 2.5989556135770236e-05, | |
| "loss": 0.4907, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.7418975830078125, | |
| "learning_rate": 2.583289817232376e-05, | |
| "loss": 0.485, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.8375586854460093, | |
| "grad_norm": 4.651867866516113, | |
| "learning_rate": 2.5676240208877287e-05, | |
| "loss": 0.4572, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.8751173708920188, | |
| "grad_norm": 4.849829196929932, | |
| "learning_rate": 2.551958224543081e-05, | |
| "loss": 0.4864, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.9126760563380283, | |
| "grad_norm": 2.631229877471924, | |
| "learning_rate": 2.5362924281984335e-05, | |
| "loss": 0.4035, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.9502347417840376, | |
| "grad_norm": 5.099828243255615, | |
| "learning_rate": 2.520626631853786e-05, | |
| "loss": 0.3818, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.9877934272300468, | |
| "grad_norm": 3.25174617767334, | |
| "learning_rate": 2.5049608355091387e-05, | |
| "loss": 0.3854, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9733745219182113, | |
| "eval_loss": 0.2675245702266693, | |
| "eval_runtime": 5.0739, | |
| "eval_samples_per_second": 1339.801, | |
| "eval_steps_per_second": 41.98, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.0225352112676056, | |
| "grad_norm": 4.533545017242432, | |
| "learning_rate": 2.489295039164491e-05, | |
| "loss": 0.3589, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.060093896713615, | |
| "grad_norm": 4.4245991706848145, | |
| "learning_rate": 2.4736292428198435e-05, | |
| "loss": 0.378, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.0976525821596246, | |
| "grad_norm": 5.778880596160889, | |
| "learning_rate": 2.457963446475196e-05, | |
| "loss": 0.3653, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.1352112676056336, | |
| "grad_norm": 3.5573890209198, | |
| "learning_rate": 2.4422976501305487e-05, | |
| "loss": 0.3107, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.172769953051643, | |
| "grad_norm": 3.655824899673462, | |
| "learning_rate": 2.426631853785901e-05, | |
| "loss": 0.3405, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.2103286384976526, | |
| "grad_norm": 2.430022954940796, | |
| "learning_rate": 2.4109660574412535e-05, | |
| "loss": 0.3298, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.247887323943662, | |
| "grad_norm": 2.9207568168640137, | |
| "learning_rate": 2.3953002610966055e-05, | |
| "loss": 0.3022, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.2854460093896716, | |
| "grad_norm": 4.8787007331848145, | |
| "learning_rate": 2.3796344647519583e-05, | |
| "loss": 0.3991, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.3230046948356806, | |
| "grad_norm": 3.0268468856811523, | |
| "learning_rate": 2.3639686684073107e-05, | |
| "loss": 0.3159, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.36056338028169, | |
| "grad_norm": 2.6611557006835938, | |
| "learning_rate": 2.348302872062663e-05, | |
| "loss": 0.2868, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.3981220657276996, | |
| "grad_norm": 2.485551595687866, | |
| "learning_rate": 2.3326370757180155e-05, | |
| "loss": 0.3032, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.435680751173709, | |
| "grad_norm": 4.556153297424316, | |
| "learning_rate": 2.316971279373368e-05, | |
| "loss": 0.2985, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.473239436619718, | |
| "grad_norm": 5.270796298980713, | |
| "learning_rate": 2.3013054830287207e-05, | |
| "loss": 0.2839, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.5107981220657276, | |
| "grad_norm": 3.347005844116211, | |
| "learning_rate": 2.285639686684073e-05, | |
| "loss": 0.2871, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.548356807511737, | |
| "grad_norm": 5.236591815948486, | |
| "learning_rate": 2.2699738903394255e-05, | |
| "loss": 0.3028, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.5859154929577466, | |
| "grad_norm": 2.995059013366699, | |
| "learning_rate": 2.254308093994778e-05, | |
| "loss": 0.2537, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.6234741784037556, | |
| "grad_norm": 2.805640459060669, | |
| "learning_rate": 2.2386422976501306e-05, | |
| "loss": 0.297, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.661032863849765, | |
| "grad_norm": 3.0646071434020996, | |
| "learning_rate": 2.222976501305483e-05, | |
| "loss": 0.2453, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.6985915492957746, | |
| "grad_norm": 3.6719613075256348, | |
| "learning_rate": 2.2073107049608354e-05, | |
| "loss": 0.2655, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.736150234741784, | |
| "grad_norm": 3.2248122692108154, | |
| "learning_rate": 2.191644908616188e-05, | |
| "loss": 0.2297, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.7737089201877936, | |
| "grad_norm": 3.769843578338623, | |
| "learning_rate": 2.1759791122715406e-05, | |
| "loss": 0.2548, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.8112676056338026, | |
| "grad_norm": 3.6679906845092773, | |
| "learning_rate": 2.160313315926893e-05, | |
| "loss": 0.2836, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.848826291079812, | |
| "grad_norm": 1.6924936771392822, | |
| "learning_rate": 2.1446475195822454e-05, | |
| "loss": 0.2555, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.8863849765258216, | |
| "grad_norm": 2.1275901794433594, | |
| "learning_rate": 2.1289817232375978e-05, | |
| "loss": 0.2334, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.923943661971831, | |
| "grad_norm": 6.528135299682617, | |
| "learning_rate": 2.1133159268929506e-05, | |
| "loss": 0.2544, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.9615023474178406, | |
| "grad_norm": 2.4497199058532715, | |
| "learning_rate": 2.097650130548303e-05, | |
| "loss": 0.2628, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.9990610328638496, | |
| "grad_norm": 2.278947591781616, | |
| "learning_rate": 2.0819843342036554e-05, | |
| "loss": 0.2473, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9767578699617535, | |
| "eval_loss": 0.1461225152015686, | |
| "eval_runtime": 5.0057, | |
| "eval_samples_per_second": 1358.045, | |
| "eval_steps_per_second": 42.551, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.0338028169014084, | |
| "grad_norm": 3.1185402870178223, | |
| "learning_rate": 2.0663185378590078e-05, | |
| "loss": 0.2245, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.071361502347418, | |
| "grad_norm": 2.456102132797241, | |
| "learning_rate": 2.0506527415143602e-05, | |
| "loss": 0.2423, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.1089201877934274, | |
| "grad_norm": 2.9463231563568115, | |
| "learning_rate": 2.034986945169713e-05, | |
| "loss": 0.2274, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.1464788732394364, | |
| "grad_norm": 3.5940473079681396, | |
| "learning_rate": 2.0193211488250653e-05, | |
| "loss": 0.2368, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.184037558685446, | |
| "grad_norm": 4.721577167510986, | |
| "learning_rate": 2.0036553524804177e-05, | |
| "loss": 0.2554, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.2215962441314554, | |
| "grad_norm": 2.496495485305786, | |
| "learning_rate": 1.98798955613577e-05, | |
| "loss": 0.2363, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.259154929577465, | |
| "grad_norm": 3.0665740966796875, | |
| "learning_rate": 1.972323759791123e-05, | |
| "loss": 0.2248, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.2967136150234744, | |
| "grad_norm": 4.336172580718994, | |
| "learning_rate": 1.9566579634464753e-05, | |
| "loss": 0.1922, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.3342723004694834, | |
| "grad_norm": 4.110763072967529, | |
| "learning_rate": 1.9409921671018277e-05, | |
| "loss": 0.1965, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.371830985915493, | |
| "grad_norm": 1.9457247257232666, | |
| "learning_rate": 1.92532637075718e-05, | |
| "loss": 0.2258, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.4093896713615024, | |
| "grad_norm": 2.719369411468506, | |
| "learning_rate": 1.909660574412533e-05, | |
| "loss": 0.2184, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.446948356807512, | |
| "grad_norm": 3.438279151916504, | |
| "learning_rate": 1.8939947780678853e-05, | |
| "loss": 0.1964, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.4845070422535214, | |
| "grad_norm": 3.2813045978546143, | |
| "learning_rate": 1.8783289817232377e-05, | |
| "loss": 0.2348, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.5220657276995304, | |
| "grad_norm": 4.151478290557861, | |
| "learning_rate": 1.86266318537859e-05, | |
| "loss": 0.2004, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.55962441314554, | |
| "grad_norm": 3.4271771907806396, | |
| "learning_rate": 1.8469973890339425e-05, | |
| "loss": 0.2039, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.5971830985915494, | |
| "grad_norm": 4.0341901779174805, | |
| "learning_rate": 1.8313315926892952e-05, | |
| "loss": 0.1997, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.6347417840375584, | |
| "grad_norm": 4.762091636657715, | |
| "learning_rate": 1.8156657963446476e-05, | |
| "loss": 0.2153, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.672300469483568, | |
| "grad_norm": 3.3214402198791504, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.1801, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.7098591549295774, | |
| "grad_norm": 3.84503173828125, | |
| "learning_rate": 1.7843342036553525e-05, | |
| "loss": 0.2106, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.747417840375587, | |
| "grad_norm": 3.303781747817993, | |
| "learning_rate": 1.7686684073107052e-05, | |
| "loss": 0.1965, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.7849765258215964, | |
| "grad_norm": 2.691159248352051, | |
| "learning_rate": 1.7530026109660576e-05, | |
| "loss": 0.193, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.8225352112676054, | |
| "grad_norm": 4.134768009185791, | |
| "learning_rate": 1.73733681462141e-05, | |
| "loss": 0.1908, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.860093896713615, | |
| "grad_norm": 2.9195241928100586, | |
| "learning_rate": 1.7216710182767624e-05, | |
| "loss": 0.1886, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.8976525821596244, | |
| "grad_norm": 3.795133352279663, | |
| "learning_rate": 1.706005221932115e-05, | |
| "loss": 0.2007, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 3.935211267605634, | |
| "grad_norm": 3.9436607360839844, | |
| "learning_rate": 1.6903394255874676e-05, | |
| "loss": 0.1834, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.9727699530516434, | |
| "grad_norm": 3.4115564823150635, | |
| "learning_rate": 1.67467362924282e-05, | |
| "loss": 0.1997, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.980435422182995, | |
| "eval_loss": 0.10877315700054169, | |
| "eval_runtime": 4.9191, | |
| "eval_samples_per_second": 1381.955, | |
| "eval_steps_per_second": 43.3, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 4.007511737089202, | |
| "grad_norm": 5.121041774749756, | |
| "learning_rate": 1.6590078328981724e-05, | |
| "loss": 0.1785, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.045070422535211, | |
| "grad_norm": 2.908527374267578, | |
| "learning_rate": 1.643342036553525e-05, | |
| "loss": 0.1678, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.08262910798122, | |
| "grad_norm": 1.9687402248382568, | |
| "learning_rate": 1.6276762402088775e-05, | |
| "loss": 0.192, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.12018779342723, | |
| "grad_norm": 2.722937822341919, | |
| "learning_rate": 1.61201044386423e-05, | |
| "loss": 0.1983, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.157746478873239, | |
| "grad_norm": 2.3741490840911865, | |
| "learning_rate": 1.5963446475195823e-05, | |
| "loss": 0.2145, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.195305164319249, | |
| "grad_norm": 2.653414011001587, | |
| "learning_rate": 1.5806788511749348e-05, | |
| "loss": 0.1701, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.232863849765258, | |
| "grad_norm": 3.444087266921997, | |
| "learning_rate": 1.5650130548302875e-05, | |
| "loss": 0.2047, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.270422535211267, | |
| "grad_norm": 2.024235486984253, | |
| "learning_rate": 1.54934725848564e-05, | |
| "loss": 0.1817, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 4.307981220657277, | |
| "grad_norm": 2.742171049118042, | |
| "learning_rate": 1.533681462140992e-05, | |
| "loss": 0.1723, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 4.345539906103286, | |
| "grad_norm": 3.3700480461120605, | |
| "learning_rate": 1.5180156657963446e-05, | |
| "loss": 0.17, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.383098591549296, | |
| "grad_norm": 2.552915573120117, | |
| "learning_rate": 1.5023498694516973e-05, | |
| "loss": 0.1802, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.420657276995305, | |
| "grad_norm": 3.3317511081695557, | |
| "learning_rate": 1.4866840731070497e-05, | |
| "loss": 0.1933, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.458215962441314, | |
| "grad_norm": 1.9266548156738281, | |
| "learning_rate": 1.4710182767624021e-05, | |
| "loss": 0.1739, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.495774647887324, | |
| "grad_norm": 2.1459243297576904, | |
| "learning_rate": 1.4553524804177547e-05, | |
| "loss": 0.1599, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "grad_norm": 3.9314770698547363, | |
| "learning_rate": 1.4396866840731071e-05, | |
| "loss": 0.1958, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.570892018779343, | |
| "grad_norm": 2.6377363204956055, | |
| "learning_rate": 1.4240208877284597e-05, | |
| "loss": 0.1604, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 4.608450704225352, | |
| "grad_norm": 2.810866594314575, | |
| "learning_rate": 1.408355091383812e-05, | |
| "loss": 0.1495, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 4.646009389671361, | |
| "grad_norm": 2.2084455490112305, | |
| "learning_rate": 1.3926892950391646e-05, | |
| "loss": 0.185, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 4.683568075117371, | |
| "grad_norm": 2.7217283248901367, | |
| "learning_rate": 1.377023498694517e-05, | |
| "loss": 0.1757, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 4.72112676056338, | |
| "grad_norm": 3.075267791748047, | |
| "learning_rate": 1.3613577023498696e-05, | |
| "loss": 0.1814, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 4.758685446009389, | |
| "grad_norm": 3.2452406883239746, | |
| "learning_rate": 1.345691906005222e-05, | |
| "loss": 0.1622, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 4.796244131455399, | |
| "grad_norm": 2.712754487991333, | |
| "learning_rate": 1.3300261096605744e-05, | |
| "loss": 0.1714, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.833802816901408, | |
| "grad_norm": 1.6795600652694702, | |
| "learning_rate": 1.3143603133159269e-05, | |
| "loss": 0.1519, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 4.871361502347418, | |
| "grad_norm": 3.9085493087768555, | |
| "learning_rate": 1.2986945169712793e-05, | |
| "loss": 0.1758, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.908920187793427, | |
| "grad_norm": 3.529478073120117, | |
| "learning_rate": 1.2830287206266318e-05, | |
| "loss": 0.1549, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 4.946478873239436, | |
| "grad_norm": 2.559157609939575, | |
| "learning_rate": 1.2673629242819842e-05, | |
| "loss": 0.1824, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 4.984037558685446, | |
| "grad_norm": 2.2350497245788574, | |
| "learning_rate": 1.2516971279373368e-05, | |
| "loss": 0.1723, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9826419535157399, | |
| "eval_loss": 0.09542840719223022, | |
| "eval_runtime": 5.0389, | |
| "eval_samples_per_second": 1349.105, | |
| "eval_steps_per_second": 42.271, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 5.018779342723005, | |
| "grad_norm": 2.5073907375335693, | |
| "learning_rate": 1.2360313315926892e-05, | |
| "loss": 0.1401, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 5.056338028169014, | |
| "grad_norm": 4.696757793426514, | |
| "learning_rate": 1.2203655352480418e-05, | |
| "loss": 0.1801, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 5.093896713615023, | |
| "grad_norm": 1.2180489301681519, | |
| "learning_rate": 1.2046997389033942e-05, | |
| "loss": 0.1335, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.131455399061033, | |
| "grad_norm": 0.887860119342804, | |
| "learning_rate": 1.1890339425587468e-05, | |
| "loss": 0.1479, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 5.169014084507042, | |
| "grad_norm": 3.6347432136535645, | |
| "learning_rate": 1.1733681462140992e-05, | |
| "loss": 0.1575, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 5.206572769953052, | |
| "grad_norm": 2.901700496673584, | |
| "learning_rate": 1.1577023498694518e-05, | |
| "loss": 0.1367, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 5.244131455399061, | |
| "grad_norm": 2.6395390033721924, | |
| "learning_rate": 1.1420365535248042e-05, | |
| "loss": 0.144, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.28169014084507, | |
| "grad_norm": 3.923652172088623, | |
| "learning_rate": 1.1263707571801567e-05, | |
| "loss": 0.1576, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 5.31924882629108, | |
| "grad_norm": 2.290224313735962, | |
| "learning_rate": 1.1107049608355092e-05, | |
| "loss": 0.16, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.356807511737089, | |
| "grad_norm": 2.332317590713501, | |
| "learning_rate": 1.0950391644908617e-05, | |
| "loss": 0.1505, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 5.394366197183099, | |
| "grad_norm": 3.474155902862549, | |
| "learning_rate": 1.0793733681462141e-05, | |
| "loss": 0.1828, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 5.431924882629108, | |
| "grad_norm": 2.5219180583953857, | |
| "learning_rate": 1.0637075718015665e-05, | |
| "loss": 0.1563, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 5.469483568075117, | |
| "grad_norm": 4.863851547241211, | |
| "learning_rate": 1.0480417754569191e-05, | |
| "loss": 0.1308, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 5.507042253521127, | |
| "grad_norm": 4.817688941955566, | |
| "learning_rate": 1.0323759791122715e-05, | |
| "loss": 0.1757, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 5.544600938967136, | |
| "grad_norm": 3.194732189178467, | |
| "learning_rate": 1.0167101827676241e-05, | |
| "loss": 0.1577, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 5.582159624413146, | |
| "grad_norm": 3.6605474948883057, | |
| "learning_rate": 1.0010443864229765e-05, | |
| "loss": 0.2044, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 5.619718309859155, | |
| "grad_norm": 2.427701473236084, | |
| "learning_rate": 9.853785900783291e-06, | |
| "loss": 0.1574, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.657276995305164, | |
| "grad_norm": 2.8025519847869873, | |
| "learning_rate": 9.697127937336815e-06, | |
| "loss": 0.188, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 5.694835680751174, | |
| "grad_norm": 2.042407989501953, | |
| "learning_rate": 9.54046997389034e-06, | |
| "loss": 0.1639, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 5.732394366197183, | |
| "grad_norm": 4.5383477210998535, | |
| "learning_rate": 9.383812010443865e-06, | |
| "loss": 0.1641, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 5.769953051643192, | |
| "grad_norm": 2.919588804244995, | |
| "learning_rate": 9.22715404699739e-06, | |
| "loss": 0.1374, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 5.807511737089202, | |
| "grad_norm": 2.4344029426574707, | |
| "learning_rate": 9.070496083550915e-06, | |
| "loss": 0.1711, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 5.845070422535211, | |
| "grad_norm": 1.5614906549453735, | |
| "learning_rate": 8.913838120104439e-06, | |
| "loss": 0.1624, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 5.882629107981221, | |
| "grad_norm": 3.0189967155456543, | |
| "learning_rate": 8.757180156657963e-06, | |
| "loss": 0.1691, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 5.92018779342723, | |
| "grad_norm": 2.44000506401062, | |
| "learning_rate": 8.600522193211488e-06, | |
| "loss": 0.1513, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 5.957746478873239, | |
| "grad_norm": 2.4327423572540283, | |
| "learning_rate": 8.443864229765013e-06, | |
| "loss": 0.1538, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 5.995305164319249, | |
| "grad_norm": 2.1192240715026855, | |
| "learning_rate": 8.287206266318538e-06, | |
| "loss": 0.1442, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.981318034716093, | |
| "eval_loss": 0.09270217269659042, | |
| "eval_runtime": 4.8524, | |
| "eval_samples_per_second": 1400.961, | |
| "eval_steps_per_second": 43.896, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 6.030046948356808, | |
| "grad_norm": 1.8678548336029053, | |
| "learning_rate": 8.130548302872062e-06, | |
| "loss": 0.1328, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 6.067605633802817, | |
| "grad_norm": 3.0712783336639404, | |
| "learning_rate": 7.973890339425586e-06, | |
| "loss": 0.1543, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 6.105164319248826, | |
| "grad_norm": 4.49588680267334, | |
| "learning_rate": 7.817232375979112e-06, | |
| "loss": 0.1452, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 6.142723004694836, | |
| "grad_norm": 3.9594759941101074, | |
| "learning_rate": 7.660574412532636e-06, | |
| "loss": 0.1513, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 6.180281690140845, | |
| "grad_norm": 2.528153657913208, | |
| "learning_rate": 7.503916449086162e-06, | |
| "loss": 0.1589, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 6.217840375586855, | |
| "grad_norm": 2.159458875656128, | |
| "learning_rate": 7.347258485639687e-06, | |
| "loss": 0.1443, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 6.255399061032864, | |
| "grad_norm": 2.098022222518921, | |
| "learning_rate": 7.190600522193212e-06, | |
| "loss": 0.1564, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 6.292957746478873, | |
| "grad_norm": 1.993698239326477, | |
| "learning_rate": 7.033942558746737e-06, | |
| "loss": 0.1401, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 6.330516431924883, | |
| "grad_norm": 2.2639145851135254, | |
| "learning_rate": 6.877284595300262e-06, | |
| "loss": 0.1452, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 6.368075117370892, | |
| "grad_norm": 2.5003936290740967, | |
| "learning_rate": 6.720626631853786e-06, | |
| "loss": 0.1439, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.405633802816902, | |
| "grad_norm": 2.0841052532196045, | |
| "learning_rate": 6.563968668407311e-06, | |
| "loss": 0.1438, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 6.443192488262911, | |
| "grad_norm": 3.550182819366455, | |
| "learning_rate": 6.4073107049608355e-06, | |
| "loss": 0.1433, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 6.48075117370892, | |
| "grad_norm": 1.4857251644134521, | |
| "learning_rate": 6.2506527415143605e-06, | |
| "loss": 0.1404, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 6.51830985915493, | |
| "grad_norm": 3.503309726715088, | |
| "learning_rate": 6.093994778067885e-06, | |
| "loss": 0.1493, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 6.555868544600939, | |
| "grad_norm": 3.59545636177063, | |
| "learning_rate": 5.93733681462141e-06, | |
| "loss": 0.1563, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 6.593427230046949, | |
| "grad_norm": 2.879582405090332, | |
| "learning_rate": 5.780678851174934e-06, | |
| "loss": 0.122, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 6.630985915492958, | |
| "grad_norm": 1.7240543365478516, | |
| "learning_rate": 5.624020887728459e-06, | |
| "loss": 0.1404, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 6.668544600938967, | |
| "grad_norm": 3.0438528060913086, | |
| "learning_rate": 5.467362924281984e-06, | |
| "loss": 0.1432, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 6.706103286384977, | |
| "grad_norm": 2.496366024017334, | |
| "learning_rate": 5.310704960835509e-06, | |
| "loss": 0.1277, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 6.743661971830986, | |
| "grad_norm": 1.7166277170181274, | |
| "learning_rate": 5.154046997389034e-06, | |
| "loss": 0.143, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 6.781220657276995, | |
| "grad_norm": 2.4547784328460693, | |
| "learning_rate": 4.997389033942559e-06, | |
| "loss": 0.1198, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 6.818779342723005, | |
| "grad_norm": 2.604220390319824, | |
| "learning_rate": 4.840731070496084e-06, | |
| "loss": 0.1705, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 6.856338028169014, | |
| "grad_norm": 2.7237601280212402, | |
| "learning_rate": 4.684073107049609e-06, | |
| "loss": 0.1506, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 6.893896713615024, | |
| "grad_norm": 2.638058662414551, | |
| "learning_rate": 4.527415143603134e-06, | |
| "loss": 0.154, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 6.931455399061033, | |
| "grad_norm": 3.8382205963134766, | |
| "learning_rate": 4.3707571801566586e-06, | |
| "loss": 0.1553, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 6.969014084507043, | |
| "grad_norm": 2.071164131164551, | |
| "learning_rate": 4.2140992167101835e-06, | |
| "loss": 0.1397, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9811709326272433, | |
| "eval_loss": 0.08920056372880936, | |
| "eval_runtime": 4.9166, | |
| "eval_samples_per_second": 1382.662, | |
| "eval_steps_per_second": 43.323, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 7.003755868544601, | |
| "grad_norm": 2.5346381664276123, | |
| "learning_rate": 4.0574412532637075e-06, | |
| "loss": 0.1296, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 7.041314553990611, | |
| "grad_norm": 2.575307846069336, | |
| "learning_rate": 3.9007832898172325e-06, | |
| "loss": 0.1389, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 7.07887323943662, | |
| "grad_norm": 2.0408527851104736, | |
| "learning_rate": 3.7441253263707574e-06, | |
| "loss": 0.1521, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 7.1164319248826295, | |
| "grad_norm": 3.2742061614990234, | |
| "learning_rate": 3.5874673629242823e-06, | |
| "loss": 0.1342, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.153990610328639, | |
| "grad_norm": 1.4502960443496704, | |
| "learning_rate": 3.4308093994778068e-06, | |
| "loss": 0.1204, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 7.191549295774648, | |
| "grad_norm": 3.7600743770599365, | |
| "learning_rate": 3.2741514360313317e-06, | |
| "loss": 0.1431, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 7.229107981220658, | |
| "grad_norm": 2.7332417964935303, | |
| "learning_rate": 3.1174934725848566e-06, | |
| "loss": 0.1281, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 7.266666666666667, | |
| "grad_norm": 2.6618921756744385, | |
| "learning_rate": 2.960835509138381e-06, | |
| "loss": 0.141, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 7.304225352112676, | |
| "grad_norm": 3.625688314437866, | |
| "learning_rate": 2.804177545691906e-06, | |
| "loss": 0.1455, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 7.341784037558686, | |
| "grad_norm": 2.0667765140533447, | |
| "learning_rate": 2.647519582245431e-06, | |
| "loss": 0.1359, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 7.379342723004695, | |
| "grad_norm": 2.369652509689331, | |
| "learning_rate": 2.490861618798956e-06, | |
| "loss": 0.1295, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 7.416901408450705, | |
| "grad_norm": 3.836838722229004, | |
| "learning_rate": 2.3342036553524807e-06, | |
| "loss": 0.1489, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 7.454460093896714, | |
| "grad_norm": 3.3261311054229736, | |
| "learning_rate": 2.1775456919060052e-06, | |
| "loss": 0.1289, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 7.492018779342723, | |
| "grad_norm": 2.6514954566955566, | |
| "learning_rate": 2.0208877284595297e-06, | |
| "loss": 0.1185, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 7.529577464788733, | |
| "grad_norm": 2.1017005443573, | |
| "learning_rate": 1.8642297650130548e-06, | |
| "loss": 0.1472, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 7.567136150234742, | |
| "grad_norm": 2.5104258060455322, | |
| "learning_rate": 1.7075718015665795e-06, | |
| "loss": 0.1467, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 7.6046948356807516, | |
| "grad_norm": 1.7915935516357422, | |
| "learning_rate": 1.5509138381201045e-06, | |
| "loss": 0.1212, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 7.642253521126761, | |
| "grad_norm": 2.4937989711761475, | |
| "learning_rate": 1.3942558746736294e-06, | |
| "loss": 0.1395, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 7.67981220657277, | |
| "grad_norm": 2.758594274520874, | |
| "learning_rate": 1.237597911227154e-06, | |
| "loss": 0.1361, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 7.71737089201878, | |
| "grad_norm": 2.291672468185425, | |
| "learning_rate": 1.0809399477806788e-06, | |
| "loss": 0.1182, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 7.754929577464789, | |
| "grad_norm": 1.944736361503601, | |
| "learning_rate": 9.242819843342037e-07, | |
| "loss": 0.1307, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 7.792488262910798, | |
| "grad_norm": 1.448411226272583, | |
| "learning_rate": 7.676240208877285e-07, | |
| "loss": 0.1407, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 7.830046948356808, | |
| "grad_norm": 3.276000499725342, | |
| "learning_rate": 6.109660574412533e-07, | |
| "loss": 0.1361, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 7.867605633802817, | |
| "grad_norm": 3.627788543701172, | |
| "learning_rate": 4.5430809399477806e-07, | |
| "loss": 0.131, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 7.905164319248827, | |
| "grad_norm": 1.2533661127090454, | |
| "learning_rate": 2.9765013054830287e-07, | |
| "loss": 0.1245, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 7.942723004694836, | |
| "grad_norm": 1.472484827041626, | |
| "learning_rate": 1.409921671018277e-07, | |
| "loss": 0.1368, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 7.972769953051643, | |
| "eval_accuracy": 0.9811709326272433, | |
| "eval_loss": 0.08957477658987045, | |
| "eval_runtime": 5.418, | |
| "eval_samples_per_second": 1254.697, | |
| "eval_steps_per_second": 39.313, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 7.972769953051643, | |
| "step": 2128, | |
| "total_flos": 3.767900833756416e+18, | |
| "train_loss": 0.5178930132572812, | |
| "train_runtime": 756.2923, | |
| "train_samples_per_second": 540.468, | |
| "train_steps_per_second": 2.814 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2128, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.767900833756416e+18, | |
| "train_batch_size": 48, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |