{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1047,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02865329512893983,
      "grad_norm": 2.6186984732280814,
      "learning_rate": 8.571428571428572e-07,
      "loss": 0.5731,
      "step": 10
    },
    {
      "epoch": 0.05730659025787966,
      "grad_norm": 1.0232811311533683,
      "learning_rate": 1.8095238095238097e-06,
      "loss": 0.5412,
      "step": 20
    },
    {
      "epoch": 0.08595988538681948,
      "grad_norm": 0.9701094427215733,
      "learning_rate": 2.7619047619047625e-06,
      "loss": 0.4937,
      "step": 30
    },
    {
      "epoch": 0.11461318051575932,
      "grad_norm": 0.5323948276277348,
      "learning_rate": 3.7142857142857146e-06,
      "loss": 0.4582,
      "step": 40
    },
    {
      "epoch": 0.14326647564469913,
      "grad_norm": 0.36885209030841964,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.4392,
      "step": 50
    },
    {
      "epoch": 0.17191977077363896,
      "grad_norm": 0.27190324169109104,
      "learning_rate": 5.619047619047619e-06,
      "loss": 0.4255,
      "step": 60
    },
    {
      "epoch": 0.20057306590257878,
      "grad_norm": 0.284362183163526,
      "learning_rate": 6.571428571428572e-06,
      "loss": 0.42,
      "step": 70
    },
    {
      "epoch": 0.22922636103151864,
      "grad_norm": 0.2415087591622169,
      "learning_rate": 7.523809523809524e-06,
      "loss": 0.4006,
      "step": 80
    },
    {
      "epoch": 0.25787965616045844,
      "grad_norm": 0.2832959959768393,
      "learning_rate": 8.476190476190477e-06,
      "loss": 0.3976,
      "step": 90
    },
    {
      "epoch": 0.28653295128939826,
      "grad_norm": 0.2819249571240325,
      "learning_rate": 9.42857142857143e-06,
      "loss": 0.3887,
      "step": 100
    },
    {
      "epoch": 0.3151862464183381,
      "grad_norm": 0.26289638357241263,
      "learning_rate": 9.999555111181558e-06,
      "loss": 0.3914,
      "step": 110
    },
    {
      "epoch": 0.3438395415472779,
      "grad_norm": 0.31346359091894765,
      "learning_rate": 9.994551021152415e-06,
      "loss": 0.3847,
      "step": 120
    },
    {
      "epoch": 0.37249283667621774,
      "grad_norm": 0.25447340280629915,
      "learning_rate": 9.983992313852776e-06,
      "loss": 0.3883,
      "step": 130
    },
    {
      "epoch": 0.40114613180515757,
      "grad_norm": 0.2713615676238601,
      "learning_rate": 9.967890731995383e-06,
      "loss": 0.3885,
      "step": 140
    },
    {
      "epoch": 0.4297994269340974,
      "grad_norm": 0.29785890393022935,
      "learning_rate": 9.946264182720295e-06,
      "loss": 0.3811,
      "step": 150
    },
    {
      "epoch": 0.4584527220630373,
      "grad_norm": 0.31068169293825476,
      "learning_rate": 9.919136717679723e-06,
      "loss": 0.3755,
      "step": 160
    },
    {
      "epoch": 0.4871060171919771,
      "grad_norm": 0.291914065891215,
      "learning_rate": 9.88653850628933e-06,
      "loss": 0.3727,
      "step": 170
    },
    {
      "epoch": 0.5157593123209169,
      "grad_norm": 0.28645514791771215,
      "learning_rate": 9.848505802175762e-06,
      "loss": 0.3694,
      "step": 180
    },
    {
      "epoch": 0.5444126074498568,
      "grad_norm": 0.26191461156518464,
      "learning_rate": 9.8050809028577e-06,
      "loss": 0.3704,
      "step": 190
    },
    {
      "epoch": 0.5730659025787965,
      "grad_norm": 0.2782451749750521,
      "learning_rate": 9.756312102705284e-06,
      "loss": 0.3766,
      "step": 200
    },
    {
      "epoch": 0.6017191977077364,
      "grad_norm": 0.31746617801856314,
      "learning_rate": 9.702253639230246e-06,
      "loss": 0.3639,
      "step": 210
    },
    {
      "epoch": 0.6303724928366762,
      "grad_norm": 0.3104703712769699,
      "learning_rate": 9.642965632766437e-06,
      "loss": 0.3749,
      "step": 220
    },
    {
      "epoch": 0.6590257879656161,
      "grad_norm": 0.25481045697197613,
      "learning_rate": 9.57851401960788e-06,
      "loss": 0.3691,
      "step": 230
    },
    {
      "epoch": 0.6876790830945558,
      "grad_norm": 0.2569207157034273,
      "learning_rate": 9.508970478678676e-06,
      "loss": 0.364,
      "step": 240
    },
    {
      "epoch": 0.7163323782234957,
      "grad_norm": 0.3010191468789677,
      "learning_rate": 9.434412351816329e-06,
      "loss": 0.3699,
      "step": 250
    },
    {
      "epoch": 0.7449856733524355,
      "grad_norm": 0.2845353099007207,
      "learning_rate": 9.354922557757153e-06,
      "loss": 0.3626,
      "step": 260
    },
    {
      "epoch": 0.7736389684813754,
      "grad_norm": 0.2583262736091065,
      "learning_rate": 9.270589499919405e-06,
      "loss": 0.367,
      "step": 270
    },
    {
      "epoch": 0.8022922636103151,
      "grad_norm": 0.2734910827357129,
      "learning_rate": 9.181506968086696e-06,
      "loss": 0.3616,
      "step": 280
    },
    {
      "epoch": 0.830945558739255,
      "grad_norm": 0.3163373871358028,
      "learning_rate": 9.087774034101069e-06,
      "loss": 0.3603,
      "step": 290
    },
    {
      "epoch": 0.8595988538681948,
      "grad_norm": 0.27019612276333577,
      "learning_rate": 8.989494941681672e-06,
      "loss": 0.3625,
      "step": 300
    },
    {
      "epoch": 0.8882521489971347,
      "grad_norm": 0.26011182825983586,
      "learning_rate": 8.886778990491632e-06,
      "loss": 0.3571,
      "step": 310
    },
    {
      "epoch": 0.9169054441260746,
      "grad_norm": 0.2602289671922979,
      "learning_rate": 8.77974041458202e-06,
      "loss": 0.3551,
      "step": 320
    },
    {
      "epoch": 0.9455587392550143,
      "grad_norm": 0.28489190685342874,
      "learning_rate": 8.668498255348119e-06,
      "loss": 0.3651,
      "step": 330
    },
    {
      "epoch": 0.9742120343839542,
      "grad_norm": 0.2907841661352505,
      "learning_rate": 8.553176229139262e-06,
      "loss": 0.3621,
      "step": 340
    },
    {
      "epoch": 1.002865329512894,
      "grad_norm": 0.29410444361118165,
      "learning_rate": 8.433902589669489e-06,
      "loss": 0.3526,
      "step": 350
    },
    {
      "epoch": 1.0315186246418337,
      "grad_norm": 0.2629411999849927,
      "learning_rate": 8.310809985382059e-06,
      "loss": 0.3428,
      "step": 360
    },
    {
      "epoch": 1.0601719197707737,
      "grad_norm": 0.2664030042868273,
      "learning_rate": 8.184035311926397e-06,
      "loss": 0.3352,
      "step": 370
    },
    {
      "epoch": 1.0888252148997135,
      "grad_norm": 0.27448456132450433,
      "learning_rate": 8.053719559911605e-06,
      "loss": 0.3313,
      "step": 380
    },
    {
      "epoch": 1.1174785100286533,
      "grad_norm": 0.280680513900802,
      "learning_rate": 7.92000765810579e-06,
      "loss": 0.3372,
      "step": 390
    },
    {
      "epoch": 1.146131805157593,
      "grad_norm": 0.2696702891136822,
      "learning_rate": 7.783048312255653e-06,
      "loss": 0.3418,
      "step": 400
    },
    {
      "epoch": 1.174785100286533,
      "grad_norm": 0.31145780582148586,
      "learning_rate": 7.642993839705557e-06,
      "loss": 0.3374,
      "step": 410
    },
    {
      "epoch": 1.2034383954154728,
      "grad_norm": 0.2869659364327292,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3363,
      "step": 420
    },
    {
      "epoch": 1.2320916905444126,
      "grad_norm": 0.25325335643256375,
      "learning_rate": 7.3542258216579136e-06,
      "loss": 0.3316,
      "step": 430
    },
    {
      "epoch": 1.2607449856733524,
      "grad_norm": 0.2772339994096704,
      "learning_rate": 7.205833425311394e-06,
      "loss": 0.3436,
      "step": 440
    },
    {
      "epoch": 1.2893982808022924,
      "grad_norm": 0.29278050839602937,
      "learning_rate": 7.0549878434056155e-06,
      "loss": 0.3406,
      "step": 450
    },
    {
      "epoch": 1.3180515759312321,
      "grad_norm": 0.28430937748006735,
      "learning_rate": 6.901856836660386e-06,
      "loss": 0.3432,
      "step": 460
    },
    {
      "epoch": 1.346704871060172,
      "grad_norm": 0.2803846351745633,
      "learning_rate": 6.746610707497511e-06,
      "loss": 0.34,
      "step": 470
    },
    {
      "epoch": 1.3753581661891117,
      "grad_norm": 0.27906165695648083,
      "learning_rate": 6.58942211064142e-06,
      "loss": 0.3353,
      "step": 480
    },
    {
      "epoch": 1.4040114613180517,
      "grad_norm": 0.28984790937034516,
      "learning_rate": 6.43046586110374e-06,
      "loss": 0.3309,
      "step": 490
    },
    {
      "epoch": 1.4326647564469914,
      "grad_norm": 0.23182165178077277,
      "learning_rate": 6.269918739765313e-06,
      "loss": 0.3355,
      "step": 500
    },
    {
      "epoch": 1.4613180515759312,
      "grad_norm": 0.2488774130896262,
      "learning_rate": 6.107959296771915e-06,
      "loss": 0.3329,
      "step": 510
    },
    {
      "epoch": 1.4899713467048712,
      "grad_norm": 0.24874727627825863,
      "learning_rate": 5.944767652962309e-06,
      "loss": 0.3438,
      "step": 520
    },
    {
      "epoch": 1.518624641833811,
      "grad_norm": 0.2911542320121491,
      "learning_rate": 5.780525299549473e-06,
      "loss": 0.3359,
      "step": 530
    },
    {
      "epoch": 1.5472779369627507,
      "grad_norm": 0.28119928239607705,
      "learning_rate": 5.615414896277786e-06,
      "loss": 0.336,
      "step": 540
    },
    {
      "epoch": 1.5759312320916905,
      "grad_norm": 0.2765768847959203,
      "learning_rate": 5.44962006828065e-06,
      "loss": 0.3404,
      "step": 550
    },
    {
      "epoch": 1.6045845272206303,
      "grad_norm": 0.27806174427298036,
      "learning_rate": 5.283325201864475e-06,
      "loss": 0.3304,
      "step": 560
    },
    {
      "epoch": 1.63323782234957,
      "grad_norm": 0.2404022014715492,
      "learning_rate": 5.116715239446121e-06,
      "loss": 0.3295,
      "step": 570
    },
    {
      "epoch": 1.66189111747851,
      "grad_norm": 0.2513689234570333,
      "learning_rate": 4.9499754738718835e-06,
      "loss": 0.3342,
      "step": 580
    },
    {
      "epoch": 1.6905444126074498,
      "grad_norm": 0.2429044062755473,
      "learning_rate": 4.7832913423467555e-06,
      "loss": 0.3364,
      "step": 590
    },
    {
      "epoch": 1.7191977077363898,
      "grad_norm": 0.23681124768665515,
      "learning_rate": 4.616848220203124e-06,
      "loss": 0.3275,
      "step": 600
    },
    {
      "epoch": 1.7478510028653296,
      "grad_norm": 0.2491649241512219,
      "learning_rate": 4.450831214738303e-06,
      "loss": 0.3385,
      "step": 610
    },
    {
      "epoch": 1.7765042979942693,
      "grad_norm": 0.7732756792555575,
      "learning_rate": 4.285424959350139e-06,
      "loss": 0.3303,
      "step": 620
    },
    {
      "epoch": 1.8051575931232091,
      "grad_norm": 0.24958098955725797,
      "learning_rate": 4.1208134081996625e-06,
      "loss": 0.3382,
      "step": 630
    },
    {
      "epoch": 1.8338108882521489,
      "grad_norm": 0.2713258221481738,
      "learning_rate": 3.957179631629148e-06,
      "loss": 0.3348,
      "step": 640
    },
    {
      "epoch": 1.8624641833810889,
      "grad_norm": 0.23269097420301693,
      "learning_rate": 3.7947056125630904e-06,
      "loss": 0.3332,
      "step": 650
    },
    {
      "epoch": 1.8911174785100286,
      "grad_norm": 0.2800452866317965,
      "learning_rate": 3.6335720441185474e-06,
      "loss": 0.3263,
      "step": 660
    },
    {
      "epoch": 1.9197707736389686,
      "grad_norm": 0.2244027553245592,
      "learning_rate": 3.4739581286499147e-06,
      "loss": 0.3347,
      "step": 670
    },
    {
      "epoch": 1.9484240687679084,
      "grad_norm": 0.36236606952760386,
      "learning_rate": 3.3160413784516342e-06,
      "loss": 0.3251,
      "step": 680
    },
    {
      "epoch": 1.9770773638968482,
      "grad_norm": 0.2560138762411745,
      "learning_rate": 3.1599974183404784e-06,
      "loss": 0.3311,
      "step": 690
    },
    {
      "epoch": 2.005730659025788,
      "grad_norm": 0.24846783217066923,
      "learning_rate": 3.0059997903369658e-06,
      "loss": 0.3263,
      "step": 700
    },
    {
      "epoch": 2.0343839541547277,
      "grad_norm": 0.2561038844246594,
      "learning_rate": 2.854219760663125e-06,
      "loss": 0.3194,
      "step": 710
    },
    {
      "epoch": 2.0630372492836675,
      "grad_norm": 0.21888324162969877,
      "learning_rate": 2.704826129271257e-06,
      "loss": 0.3106,
      "step": 720
    },
    {
      "epoch": 2.0916905444126073,
      "grad_norm": 0.23634859060639352,
      "learning_rate": 2.5579850421155294e-06,
      "loss": 0.3215,
      "step": 730
    },
    {
      "epoch": 2.1203438395415475,
      "grad_norm": 0.40366184729596755,
      "learning_rate": 2.413859806375159e-06,
      "loss": 0.3119,
      "step": 740
    },
    {
      "epoch": 2.1489971346704873,
      "grad_norm": 0.21759429767669508,
      "learning_rate": 2.272610708834719e-06,
      "loss": 0.3094,
      "step": 750
    },
    {
      "epoch": 2.177650429799427,
      "grad_norm": 0.21797932422789995,
      "learning_rate": 2.1343948376235146e-06,
      "loss": 0.3081,
      "step": 760
    },
    {
      "epoch": 2.206303724928367,
      "grad_norm": 0.22015734090400496,
      "learning_rate": 1.9993659075123117e-06,
      "loss": 0.3118,
      "step": 770
    },
    {
      "epoch": 2.2349570200573066,
      "grad_norm": 0.5859620771202967,
      "learning_rate": 1.8676740889616835e-06,
      "loss": 0.3194,
      "step": 780
    },
    {
      "epoch": 2.2636103151862463,
      "grad_norm": 0.21809409710426375,
      "learning_rate": 1.739465841112125e-06,
      "loss": 0.3156,
      "step": 790
    },
    {
      "epoch": 2.292263610315186,
      "grad_norm": 0.311384088738318,
      "learning_rate": 1.6148837489016406e-06,
      "loss": 0.3105,
      "step": 800
    },
    {
      "epoch": 2.3209169054441263,
      "grad_norm": 0.22764092498020874,
      "learning_rate": 1.49406636449199e-06,
      "loss": 0.3209,
      "step": 810
    },
    {
      "epoch": 2.349570200573066,
      "grad_norm": 0.22336147023416364,
      "learning_rate": 1.3771480531799054e-06,
      "loss": 0.3217,
      "step": 820
    },
    {
      "epoch": 2.378223495702006,
      "grad_norm": 0.22717728346077168,
      "learning_rate": 1.2642588439646951e-06,
      "loss": 0.3211,
      "step": 830
    },
    {
      "epoch": 2.4068767908309456,
      "grad_norm": 0.208665334531538,
      "learning_rate": 1.1555242849383668e-06,
      "loss": 0.3183,
      "step": 840
    },
    {
      "epoch": 2.4355300859598854,
      "grad_norm": 0.21821518140433077,
      "learning_rate": 1.0510653036591583e-06,
      "loss": 0.3188,
      "step": 850
    },
    {
      "epoch": 2.464183381088825,
      "grad_norm": 0.22416726569351633,
      "learning_rate": 9.509980726637003e-07,
      "loss": 0.3167,
      "step": 860
    },
    {
      "epoch": 2.492836676217765,
      "grad_norm": 0.2135708085936856,
      "learning_rate": 8.5543388026743e-07,
      "loss": 0.315,
      "step": 870
    },
    {
      "epoch": 2.5214899713467047,
      "grad_norm": 0.20626691858117885,
      "learning_rate": 7.644790067969005e-07,
      "loss": 0.3151,
      "step": 880
    },
    {
      "epoch": 2.5501432664756445,
      "grad_norm": 0.203689315528671,
      "learning_rate": 6.7823460639167e-07,
      "loss": 0.3122,
      "step": 890
    },
    {
      "epoch": 2.5787965616045847,
      "grad_norm": 0.20939291314273653,
      "learning_rate": 5.967965945071896e-07,
      "loss": 0.3153,
      "step": 900
    },
    {
      "epoch": 2.6074498567335245,
      "grad_norm": 0.23023262138795553,
      "learning_rate": 5.202555412438309e-07,
      "loss": 0.3094,
      "step": 910
    },
    {
      "epoch": 2.6361031518624642,
      "grad_norm": 0.235572721264662,
      "learning_rate": 4.486965706206597e-07,
      "loss": 0.3146,
      "step": 920
    },
    {
      "epoch": 2.664756446991404,
      "grad_norm": 0.2315057700635808,
      "learning_rate": 3.8219926590600365e-07,
      "loss": 0.3144,
      "step": 930
    },
    {
      "epoch": 2.693409742120344,
      "grad_norm": 0.21729980205720664,
      "learning_rate": 3.2083758111006946e-07,
      "loss": 0.3191,
      "step": 940
    },
    {
      "epoch": 2.7220630372492836,
      "grad_norm": 0.2031412349579613,
      "learning_rate": 2.6467975873807617e-07,
      "loss": 0.3127,
      "step": 950
    },
    {
      "epoch": 2.7507163323782233,
      "grad_norm": 0.19878302412351165,
      "learning_rate": 2.1378825389533508e-07,
      "loss": 0.3169,
      "step": 960
    },
    {
      "epoch": 2.7793696275071635,
      "grad_norm": 0.20562940798947288,
      "learning_rate": 1.6821966482872264e-07,
      "loss": 0.3197,
      "step": 970
    },
    {
      "epoch": 2.8080229226361033,
      "grad_norm": 0.20192515439147846,
      "learning_rate": 1.28024669981755e-07,
      "loss": 0.3154,
      "step": 980
    },
    {
      "epoch": 2.836676217765043,
      "grad_norm": 0.20191838585812194,
      "learning_rate": 9.324797163330012e-08,
      "loss": 0.3125,
      "step": 990
    },
    {
      "epoch": 2.865329512893983,
      "grad_norm": 0.2153477946539778,
      "learning_rate": 6.39282461825852e-08,
      "loss": 0.3119,
      "step": 1000
    },
    {
      "epoch": 2.8939828080229226,
      "grad_norm": 0.21913342629281218,
      "learning_rate": 4.009810113580426e-08,
      "loss": 0.3175,
      "step": 1010
    },
    {
      "epoch": 2.9226361031518624,
      "grad_norm": 0.19798983439143433,
      "learning_rate": 2.178403884215141e-08,
      "loss": 0.3081,
      "step": 1020
    },
    {
      "epoch": 2.951289398280802,
      "grad_norm": 0.22183768018570765,
      "learning_rate": 9.006427019622177e-09,
      "loss": 0.3181,
      "step": 1030
    },
    {
      "epoch": 2.9799426934097424,
      "grad_norm": 0.1912655799602591,
      "learning_rate": 1.7794761033496089e-09,
      "loss": 0.3146,
      "step": 1040
    }
  ],
  "logging_steps": 10,
  "max_steps": 1047,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5074006013116416.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}