{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.99952614120992,
  "eval_steps": 500,
  "global_step": 7910,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00631811720107408,
      "grad_norm": 2.410219669342041,
      "learning_rate": 1.1378002528445008e-07,
      "loss": 0.6234,
      "step": 10
    },
    {
      "epoch": 0.01263623440214816,
      "grad_norm": 2.261991500854492,
      "learning_rate": 2.4020227560050574e-07,
      "loss": 0.6184,
      "step": 20
    },
    {
      "epoch": 0.01895435160322224,
      "grad_norm": 2.1056859493255615,
      "learning_rate": 3.6662452591656137e-07,
      "loss": 0.6112,
      "step": 30
    },
    {
      "epoch": 0.02527246880429632,
      "grad_norm": 1.712091326713562,
      "learning_rate": 4.93046776232617e-07,
      "loss": 0.6003,
      "step": 40
    },
    {
      "epoch": 0.0315905860053704,
      "grad_norm": 1.321094274520874,
      "learning_rate": 6.194690265486726e-07,
      "loss": 0.575,
      "step": 50
    },
    {
      "epoch": 0.03790870320644448,
      "grad_norm": 0.8089994192123413,
      "learning_rate": 7.458912768647282e-07,
      "loss": 0.5377,
      "step": 60
    },
    {
      "epoch": 0.04422682040751856,
      "grad_norm": 0.544200599193573,
      "learning_rate": 8.72313527180784e-07,
      "loss": 0.512,
      "step": 70
    },
    {
      "epoch": 0.05054493760859264,
      "grad_norm": 0.44749483466148376,
      "learning_rate": 9.987357774968396e-07,
      "loss": 0.4917,
      "step": 80
    },
    {
      "epoch": 0.05686305480966672,
      "grad_norm": 0.30434444546699524,
      "learning_rate": 1.1251580278128951e-06,
      "loss": 0.4749,
      "step": 90
    },
    {
      "epoch": 0.0631811720107408,
      "grad_norm": 0.24813058972358704,
      "learning_rate": 1.2515802781289506e-06,
      "loss": 0.4607,
      "step": 100
    },
    {
      "epoch": 0.06949928921181488,
      "grad_norm": 0.21706120669841766,
      "learning_rate": 1.3780025284450064e-06,
      "loss": 0.448,
      "step": 110
    },
    {
      "epoch": 0.07581740641288896,
      "grad_norm": 0.2046414017677307,
      "learning_rate": 1.5044247787610621e-06,
      "loss": 0.4406,
      "step": 120
    },
    {
      "epoch": 0.08213552361396304,
      "grad_norm": 0.1882794201374054,
      "learning_rate": 1.6308470290771178e-06,
      "loss": 0.4367,
      "step": 130
    },
    {
      "epoch": 0.08845364081503712,
      "grad_norm": 0.19263681769371033,
      "learning_rate": 1.7572692793931734e-06,
      "loss": 0.4266,
      "step": 140
    },
    {
      "epoch": 0.0947717580161112,
      "grad_norm": 0.1742035299539566,
      "learning_rate": 1.8836915297092289e-06,
      "loss": 0.4198,
      "step": 150
    },
    {
      "epoch": 0.10108987521718528,
      "grad_norm": 0.1775059998035431,
      "learning_rate": 2.0101137800252844e-06,
      "loss": 0.4164,
      "step": 160
    },
    {
      "epoch": 0.10740799241825937,
      "grad_norm": 0.18586362898349762,
      "learning_rate": 2.13653603034134e-06,
      "loss": 0.4101,
      "step": 170
    },
    {
      "epoch": 0.11372610961933344,
      "grad_norm": 0.17294418811798096,
      "learning_rate": 2.262958280657396e-06,
      "loss": 0.4083,
      "step": 180
    },
    {
      "epoch": 0.12004422682040752,
      "grad_norm": 0.1728675216436386,
      "learning_rate": 2.3893805309734516e-06,
      "loss": 0.4029,
      "step": 190
    },
    {
      "epoch": 0.1263623440214816,
      "grad_norm": 0.1797151267528534,
      "learning_rate": 2.515802781289507e-06,
      "loss": 0.4007,
      "step": 200
    },
    {
      "epoch": 0.13268046122255567,
      "grad_norm": 0.187180295586586,
      "learning_rate": 2.6422250316055626e-06,
      "loss": 0.3938,
      "step": 210
    },
    {
      "epoch": 0.13899857842362975,
      "grad_norm": 0.17990782856941223,
      "learning_rate": 2.768647281921619e-06,
      "loss": 0.3902,
      "step": 220
    },
    {
      "epoch": 0.14531669562470384,
      "grad_norm": 0.19836974143981934,
      "learning_rate": 2.895069532237674e-06,
      "loss": 0.3891,
      "step": 230
    },
    {
      "epoch": 0.15163481282577793,
      "grad_norm": 0.17586922645568848,
      "learning_rate": 3.02149178255373e-06,
      "loss": 0.3876,
      "step": 240
    },
    {
      "epoch": 0.15795293002685198,
      "grad_norm": 0.19539974629878998,
      "learning_rate": 3.1479140328697856e-06,
      "loss": 0.3819,
      "step": 250
    },
    {
      "epoch": 0.16427104722792607,
      "grad_norm": 0.18709833920001984,
      "learning_rate": 3.274336283185841e-06,
      "loss": 0.3789,
      "step": 260
    },
    {
      "epoch": 0.17058916442900016,
      "grad_norm": 0.18259377777576447,
      "learning_rate": 3.4007585335018966e-06,
      "loss": 0.3771,
      "step": 270
    },
    {
      "epoch": 0.17690728163007424,
      "grad_norm": 0.1889650523662567,
      "learning_rate": 3.5271807838179523e-06,
      "loss": 0.3757,
      "step": 280
    },
    {
      "epoch": 0.18322539883114833,
      "grad_norm": 0.17683972418308258,
      "learning_rate": 3.6536030341340076e-06,
      "loss": 0.378,
      "step": 290
    },
    {
      "epoch": 0.1895435160322224,
      "grad_norm": 0.19599057734012604,
      "learning_rate": 3.7800252844500634e-06,
      "loss": 0.3683,
      "step": 300
    },
    {
      "epoch": 0.19586163323329647,
      "grad_norm": 0.19569683074951172,
      "learning_rate": 3.906447534766119e-06,
      "loss": 0.37,
      "step": 310
    },
    {
      "epoch": 0.20217975043437056,
      "grad_norm": 0.2033437043428421,
      "learning_rate": 4.032869785082175e-06,
      "loss": 0.3648,
      "step": 320
    },
    {
      "epoch": 0.20849786763544464,
      "grad_norm": 0.1874990016222,
      "learning_rate": 4.15929203539823e-06,
      "loss": 0.3636,
      "step": 330
    },
    {
      "epoch": 0.21481598483651873,
      "grad_norm": 0.1825045645236969,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.363,
      "step": 340
    },
    {
      "epoch": 0.2211341020375928,
      "grad_norm": 0.19459910690784454,
      "learning_rate": 4.412136536030342e-06,
      "loss": 0.362,
      "step": 350
    },
    {
      "epoch": 0.22745221923866688,
      "grad_norm": 0.1864989548921585,
      "learning_rate": 4.538558786346398e-06,
      "loss": 0.357,
      "step": 360
    },
    {
      "epoch": 0.23377033643974096,
      "grad_norm": 0.1745409220457077,
      "learning_rate": 4.664981036662453e-06,
      "loss": 0.3564,
      "step": 370
    },
    {
      "epoch": 0.24008845364081505,
      "grad_norm": 0.18947263062000275,
      "learning_rate": 4.791403286978508e-06,
      "loss": 0.3537,
      "step": 380
    },
    {
      "epoch": 0.2464065708418891,
      "grad_norm": 0.1780448704957962,
      "learning_rate": 4.9178255372945645e-06,
      "loss": 0.3538,
      "step": 390
    },
    {
      "epoch": 0.2527246880429632,
      "grad_norm": 0.21806994080543518,
      "learning_rate": 5.04424778761062e-06,
      "loss": 0.3517,
      "step": 400
    },
    {
      "epoch": 0.2590428052440373,
      "grad_norm": 0.19830353558063507,
      "learning_rate": 5.170670037926675e-06,
      "loss": 0.3504,
      "step": 410
    },
    {
      "epoch": 0.26536092244511134,
      "grad_norm": 0.1746763288974762,
      "learning_rate": 5.297092288242731e-06,
      "loss": 0.3456,
      "step": 420
    },
    {
      "epoch": 0.27167903964618545,
      "grad_norm": 0.18027839064598083,
      "learning_rate": 5.4235145385587875e-06,
      "loss": 0.3476,
      "step": 430
    },
    {
      "epoch": 0.2779971568472595,
      "grad_norm": 0.18963277339935303,
      "learning_rate": 5.549936788874842e-06,
      "loss": 0.3454,
      "step": 440
    },
    {
      "epoch": 0.2843152740483336,
      "grad_norm": 0.1782628297805786,
      "learning_rate": 5.676359039190898e-06,
      "loss": 0.344,
      "step": 450
    },
    {
      "epoch": 0.2906333912494077,
      "grad_norm": 0.21438680589199066,
      "learning_rate": 5.802781289506953e-06,
      "loss": 0.3441,
      "step": 460
    },
    {
      "epoch": 0.29695150845048174,
      "grad_norm": 0.20768363773822784,
      "learning_rate": 5.9292035398230096e-06,
      "loss": 0.343,
      "step": 470
    },
    {
      "epoch": 0.30326962565155585,
      "grad_norm": 0.1901923269033432,
      "learning_rate": 6.055625790139065e-06,
      "loss": 0.3405,
      "step": 480
    },
    {
      "epoch": 0.3095877428526299,
      "grad_norm": 0.19777809083461761,
      "learning_rate": 6.182048040455121e-06,
      "loss": 0.3403,
      "step": 490
    },
    {
      "epoch": 0.31590586005370397,
      "grad_norm": 0.1863890141248703,
      "learning_rate": 6.3084702907711755e-06,
      "loss": 0.337,
      "step": 500
    },
    {
      "epoch": 0.3222239772547781,
      "grad_norm": 0.18657594919204712,
      "learning_rate": 6.434892541087232e-06,
      "loss": 0.3334,
      "step": 510
    },
    {
      "epoch": 0.32854209445585214,
      "grad_norm": 0.2064000368118286,
      "learning_rate": 6.561314791403287e-06,
      "loss": 0.3328,
      "step": 520
    },
    {
      "epoch": 0.33486021165692625,
      "grad_norm": 0.1871696412563324,
      "learning_rate": 6.687737041719343e-06,
      "loss": 0.3393,
      "step": 530
    },
    {
      "epoch": 0.3411783288580003,
      "grad_norm": 0.20120146870613098,
      "learning_rate": 6.814159292035398e-06,
      "loss": 0.3334,
      "step": 540
    },
    {
      "epoch": 0.34749644605907437,
      "grad_norm": 0.19019120931625366,
      "learning_rate": 6.9405815423514546e-06,
      "loss": 0.3366,
      "step": 550
    },
    {
      "epoch": 0.3538145632601485,
      "grad_norm": 0.19137969613075256,
      "learning_rate": 7.067003792667511e-06,
      "loss": 0.3346,
      "step": 560
    },
    {
      "epoch": 0.36013268046122254,
      "grad_norm": 0.20125152170658112,
      "learning_rate": 7.193426042983566e-06,
      "loss": 0.3289,
      "step": 570
    },
    {
      "epoch": 0.36645079766229666,
      "grad_norm": 0.17702394723892212,
      "learning_rate": 7.319848293299622e-06,
      "loss": 0.3309,
      "step": 580
    },
    {
      "epoch": 0.3727689148633707,
      "grad_norm": 0.1984817534685135,
      "learning_rate": 7.446270543615677e-06,
      "loss": 0.3316,
      "step": 590
    },
    {
      "epoch": 0.3790870320644448,
      "grad_norm": 0.1926579773426056,
      "learning_rate": 7.572692793931733e-06,
      "loss": 0.3289,
      "step": 600
    },
    {
      "epoch": 0.3854051492655189,
      "grad_norm": 0.21035262942314148,
      "learning_rate": 7.699115044247788e-06,
      "loss": 0.3282,
      "step": 610
    },
    {
      "epoch": 0.39172326646659295,
      "grad_norm": 0.18808738887310028,
      "learning_rate": 7.825537294563843e-06,
      "loss": 0.3272,
      "step": 620
    },
    {
      "epoch": 0.39804138366766706,
      "grad_norm": 0.19714747369289398,
      "learning_rate": 7.951959544879899e-06,
      "loss": 0.3248,
      "step": 630
    },
    {
      "epoch": 0.4043595008687411,
      "grad_norm": 0.1970880627632141,
      "learning_rate": 8.078381795195956e-06,
      "loss": 0.3242,
      "step": 640
    },
    {
      "epoch": 0.4106776180698152,
      "grad_norm": 0.18770354986190796,
      "learning_rate": 8.204804045512011e-06,
      "loss": 0.3215,
      "step": 650
    },
    {
      "epoch": 0.4169957352708893,
      "grad_norm": 0.2027129977941513,
      "learning_rate": 8.331226295828066e-06,
      "loss": 0.3247,
      "step": 660
    },
    {
      "epoch": 0.42331385247196335,
      "grad_norm": 0.21426306664943695,
      "learning_rate": 8.457648546144122e-06,
      "loss": 0.3217,
      "step": 670
    },
    {
      "epoch": 0.42963196967303746,
      "grad_norm": 0.2167753279209137,
      "learning_rate": 8.584070796460177e-06,
      "loss": 0.322,
      "step": 680
    },
    {
      "epoch": 0.4359500868741115,
      "grad_norm": 0.20410393178462982,
      "learning_rate": 8.710493046776234e-06,
      "loss": 0.3208,
      "step": 690
    },
    {
      "epoch": 0.4422682040751856,
      "grad_norm": 0.20711293816566467,
      "learning_rate": 8.83691529709229e-06,
      "loss": 0.319,
      "step": 700
    },
    {
      "epoch": 0.4485863212762597,
      "grad_norm": 0.20640410482883453,
      "learning_rate": 8.963337547408345e-06,
      "loss": 0.3172,
      "step": 710
    },
    {
      "epoch": 0.45490443847733375,
      "grad_norm": 0.2493702918291092,
      "learning_rate": 9.0897597977244e-06,
      "loss": 0.3177,
      "step": 720
    },
    {
      "epoch": 0.4612225556784078,
      "grad_norm": 0.24222460389137268,
      "learning_rate": 9.216182048040457e-06,
      "loss": 0.3167,
      "step": 730
    },
    {
      "epoch": 0.4675406728794819,
      "grad_norm": 0.20584948360919952,
      "learning_rate": 9.34260429835651e-06,
      "loss": 0.3165,
      "step": 740
    },
    {
      "epoch": 0.473858790080556,
      "grad_norm": 0.19482427835464478,
      "learning_rate": 9.469026548672568e-06,
      "loss": 0.3138,
      "step": 750
    },
    {
      "epoch": 0.4801769072816301,
      "grad_norm": 0.19475619494915009,
      "learning_rate": 9.595448798988623e-06,
      "loss": 0.3171,
      "step": 760
    },
    {
      "epoch": 0.48649502448270415,
      "grad_norm": 0.179108127951622,
      "learning_rate": 9.721871049304678e-06,
      "loss": 0.3103,
      "step": 770
    },
    {
      "epoch": 0.4928131416837782,
      "grad_norm": 0.19913727045059204,
      "learning_rate": 9.848293299620733e-06,
      "loss": 0.314,
      "step": 780
    },
    {
      "epoch": 0.4991312588848523,
      "grad_norm": 0.23399747908115387,
      "learning_rate": 9.97471554993679e-06,
      "loss": 0.3125,
      "step": 790
    },
    {
      "epoch": 0.5054493760859264,
      "grad_norm": 0.19475406408309937,
      "learning_rate": 9.999968841159285e-06,
      "loss": 0.3118,
      "step": 800
    },
    {
      "epoch": 0.5117674932870004,
      "grad_norm": 0.18734484910964966,
      "learning_rate": 9.999842259034458e-06,
      "loss": 0.3128,
      "step": 810
    },
    {
      "epoch": 0.5180856104880746,
      "grad_norm": 0.2116527259349823,
      "learning_rate": 9.99961830866117e-06,
      "loss": 0.3072,
      "step": 820
    },
    {
      "epoch": 0.5244037276891487,
      "grad_norm": 0.20056816935539246,
      "learning_rate": 9.999296994400692e-06,
      "loss": 0.3117,
      "step": 830
    },
    {
      "epoch": 0.5307218448902227,
      "grad_norm": 0.187480166554451,
      "learning_rate": 9.99887832251038e-06,
      "loss": 0.3086,
      "step": 840
    },
    {
      "epoch": 0.5370399620912968,
      "grad_norm": 0.18775825202465057,
      "learning_rate": 9.998362301143562e-06,
      "loss": 0.3079,
      "step": 850
    },
    {
      "epoch": 0.5433580792923709,
      "grad_norm": 0.19840385019779205,
      "learning_rate": 9.997748940349378e-06,
      "loss": 0.3072,
      "step": 860
    },
    {
      "epoch": 0.5496761964934449,
      "grad_norm": 0.1993194818496704,
      "learning_rate": 9.997038252072573e-06,
      "loss": 0.3065,
      "step": 870
    },
    {
      "epoch": 0.555994313694519,
      "grad_norm": 0.188375785946846,
      "learning_rate": 9.996230250153283e-06,
      "loss": 0.3075,
      "step": 880
    },
    {
      "epoch": 0.5623124308955931,
      "grad_norm": 0.21323969960212708,
      "learning_rate": 9.995324950326746e-06,
      "loss": 0.3064,
      "step": 890
    },
    {
      "epoch": 0.5686305480966672,
      "grad_norm": 0.2170088142156601,
      "learning_rate": 9.994322370223011e-06,
      "loss": 0.3007,
      "step": 900
    },
    {
      "epoch": 0.5749486652977412,
      "grad_norm": 0.1998039036989212,
      "learning_rate": 9.993222529366591e-06,
      "loss": 0.3022,
      "step": 910
    },
    {
      "epoch": 0.5812667824988154,
      "grad_norm": 0.20587877929210663,
      "learning_rate": 9.992025449176073e-06,
      "loss": 0.3001,
      "step": 920
    },
    {
      "epoch": 0.5875848996998895,
      "grad_norm": 0.20559850335121155,
      "learning_rate": 9.990731152963715e-06,
      "loss": 0.3068,
      "step": 930
    },
    {
      "epoch": 0.5939030169009635,
      "grad_norm": 0.2025015950202942,
      "learning_rate": 9.989339665934983e-06,
      "loss": 0.3042,
      "step": 940
    },
    {
      "epoch": 0.6002211341020376,
      "grad_norm": 0.19664855301380157,
      "learning_rate": 9.987851015188064e-06,
      "loss": 0.3045,
      "step": 950
    },
    {
      "epoch": 0.6065392513031117,
      "grad_norm": 0.19013217091560364,
      "learning_rate": 9.986265229713332e-06,
      "loss": 0.2992,
      "step": 960
    },
    {
      "epoch": 0.6128573685041857,
      "grad_norm": 0.18943046033382416,
      "learning_rate": 9.984582340392797e-06,
      "loss": 0.3017,
      "step": 970
    },
    {
      "epoch": 0.6191754857052598,
      "grad_norm": 0.19746196269989014,
      "learning_rate": 9.982802379999486e-06,
      "loss": 0.3016,
      "step": 980
    },
    {
      "epoch": 0.6254936029063339,
      "grad_norm": 0.19490814208984375,
      "learning_rate": 9.98092538319682e-06,
      "loss": 0.3004,
      "step": 990
    },
    {
      "epoch": 0.6318117201074079,
      "grad_norm": 0.20448216795921326,
      "learning_rate": 9.978951386537929e-06,
      "loss": 0.3003,
      "step": 1000
    },
    {
      "epoch": 0.638129837308482,
      "grad_norm": 0.2098686397075653,
      "learning_rate": 9.976880428464948e-06,
      "loss": 0.2992,
      "step": 1010
    },
    {
      "epoch": 0.6444479545095562,
      "grad_norm": 0.2074064463376999,
      "learning_rate": 9.974712549308257e-06,
      "loss": 0.2984,
      "step": 1020
    },
    {
      "epoch": 0.6507660717106303,
      "grad_norm": 0.19775456190109253,
      "learning_rate": 9.97244779128571e-06,
      "loss": 0.2966,
      "step": 1030
    },
    {
      "epoch": 0.6570841889117043,
      "grad_norm": 0.20709405839443207,
      "learning_rate": 9.970086198501803e-06,
      "loss": 0.2983,
      "step": 1040
    },
    {
      "epoch": 0.6634023061127784,
      "grad_norm": 0.21704506874084473,
      "learning_rate": 9.967627816946816e-06,
      "loss": 0.2989,
      "step": 1050
    },
    {
      "epoch": 0.6697204233138525,
      "grad_norm": 0.22157025337219238,
      "learning_rate": 9.965072694495922e-06,
      "loss": 0.298,
      "step": 1060
    },
    {
      "epoch": 0.6760385405149265,
      "grad_norm": 0.22472302615642548,
      "learning_rate": 9.96242088090825e-06,
      "loss": 0.2976,
      "step": 1070
    },
    {
      "epoch": 0.6823566577160006,
      "grad_norm": 0.2012009471654892,
      "learning_rate": 9.959672427825917e-06,
      "loss": 0.2935,
      "step": 1080
    },
    {
      "epoch": 0.6886747749170747,
      "grad_norm": 0.19134068489074707,
      "learning_rate": 9.956827388773025e-06,
      "loss": 0.2974,
      "step": 1090
    },
    {
      "epoch": 0.6949928921181487,
      "grad_norm": 0.18882884085178375,
      "learning_rate": 9.953885819154615e-06,
      "loss": 0.2926,
      "step": 1100
    },
    {
      "epoch": 0.7013110093192229,
      "grad_norm": 0.2316889613866806,
      "learning_rate": 9.950847776255592e-06,
      "loss": 0.2979,
      "step": 1110
    },
    {
      "epoch": 0.707629126520297,
      "grad_norm": 0.21829363703727722,
      "learning_rate": 9.947713319239605e-06,
      "loss": 0.2947,
      "step": 1120
    },
    {
      "epoch": 0.7139472437213711,
      "grad_norm": 0.19675135612487793,
      "learning_rate": 9.944482509147896e-06,
      "loss": 0.2939,
      "step": 1130
    },
    {
      "epoch": 0.7202653609224451,
      "grad_norm": 0.21681798994541168,
      "learning_rate": 9.941155408898117e-06,
      "loss": 0.2943,
      "step": 1140
    },
    {
      "epoch": 0.7265834781235192,
      "grad_norm": 0.18257145583629608,
      "learning_rate": 9.937732083283096e-06,
      "loss": 0.2917,
      "step": 1150
    },
    {
      "epoch": 0.7329015953245933,
      "grad_norm": 0.20622026920318604,
      "learning_rate": 9.934212598969577e-06,
      "loss": 0.2948,
      "step": 1160
    },
    {
      "epoch": 0.7392197125256673,
      "grad_norm": 0.16587024927139282,
      "learning_rate": 9.930597024496933e-06,
      "loss": 0.2918,
      "step": 1170
    },
    {
      "epoch": 0.7455378297267414,
      "grad_norm": 0.1997261643409729,
      "learning_rate": 9.926885430275807e-06,
      "loss": 0.2922,
      "step": 1180
    },
    {
      "epoch": 0.7518559469278155,
      "grad_norm": 0.20139716565608978,
      "learning_rate": 9.923077888586775e-06,
      "loss": 0.2891,
      "step": 1190
    },
    {
      "epoch": 0.7581740641288895,
      "grad_norm": 0.20793363451957703,
      "learning_rate": 9.919174473578901e-06,
      "loss": 0.2918,
      "step": 1200
    },
    {
      "epoch": 0.7644921813299637,
      "grad_norm": 0.19905509054660797,
      "learning_rate": 9.915175261268327e-06,
      "loss": 0.2929,
      "step": 1210
    },
    {
      "epoch": 0.7708102985310378,
      "grad_norm": 0.19855041801929474,
      "learning_rate": 9.911080329536761e-06,
      "loss": 0.2921,
      "step": 1220
    },
    {
      "epoch": 0.7771284157321118,
      "grad_norm": 0.24103382229804993,
      "learning_rate": 9.906889758129994e-06,
      "loss": 0.2919,
      "step": 1230
    },
    {
      "epoch": 0.7834465329331859,
      "grad_norm": 0.24005091190338135,
      "learning_rate": 9.902603628656312e-06,
      "loss": 0.2921,
      "step": 1240
    },
    {
      "epoch": 0.78976465013426,
      "grad_norm": 0.19127513468265533,
      "learning_rate": 9.898222024584938e-06,
      "loss": 0.2911,
      "step": 1250
    },
    {
      "epoch": 0.7960827673353341,
      "grad_norm": 0.2415689080953598,
      "learning_rate": 9.893745031244385e-06,
      "loss": 0.2893,
      "step": 1260
    },
    {
      "epoch": 0.8024008845364081,
      "grad_norm": 0.21930722892284393,
      "learning_rate": 9.889172735820803e-06,
      "loss": 0.293,
      "step": 1270
    },
    {
      "epoch": 0.8087190017374822,
      "grad_norm": 0.23149755597114563,
      "learning_rate": 9.884505227356281e-06,
      "loss": 0.291,
      "step": 1280
    },
    {
      "epoch": 0.8150371189385563,
      "grad_norm": 0.20088982582092285,
      "learning_rate": 9.87974259674711e-06,
      "loss": 0.2877,
      "step": 1290
    },
    {
      "epoch": 0.8213552361396304,
      "grad_norm": 0.201844722032547,
      "learning_rate": 9.87488493674202e-06,
      "loss": 0.2892,
      "step": 1300
    },
    {
      "epoch": 0.8276733533407045,
      "grad_norm": 0.2128770351409912,
      "learning_rate": 9.86993234194036e-06,
      "loss": 0.2882,
      "step": 1310
    },
    {
      "epoch": 0.8339914705417786,
      "grad_norm": 0.21982018649578094,
      "learning_rate": 9.86488490879027e-06,
      "loss": 0.2889,
      "step": 1320
    },
    {
      "epoch": 0.8403095877428526,
      "grad_norm": 0.20911258459091187,
      "learning_rate": 9.859742735586801e-06,
      "loss": 0.2881,
      "step": 1330
    },
    {
      "epoch": 0.8466277049439267,
      "grad_norm": 0.22615337371826172,
      "learning_rate": 9.854505922469985e-06,
      "loss": 0.2896,
      "step": 1340
    },
    {
      "epoch": 0.8529458221450008,
      "grad_norm": 0.1955297738313675,
      "learning_rate": 9.849174571422906e-06,
      "loss": 0.2885,
      "step": 1350
    },
    {
      "epoch": 0.8592639393460749,
      "grad_norm": 0.1870257705450058,
      "learning_rate": 9.843748786269704e-06,
      "loss": 0.2849,
      "step": 1360
    },
    {
      "epoch": 0.8655820565471489,
      "grad_norm": 0.20946596562862396,
      "learning_rate": 9.838228672673551e-06,
      "loss": 0.2873,
      "step": 1370
    },
    {
      "epoch": 0.871900173748223,
      "grad_norm": 0.18047629296779633,
      "learning_rate": 9.832614338134595e-06,
      "loss": 0.2862,
      "step": 1380
    },
    {
      "epoch": 0.8782182909492972,
      "grad_norm": 0.19568774104118347,
      "learning_rate": 9.826905891987872e-06,
      "loss": 0.2857,
      "step": 1390
    },
    {
      "epoch": 0.8845364081503712,
      "grad_norm": 0.22279143333435059,
      "learning_rate": 9.821103445401167e-06,
      "loss": 0.2851,
      "step": 1400
    },
    {
      "epoch": 0.8908545253514453,
      "grad_norm": 0.21086236834526062,
      "learning_rate": 9.81520711137286e-06,
      "loss": 0.2849,
      "step": 1410
    },
    {
      "epoch": 0.8971726425525194,
      "grad_norm": 0.2367515116930008,
      "learning_rate": 9.809217004729714e-06,
      "loss": 0.2821,
      "step": 1420
    },
    {
      "epoch": 0.9034907597535934,
      "grad_norm": 0.21128222346305847,
      "learning_rate": 9.803133242124649e-06,
      "loss": 0.2857,
      "step": 1430
    },
    {
      "epoch": 0.9098088769546675,
      "grad_norm": 0.22519482672214508,
      "learning_rate": 9.796955942034465e-06,
      "loss": 0.2852,
      "step": 1440
    },
    {
      "epoch": 0.9161269941557416,
      "grad_norm": 0.19642499089241028,
      "learning_rate": 9.790685224757534e-06,
      "loss": 0.2823,
      "step": 1450
    },
    {
      "epoch": 0.9224451113568156,
      "grad_norm": 0.21369688212871552,
      "learning_rate": 9.784321212411463e-06,
      "loss": 0.2839,
      "step": 1460
    },
    {
      "epoch": 0.9287632285578897,
      "grad_norm": 0.21286526322364807,
      "learning_rate": 9.777864028930705e-06,
      "loss": 0.2824,
      "step": 1470
    },
    {
      "epoch": 0.9350813457589638,
      "grad_norm": 0.22185811400413513,
      "learning_rate": 9.771313800064157e-06,
      "loss": 0.2835,
      "step": 1480
    },
    {
      "epoch": 0.941399462960038,
      "grad_norm": 0.2697184383869171,
      "learning_rate": 9.764670653372709e-06,
      "loss": 0.2827,
      "step": 1490
    },
    {
      "epoch": 0.947717580161112,
      "grad_norm": 0.18580107390880585,
      "learning_rate": 9.757934718226751e-06,
      "loss": 0.2835,
      "step": 1500
    },
    {
      "epoch": 0.9540356973621861,
      "grad_norm": 0.19771607220172882,
      "learning_rate": 9.751106125803663e-06,
      "loss": 0.2822,
      "step": 1510
    },
    {
      "epoch": 0.9603538145632602,
      "grad_norm": 0.21847136318683624,
      "learning_rate": 9.744185009085258e-06,
      "loss": 0.284,
      "step": 1520
    },
    {
      "epoch": 0.9666719317643342,
      "grad_norm": 0.18815948069095612,
      "learning_rate": 9.73717150285519e-06,
      "loss": 0.2819,
      "step": 1530
    },
    {
      "epoch": 0.9729900489654083,
      "grad_norm": 0.19956186413764954,
      "learning_rate": 9.730065743696332e-06,
      "loss": 0.2828,
      "step": 1540
    },
    {
      "epoch": 0.9793081661664824,
      "grad_norm": 0.18478693068027496,
      "learning_rate": 9.722867869988112e-06,
      "loss": 0.2819,
      "step": 1550
    },
    {
      "epoch": 0.9856262833675564,
      "grad_norm": 0.21556143462657928,
      "learning_rate": 9.715578021903827e-06,
      "loss": 0.2805,
      "step": 1560
    },
    {
      "epoch": 0.9919444005686305,
      "grad_norm": 0.1989905834197998,
      "learning_rate": 9.7081963414079e-06,
      "loss": 0.2788,
      "step": 1570
    },
    {
      "epoch": 0.9982625177697046,
      "grad_norm": 0.1941995471715927,
      "learning_rate": 9.70072297225313e-06,
      "loss": 0.2804,
      "step": 1580
    },
    {
      "epoch": 1.0050544937608592,
      "grad_norm": 0.192391499876976,
      "learning_rate": 9.693158059977879e-06,
      "loss": 0.2898,
      "step": 1590
    },
    {
      "epoch": 1.0113726109619334,
      "grad_norm": 0.19495341181755066,
      "learning_rate": 9.685501751903246e-06,
      "loss": 0.2747,
      "step": 1600
    },
    {
      "epoch": 1.0176907281630074,
      "grad_norm": 0.1872604936361313,
      "learning_rate": 9.677754197130196e-06,
      "loss": 0.2749,
      "step": 1610
    },
    {
      "epoch": 1.0240088453640814,
      "grad_norm": 0.21903474628925323,
      "learning_rate": 9.669915546536659e-06,
      "loss": 0.2726,
      "step": 1620
    },
    {
      "epoch": 1.0303269625651557,
      "grad_norm": 0.22876089811325073,
      "learning_rate": 9.661985952774584e-06,
      "loss": 0.2722,
      "step": 1630
    },
    {
      "epoch": 1.0366450797662297,
      "grad_norm": 0.19803361594676971,
      "learning_rate": 9.653965570266977e-06,
      "loss": 0.2723,
      "step": 1640
    },
    {
      "epoch": 1.0429631969673037,
      "grad_norm": 0.18463590741157532,
      "learning_rate": 9.645854555204882e-06,
      "loss": 0.2708,
      "step": 1650
    },
    {
      "epoch": 1.0492813141683779,
      "grad_norm": 0.18571729958057404,
      "learning_rate": 9.637653065544349e-06,
      "loss": 0.2726,
      "step": 1660
    },
    {
      "epoch": 1.055599431369452,
      "grad_norm": 0.199079692363739,
      "learning_rate": 9.629361261003353e-06,
      "loss": 0.2738,
      "step": 1670
    },
    {
      "epoch": 1.061917548570526,
      "grad_norm": 0.20288918912410736,
      "learning_rate": 9.620979303058686e-06,
      "loss": 0.2746,
      "step": 1680
    },
    {
      "epoch": 1.0682356657716001,
      "grad_norm": 0.2032773643732071,
      "learning_rate": 9.612507354942811e-06,
      "loss": 0.2736,
      "step": 1690
    },
    {
      "epoch": 1.0745537829726741,
      "grad_norm": 0.19241447746753693,
      "learning_rate": 9.603945581640682e-06,
      "loss": 0.2721,
      "step": 1700
    },
    {
      "epoch": 1.0808719001737481,
      "grad_norm": 0.18638016283512115,
      "learning_rate": 9.595294149886532e-06,
      "loss": 0.27,
      "step": 1710
    },
    {
      "epoch": 1.0871900173748223,
      "grad_norm": 0.1852736473083496,
      "learning_rate": 9.58655322816063e-06,
      "loss": 0.2714,
      "step": 1720
    },
    {
      "epoch": 1.0935081345758964,
      "grad_norm": 0.1990862339735031,
      "learning_rate": 9.577722986685992e-06,
      "loss": 0.2706,
      "step": 1730
    },
    {
      "epoch": 1.0998262517769706,
      "grad_norm": 0.19899272918701172,
      "learning_rate": 9.568803597425072e-06,
      "loss": 0.275,
      "step": 1740
    },
    {
      "epoch": 1.1061443689780446,
      "grad_norm": 0.18742632865905762,
      "learning_rate": 9.559795234076414e-06,
      "loss": 0.2721,
      "step": 1750
    },
    {
      "epoch": 1.1124624861791186,
      "grad_norm": 0.223663330078125,
      "learning_rate": 9.550698072071263e-06,
      "loss": 0.2716,
      "step": 1760
    },
    {
      "epoch": 1.1187806033801928,
      "grad_norm": 0.21346202492713928,
      "learning_rate": 9.541512288570155e-06,
      "loss": 0.274,
      "step": 1770
    },
    {
      "epoch": 1.1250987205812668,
      "grad_norm": 0.19517794251441956,
      "learning_rate": 9.532238062459465e-06,
      "loss": 0.2711,
      "step": 1780
    },
    {
      "epoch": 1.1314168377823408,
      "grad_norm": 0.18628506362438202,
      "learning_rate": 9.522875574347917e-06,
      "loss": 0.2719,
      "step": 1790
    },
    {
      "epoch": 1.137734954983415,
      "grad_norm": 0.2409992814064026,
      "learning_rate": 9.51342500656308e-06,
      "loss": 0.2704,
      "step": 1800
    },
    {
      "epoch": 1.144053072184489,
      "grad_norm": 0.2048967182636261,
      "learning_rate": 9.503886543147804e-06,
      "loss": 0.2703,
      "step": 1810
    },
    {
      "epoch": 1.150371189385563,
      "grad_norm": 0.1800081878900528,
      "learning_rate": 9.494260369856649e-06,
      "loss": 0.2693,
      "step": 1820
    },
    {
      "epoch": 1.1566893065866373,
      "grad_norm": 0.1908334493637085,
      "learning_rate": 9.484546674152253e-06,
      "loss": 0.2705,
      "step": 1830
    },
    {
      "epoch": 1.1630074237877113,
      "grad_norm": 0.18866339325904846,
      "learning_rate": 9.47474564520169e-06,
      "loss": 0.2695,
      "step": 1840
    },
    {
      "epoch": 1.1693255409887853,
      "grad_norm": 0.17103448510169983,
      "learning_rate": 9.464857473872788e-06,
      "loss": 0.2699,
      "step": 1850
    },
    {
      "epoch": 1.1756436581898595,
      "grad_norm": 0.1825484037399292,
      "learning_rate": 9.454882352730405e-06,
      "loss": 0.2702,
      "step": 1860
    },
    {
      "epoch": 1.1819617753909335,
      "grad_norm": 0.21534956991672516,
      "learning_rate": 9.444820476032687e-06,
      "loss": 0.2701,
      "step": 1870
    },
    {
      "epoch": 1.1882798925920075,
      "grad_norm": 0.20504914224147797,
      "learning_rate": 9.434672039727275e-06,
      "loss": 0.2668,
      "step": 1880
    },
    {
      "epoch": 1.1945980097930817,
      "grad_norm": 0.1951032131910324,
      "learning_rate": 9.424437241447497e-06,
      "loss": 0.2681,
      "step": 1890
    },
    {
      "epoch": 1.2009161269941557,
      "grad_norm": 0.24697691202163696,
      "learning_rate": 9.41411628050852e-06,
      "loss": 0.2687,
      "step": 1900
    },
    {
      "epoch": 1.2072342441952297,
      "grad_norm": 0.1977747082710266,
      "learning_rate": 9.40370935790346e-06,
      "loss": 0.2706,
      "step": 1910
    },
    {
      "epoch": 1.213552361396304,
      "grad_norm": 0.2046399563550949,
      "learning_rate": 9.393216676299481e-06,
      "loss": 0.2672,
      "step": 1920
    },
    {
      "epoch": 1.219870478597378,
      "grad_norm": 0.21050798892974854,
      "learning_rate": 9.38263844003383e-06,
      "loss": 0.2677,
      "step": 1930
    },
    {
      "epoch": 1.226188595798452,
      "grad_norm": 0.18349182605743408,
      "learning_rate": 9.371974855109876e-06,
      "loss": 0.2676,
      "step": 1940
    },
    {
      "epoch": 1.2325067129995262,
      "grad_norm": 0.2518089711666107,
      "learning_rate": 9.361226129193086e-06,
      "loss": 0.2659,
      "step": 1950
    },
    {
      "epoch": 1.2388248302006002,
      "grad_norm": 0.18753299117088318,
      "learning_rate": 9.350392471606989e-06,
      "loss": 0.2641,
      "step": 1960
    },
    {
      "epoch": 1.2451429474016744,
      "grad_norm": 0.2322888821363449,
      "learning_rate": 9.339474093329094e-06,
      "loss": 0.2675,
      "step": 1970
    },
    {
      "epoch": 1.2514610646027484,
      "grad_norm": 0.19198372960090637,
      "learning_rate": 9.328471206986778e-06,
      "loss": 0.269,
      "step": 1980
    },
    {
      "epoch": 1.2577791818038224,
      "grad_norm": 0.1776944249868393,
      "learning_rate": 9.317384026853161e-06,
      "loss": 0.2673,
      "step": 1990
    },
    {
      "epoch": 1.2640972990048964,
      "grad_norm": 0.21030068397521973,
      "learning_rate": 9.306212768842914e-06,
      "loss": 0.2672,
      "step": 2000
    },
    {
      "epoch": 1.2704154162059706,
      "grad_norm": 0.25448349118232727,
      "learning_rate": 9.294957650508065e-06,
      "loss": 0.2685,
      "step": 2010
    },
    {
      "epoch": 1.2767335334070447,
      "grad_norm": 0.1928747445344925,
      "learning_rate": 9.283618891033764e-06,
      "loss": 0.2669,
      "step": 2020
    },
    {
      "epoch": 1.2830516506081189,
      "grad_norm": 0.19075071811676025,
      "learning_rate": 9.272196711234001e-06,
      "loss": 0.2658,
      "step": 2030
    },
    {
      "epoch": 1.2893697678091929,
      "grad_norm": 0.18030743300914764,
      "learning_rate": 9.260691333547329e-06,
      "loss": 0.269,
      "step": 2040
    },
    {
      "epoch": 1.2956878850102669,
      "grad_norm": 0.20846770703792572,
      "learning_rate": 9.249102982032506e-06,
      "loss": 0.268,
      "step": 2050
    },
    {
      "epoch": 1.3020060022113409,
      "grad_norm": 0.18990422785282135,
      "learning_rate": 9.237431882364149e-06,
      "loss": 0.2674,
      "step": 2060
    },
    {
      "epoch": 1.308324119412415,
      "grad_norm": 0.21943022310733795,
      "learning_rate": 9.22567826182834e-06,
      "loss": 0.2655,
      "step": 2070
    },
    {
      "epoch": 1.3146422366134891,
      "grad_norm": 0.21548326313495636,
      "learning_rate": 9.213842349318185e-06,
      "loss": 0.2657,
      "step": 2080
    },
    {
      "epoch": 1.3209603538145633,
      "grad_norm": 0.18391166627407074,
      "learning_rate": 9.201924375329372e-06,
      "loss": 0.2663,
      "step": 2090
    },
    {
      "epoch": 1.3272784710156373,
      "grad_norm": 0.17586641013622284,
      "learning_rate": 9.189924571955671e-06,
      "loss": 0.2624,
      "step": 2100
    },
    {
      "epoch": 1.3335965882167113,
      "grad_norm": 0.19197408854961395,
      "learning_rate": 9.177843172884423e-06,
      "loss": 0.2647,
      "step": 2110
    },
    {
      "epoch": 1.3399147054177856,
      "grad_norm": 0.21062326431274414,
      "learning_rate": 9.165680413391987e-06,
      "loss": 0.265,
      "step": 2120
    },
    {
      "epoch": 1.3462328226188596,
      "grad_norm": 0.19581826031208038,
      "learning_rate": 9.153436530339147e-06,
      "loss": 0.2638,
      "step": 2130
    },
    {
      "epoch": 1.3525509398199338,
      "grad_norm": 0.2166038602590561,
      "learning_rate": 9.14111176216652e-06,
      "loss": 0.2657,
      "step": 2140
    },
    {
      "epoch": 1.3588690570210078,
      "grad_norm": 0.2010088860988617,
      "learning_rate": 9.128706348889895e-06,
      "loss": 0.2638,
      "step": 2150
    },
    {
      "epoch": 1.3651871742220818,
      "grad_norm": 0.2053796499967575,
      "learning_rate": 9.116220532095563e-06,
      "loss": 0.264,
      "step": 2160
    },
    {
      "epoch": 1.3715052914231558,
      "grad_norm": 0.17751292884349823,
      "learning_rate": 9.10365455493562e-06,
      "loss": 0.2653,
      "step": 2170
    },
    {
      "epoch": 1.37782340862423,
      "grad_norm": 0.22349873185157776,
      "learning_rate": 9.091008662123224e-06,
      "loss": 0.2642,
      "step": 2180
    },
    {
      "epoch": 1.384141525825304,
      "grad_norm": 0.1846960186958313,
      "learning_rate": 9.078283099927829e-06,
      "loss": 0.2653,
      "step": 2190
    },
    {
      "epoch": 1.3904596430263783,
      "grad_norm": 0.2242564558982849,
      "learning_rate": 9.065478116170394e-06,
      "loss": 0.2621,
      "step": 2200
    },
    {
      "epoch": 1.3967777602274523,
      "grad_norm": 0.241655170917511,
      "learning_rate": 9.052593960218556e-06,
      "loss": 0.2652,
      "step": 2210
    },
    {
      "epoch": 1.4030958774285263,
      "grad_norm": 0.19567032158374786,
      "learning_rate": 9.039630882981769e-06,
      "loss": 0.2642,
      "step": 2220
    },
    {
      "epoch": 1.4094139946296003,
      "grad_norm": 0.21501778066158295,
      "learning_rate": 9.026589136906422e-06,
      "loss": 0.2625,
      "step": 2230
    },
    {
      "epoch": 1.4157321118306745,
      "grad_norm": 0.19091379642486572,
      "learning_rate": 9.013468975970923e-06,
      "loss": 0.2646,
      "step": 2240
    },
    {
      "epoch": 1.4220502290317485,
      "grad_norm": 0.17913809418678284,
      "learning_rate": 9.00027065568075e-06,
      "loss": 0.2638,
      "step": 2250
    },
    {
      "epoch": 1.4283683462328227,
      "grad_norm": 0.18866880238056183,
      "learning_rate": 8.986994433063476e-06,
      "loss": 0.2634,
      "step": 2260
    },
    {
      "epoch": 1.4346864634338967,
      "grad_norm": 0.20900848507881165,
      "learning_rate": 8.973640566663769e-06,
      "loss": 0.2643,
      "step": 2270
    },
    {
      "epoch": 1.4410045806349707,
      "grad_norm": 0.1879900097846985,
      "learning_rate": 8.96020931653835e-06,
      "loss": 0.2633,
      "step": 2280
    },
    {
      "epoch": 1.4473226978360447,
      "grad_norm": 0.17993497848510742,
      "learning_rate": 8.946700944250925e-06,
      "loss": 0.2628,
      "step": 2290
    },
    {
      "epoch": 1.453640815037119,
      "grad_norm": 0.2076902538537979,
      "learning_rate": 8.93311571286711e-06,
      "loss": 0.2629,
      "step": 2300
    },
    {
      "epoch": 1.459958932238193,
      "grad_norm": 0.24252377450466156,
      "learning_rate": 8.919453886949285e-06,
      "loss": 0.2625,
      "step": 2310
    },
    {
      "epoch": 1.4662770494392672,
      "grad_norm": 0.19852754473686218,
      "learning_rate": 8.905715732551457e-06,
      "loss": 0.263,
      "step": 2320
    },
    {
      "epoch": 1.4725951666403412,
      "grad_norm": 0.1704029142856598,
      "learning_rate": 8.89190151721407e-06,
      "loss": 0.2642,
      "step": 2330
    },
    {
      "epoch": 1.4789132838414152,
      "grad_norm": 0.19873927533626556,
      "learning_rate": 8.878011509958804e-06,
      "loss": 0.2612,
      "step": 2340
    },
    {
      "epoch": 1.4852314010424894,
      "grad_norm": 0.1872422695159912,
      "learning_rate": 8.864045981283327e-06,
      "loss": 0.259,
      "step": 2350
    },
    {
      "epoch": 1.4915495182435634,
      "grad_norm": 0.20828309655189514,
      "learning_rate": 8.850005203156035e-06,
      "loss": 0.2614,
      "step": 2360
    },
    {
      "epoch": 1.4978676354446376,
      "grad_norm": 0.18343457579612732,
      "learning_rate": 8.835889449010743e-06,
      "loss": 0.2618,
      "step": 2370
    },
    {
      "epoch": 1.5041857526457116,
      "grad_norm": 0.1891496777534485,
      "learning_rate": 8.821698993741381e-06,
      "loss": 0.264,
      "step": 2380
    },
    {
      "epoch": 1.5105038698467856,
      "grad_norm": 0.19773255288600922,
      "learning_rate": 8.80743411369662e-06,
      "loss": 0.2609,
      "step": 2390
    },
    {
      "epoch": 1.5168219870478596,
      "grad_norm": 0.20208434760570526,
      "learning_rate": 8.7930950866745e-06,
      "loss": 0.2632,
      "step": 2400
    },
    {
      "epoch": 1.5231401042489339,
      "grad_norm": 0.2181108295917511,
      "learning_rate": 8.778682191917019e-06,
      "loss": 0.2619,
      "step": 2410
    },
    {
      "epoch": 1.5294582214500079,
      "grad_norm": 0.20136655867099762,
      "learning_rate": 8.764195710104699e-06,
      "loss": 0.2625,
      "step": 2420
    },
    {
      "epoch": 1.535776338651082,
      "grad_norm": 0.254148930311203,
      "learning_rate": 8.749635923351108e-06,
      "loss": 0.2601,
      "step": 2430
    },
    {
      "epoch": 1.542094455852156,
      "grad_norm": 0.2224704623222351,
      "learning_rate": 8.73500311519738e-06,
      "loss": 0.2619,
      "step": 2440
    },
    {
      "epoch": 1.54841257305323,
      "grad_norm": 0.17686180770397186,
      "learning_rate": 8.720297570606686e-06,
      "loss": 0.2607,
      "step": 2450
    },
    {
      "epoch": 1.554730690254304,
      "grad_norm": 0.18937917053699493,
      "learning_rate": 8.705519575958684e-06,
      "loss": 0.2616,
      "step": 2460
    },
    {
      "epoch": 1.5610488074553783,
      "grad_norm": 0.19412845373153687,
      "learning_rate": 8.690669419043945e-06,
      "loss": 0.2622,
      "step": 2470
    },
    {
      "epoch": 1.5673669246564523,
      "grad_norm": 0.19065144658088684,
      "learning_rate": 8.675747389058342e-06,
      "loss": 0.2615,
      "step": 2480
    },
    {
      "epoch": 1.5736850418575266,
      "grad_norm": 0.17359939217567444,
      "learning_rate": 8.660753776597433e-06,
      "loss": 0.261,
      "step": 2490
    },
    {
      "epoch": 1.5800031590586006,
      "grad_norm": 0.19566282629966736,
      "learning_rate": 8.645688873650785e-06,
      "loss": 0.2623,
      "step": 2500
    },
    {
      "epoch": 1.5863212762596746,
      "grad_norm": 0.1743886023759842,
      "learning_rate": 8.630552973596294e-06,
      "loss": 0.2613,
      "step": 2510
    },
    {
      "epoch": 1.5926393934607486,
      "grad_norm": 0.20789675414562225,
      "learning_rate": 8.615346371194475e-06,
      "loss": 0.2603,
      "step": 2520
    },
    {
      "epoch": 1.5989575106618228,
      "grad_norm": 0.17617076635360718,
      "learning_rate": 8.600069362582722e-06,
      "loss": 0.2613,
      "step": 2530
    },
    {
      "epoch": 1.605275627862897,
      "grad_norm": 0.18429051339626312,
      "learning_rate": 8.58472224526953e-06,
      "loss": 0.2623,
      "step": 2540
    },
    {
      "epoch": 1.611593745063971,
      "grad_norm": 0.2026170939207077,
      "learning_rate": 8.569305318128717e-06,
      "loss": 0.2614,
      "step": 2550
    },
    {
      "epoch": 1.617911862265045,
      "grad_norm": 0.1982942372560501,
      "learning_rate": 8.553818881393595e-06,
      "loss": 0.2591,
      "step": 2560
    },
    {
      "epoch": 1.624229979466119,
      "grad_norm": 0.17273586988449097,
      "learning_rate": 8.538263236651119e-06,
      "loss": 0.2612,
      "step": 2570
    },
    {
      "epoch": 1.630548096667193,
      "grad_norm": 0.19549575448036194,
      "learning_rate": 8.522638686836024e-06,
      "loss": 0.259,
      "step": 2580
    },
    {
      "epoch": 1.6368662138682673,
      "grad_norm": 0.23418502509593964,
      "learning_rate": 8.50694553622492e-06,
      "loss": 0.2582,
      "step": 2590
    },
    {
      "epoch": 1.6431843310693415,
      "grad_norm": 0.19169150292873383,
      "learning_rate": 8.491184090430365e-06,
      "loss": 0.2592,
      "step": 2600
    },
    {
      "epoch": 1.6495024482704155,
      "grad_norm": 0.20778028666973114,
      "learning_rate": 8.475354656394916e-06,
      "loss": 0.2624,
      "step": 2610
    },
    {
      "epoch": 1.6558205654714895,
      "grad_norm": 0.19188308715820312,
      "learning_rate": 8.459457542385154e-06,
      "loss": 0.2589,
      "step": 2620
    },
    {
      "epoch": 1.6621386826725635,
      "grad_norm": 0.187831848859787,
      "learning_rate": 8.44349305798567e-06,
      "loss": 0.2594,
      "step": 2630
    },
    {
      "epoch": 1.6684567998736377,
      "grad_norm": 0.20327366888523102,
      "learning_rate": 8.427461514093056e-06,
      "loss": 0.2595,
      "step": 2640
    },
    {
      "epoch": 1.6747749170747117,
      "grad_norm": 0.19990861415863037,
      "learning_rate": 8.411363222909825e-06,
      "loss": 0.2582,
      "step": 2650
    },
    {
      "epoch": 1.681093034275786,
      "grad_norm": 0.19513264298439026,
      "learning_rate": 8.395198497938354e-06,
      "loss": 0.2587,
      "step": 2660
    },
    {
      "epoch": 1.68741115147686,
      "grad_norm": 0.18786491453647614,
      "learning_rate": 8.378967653974766e-06,
      "loss": 0.2561,
      "step": 2670
    },
    {
      "epoch": 1.693729268677934,
      "grad_norm": 0.2018646001815796,
      "learning_rate": 8.362671007102798e-06,
      "loss": 0.2582,
      "step": 2680
    },
    {
      "epoch": 1.700047385879008,
      "grad_norm": 0.17802584171295166,
      "learning_rate": 8.34630887468766e-06,
      "loss": 0.2584,
      "step": 2690
    },
    {
      "epoch": 1.7063655030800822,
      "grad_norm": 0.1678951233625412,
      "learning_rate": 8.329881575369838e-06,
      "loss": 0.2574,
      "step": 2700
    },
    {
      "epoch": 1.7126836202811562,
      "grad_norm": 0.18521824479103088,
      "learning_rate": 8.313389429058895e-06,
      "loss": 0.26,
      "step": 2710
    },
    {
      "epoch": 1.7190017374822304,
      "grad_norm": 0.18977366387844086,
      "learning_rate": 8.296832756927245e-06,
      "loss": 0.2586,
      "step": 2720
    },
    {
      "epoch": 1.7253198546833044,
      "grad_norm": 0.19465599954128265,
      "learning_rate": 8.280211881403892e-06,
      "loss": 0.2599,
      "step": 2730
    },
    {
      "epoch": 1.7316379718843784,
      "grad_norm": 0.20573335886001587,
      "learning_rate": 8.263527126168156e-06,
      "loss": 0.2582,
      "step": 2740
    },
    {
      "epoch": 1.7379560890854524,
      "grad_norm": 0.18216483294963837,
      "learning_rate": 8.246778816143365e-06,
      "loss": 0.2594,
      "step": 2750
    },
    {
      "epoch": 1.7442742062865266,
      "grad_norm": 0.1724158674478531,
      "learning_rate": 8.229967277490533e-06,
      "loss": 0.2585,
      "step": 2760
    },
    {
      "epoch": 1.7505923234876009,
      "grad_norm": 0.22212329506874084,
      "learning_rate": 8.213092837602004e-06,
      "loss": 0.2587,
      "step": 2770
    },
    {
      "epoch": 1.7569104406886749,
      "grad_norm": 0.21226562559604645,
      "learning_rate": 8.196155825095073e-06,
      "loss": 0.2592,
      "step": 2780
    },
    {
      "epoch": 1.7632285578897489,
      "grad_norm": 0.1901644766330719,
      "learning_rate": 8.179156569805597e-06,
      "loss": 0.2584,
      "step": 2790
    },
    {
      "epoch": 1.7695466750908229,
      "grad_norm": 0.1988213062286377,
      "learning_rate": 8.16209540278156e-06,
      "loss": 0.2595,
      "step": 2800
    },
    {
      "epoch": 1.7758647922918969,
      "grad_norm": 0.1761639416217804,
      "learning_rate": 8.144972656276637e-06,
      "loss": 0.2576,
      "step": 2810
    },
    {
      "epoch": 1.782182909492971,
      "grad_norm": 0.2082483023405075,
      "learning_rate": 8.127788663743712e-06,
      "loss": 0.2576,
      "step": 2820
    },
    {
      "epoch": 1.7885010266940453,
      "grad_norm": 0.17774218320846558,
      "learning_rate": 8.110543759828395e-06,
      "loss": 0.2574,
      "step": 2830
    },
    {
      "epoch": 1.7948191438951193,
      "grad_norm": 0.18034055829048157,
      "learning_rate": 8.0932382803625e-06,
      "loss": 0.2572,
      "step": 2840
    },
    {
      "epoch": 1.8011372610961933,
      "grad_norm": 0.21685677766799927,
      "learning_rate": 8.075872562357502e-06,
      "loss": 0.2585,
      "step": 2850
    },
    {
      "epoch": 1.8074553782972673,
      "grad_norm": 0.18717004358768463,
      "learning_rate": 8.058446943997977e-06,
      "loss": 0.258,
      "step": 2860
    },
    {
      "epoch": 1.8137734954983415,
      "grad_norm": 0.1846955120563507,
      "learning_rate": 8.040961764635025e-06,
      "loss": 0.2573,
      "step": 2870
    },
    {
      "epoch": 1.8200916126994156,
      "grad_norm": 0.17588602006435394,
      "learning_rate": 8.02341736477964e-06,
      "loss": 0.2585,
      "step": 2880
    },
    {
      "epoch": 1.8264097299004898,
      "grad_norm": 0.16006359457969666,
      "learning_rate": 8.0058140860961e-06,
      "loss": 0.2581,
      "step": 2890
    },
    {
      "epoch": 1.8327278471015638,
      "grad_norm": 0.20451048016548157,
      "learning_rate": 7.988152271395304e-06,
      "loss": 0.2569,
      "step": 2900
    },
    {
      "epoch": 1.8390459643026378,
      "grad_norm": 0.22039860486984253,
      "learning_rate": 7.970432264628094e-06,
      "loss": 0.2548,
      "step": 2910
    },
    {
      "epoch": 1.8453640815037118,
      "grad_norm": 0.20109356939792633,
      "learning_rate": 7.95265441087856e-06,
      "loss": 0.2557,
      "step": 2920
    },
    {
      "epoch": 1.851682198704786,
      "grad_norm": 0.18628036975860596,
      "learning_rate": 7.934819056357321e-06,
      "loss": 0.255,
      "step": 2930
    },
    {
      "epoch": 1.85800031590586,
      "grad_norm": 0.17076027393341064,
      "learning_rate": 7.916926548394783e-06,
      "loss": 0.2575,
      "step": 2940
    },
    {
      "epoch": 1.8643184331069342,
      "grad_norm": 0.1676408052444458,
      "learning_rate": 7.898977235434368e-06,
      "loss": 0.2569,
      "step": 2950
    },
    {
      "epoch": 1.8706365503080082,
      "grad_norm": 0.18232934176921844,
      "learning_rate": 7.88097146702574e-06,
      "loss": 0.2548,
      "step": 2960
    },
    {
      "epoch": 1.8769546675090822,
      "grad_norm": 0.1734633445739746,
      "learning_rate": 7.862909593817984e-06,
      "loss": 0.2568,
      "step": 2970
    },
    {
      "epoch": 1.8832727847101562,
      "grad_norm": 0.17797045409679413,
      "learning_rate": 7.844791967552792e-06,
      "loss": 0.2586,
      "step": 2980
    },
    {
      "epoch": 1.8895909019112305,
      "grad_norm": 0.19380344450473785,
      "learning_rate": 7.826618941057597e-06,
      "loss": 0.2567,
      "step": 2990
    },
    {
      "epoch": 1.8959090191123047,
      "grad_norm": 0.20007206499576569,
      "learning_rate": 7.808390868238723e-06,
      "loss": 0.2575,
      "step": 3000
    },
    {
      "epoch": 1.9022271363133787,
      "grad_norm": 0.18448038399219513,
      "learning_rate": 7.790108104074468e-06,
      "loss": 0.2574,
      "step": 3010
    },
    {
      "epoch": 1.9085452535144527,
      "grad_norm": 0.17711378633975983,
      "learning_rate": 7.77177100460821e-06,
      "loss": 0.2578,
      "step": 3020
    },
    {
      "epoch": 1.9148633707155267,
      "grad_norm": 0.18232811987400055,
      "learning_rate": 7.753379926941468e-06,
      "loss": 0.2577,
      "step": 3030
    },
    {
      "epoch": 1.9211814879166007,
      "grad_norm": 0.1973661184310913,
      "learning_rate": 7.734935229226945e-06,
      "loss": 0.254,
      "step": 3040
    },
    {
      "epoch": 1.927499605117675,
      "grad_norm": 0.17610979080200195,
      "learning_rate": 7.716437270661552e-06,
      "loss": 0.2541,
      "step": 3050
    },
    {
      "epoch": 1.9338177223187492,
      "grad_norm": 0.18116143345832825,
      "learning_rate": 7.697886411479422e-06,
      "loss": 0.2562,
      "step": 3060
    },
    {
      "epoch": 1.9401358395198232,
      "grad_norm": 0.19937658309936523,
      "learning_rate": 7.679283012944887e-06,
      "loss": 0.2565,
      "step": 3070
    },
    {
      "epoch": 1.9464539567208972,
      "grad_norm": 0.17094001173973083,
      "learning_rate": 7.660627437345438e-06,
      "loss": 0.2546,
      "step": 3080
    },
    {
      "epoch": 1.9527720739219712,
      "grad_norm": 0.17260311543941498,
      "learning_rate": 7.641920047984683e-06,
      "loss": 0.2535,
      "step": 3090
    },
    {
      "epoch": 1.9590901911230452,
      "grad_norm": 0.16419674456119537,
      "learning_rate": 7.6231612091752625e-06,
      "loss": 0.2574,
      "step": 3100
    },
    {
      "epoch": 1.9654083083241194,
      "grad_norm": 0.17597036063671112,
      "learning_rate": 7.604351286231759e-06,
      "loss": 0.2538,
      "step": 3110
    },
    {
      "epoch": 1.9717264255251936,
      "grad_norm": 0.19706901907920837,
      "learning_rate": 7.585490645463574e-06,
      "loss": 0.2525,
      "step": 3120
    },
    {
      "epoch": 1.9780445427262676,
      "grad_norm": 0.16717633605003357,
      "learning_rate": 7.5665796541678106e-06,
      "loss": 0.2561,
      "step": 3130
    },
    {
      "epoch": 1.9843626599273416,
      "grad_norm": 0.18098637461662292,
      "learning_rate": 7.547618680622104e-06,
      "loss": 0.2538,
      "step": 3140
    },
    {
      "epoch": 1.9906807771284156,
      "grad_norm": 0.19447918236255646,
      "learning_rate": 7.528608094077464e-06,
      "loss": 0.2556,
      "step": 3150
    },
    {
      "epoch": 1.9969988943294898,
      "grad_norm": 0.21584630012512207,
      "learning_rate": 7.50954826475107e-06,
      "loss": 0.2532,
      "step": 3160
    },
    {
      "epoch": 2.0037908703206444,
      "grad_norm": 0.18063998222351074,
      "learning_rate": 7.490439563819073e-06,
      "loss": 0.2674,
      "step": 3170
    },
    {
      "epoch": 2.0101089875217184,
      "grad_norm": 0.20729950070381165,
      "learning_rate": 7.4712823634093605e-06,
      "loss": 0.2439,
      "step": 3180
    },
    {
      "epoch": 2.0164271047227924,
      "grad_norm": 0.16232196986675262,
      "learning_rate": 7.452077036594311e-06,
      "loss": 0.245,
      "step": 3190
    },
    {
      "epoch": 2.022745221923867,
      "grad_norm": 0.172638937830925,
      "learning_rate": 7.432823957383533e-06,
      "loss": 0.245,
      "step": 3200
    },
    {
      "epoch": 2.029063339124941,
      "grad_norm": 0.16291241347789764,
      "learning_rate": 7.413523500716571e-06,
      "loss": 0.2437,
      "step": 3210
    },
    {
      "epoch": 2.035381456326015,
      "grad_norm": 0.1787315011024475,
      "learning_rate": 7.394176042455619e-06,
      "loss": 0.2467,
      "step": 3220
    },
    {
      "epoch": 2.041699573527089,
      "grad_norm": 0.19181819260120392,
      "learning_rate": 7.374781959378185e-06,
      "loss": 0.2449,
      "step": 3230
    },
    {
      "epoch": 2.048017690728163,
      "grad_norm": 0.17782440781593323,
      "learning_rate": 7.355341629169768e-06,
      "loss": 0.2457,
      "step": 3240
    },
    {
      "epoch": 2.0543358079292373,
      "grad_norm": 0.18428935110569,
      "learning_rate": 7.335855430416489e-06,
      "loss": 0.2475,
      "step": 3250
    },
    {
      "epoch": 2.0606539251303113,
      "grad_norm": 0.16668711602687836,
      "learning_rate": 7.3163237425977305e-06,
      "loss": 0.2442,
      "step": 3260
    },
    {
      "epoch": 2.0669720423313853,
      "grad_norm": 0.20328602194786072,
      "learning_rate": 7.296746946078737e-06,
      "loss": 0.2428,
      "step": 3270
    },
    {
      "epoch": 2.0732901595324593,
      "grad_norm": 0.17452338337898254,
      "learning_rate": 7.277125422103213e-06,
      "loss": 0.2434,
      "step": 3280
    },
    {
      "epoch": 2.0796082767335333,
      "grad_norm": 0.19674983620643616,
      "learning_rate": 7.2574595527859e-06,
      "loss": 0.2459,
      "step": 3290
    },
    {
      "epoch": 2.0859263939346073,
      "grad_norm": 0.16700546443462372,
      "learning_rate": 7.23774972110513e-06,
      "loss": 0.2441,
      "step": 3300
    },
    {
      "epoch": 2.0922445111356818,
      "grad_norm": 0.1824389100074768,
      "learning_rate": 7.217996310895367e-06,
      "loss": 0.2447,
      "step": 3310
    },
    {
      "epoch": 2.0985626283367558,
      "grad_norm": 0.1628822386264801,
      "learning_rate": 7.19819970683974e-06,
      "loss": 0.245,
      "step": 3320
    },
    {
      "epoch": 2.10488074553783,
      "grad_norm": 0.19150730967521667,
      "learning_rate": 7.178360294462545e-06,
      "loss": 0.2439,
      "step": 3330
    },
    {
      "epoch": 2.111198862738904,
      "grad_norm": 0.1673995554447174,
      "learning_rate": 7.158478460121735e-06,
      "loss": 0.2442,
      "step": 3340
    },
    {
      "epoch": 2.117516979939978,
      "grad_norm": 0.19296851754188538,
      "learning_rate": 7.138554591001405e-06,
      "loss": 0.246,
      "step": 3350
    },
    {
      "epoch": 2.123835097141052,
      "grad_norm": 0.17618988454341888,
      "learning_rate": 7.118589075104243e-06,
      "loss": 0.2418,
      "step": 3360
    },
    {
      "epoch": 2.1301532143421262,
      "grad_norm": 0.19375811517238617,
      "learning_rate": 7.0985823012439745e-06,
      "loss": 0.2429,
      "step": 3370
    },
    {
      "epoch": 2.1364713315432002,
      "grad_norm": 0.20015262067317963,
      "learning_rate": 7.078534659037801e-06,
| "loss": 0.2439, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.1427894487442742, | |
| "grad_norm": 0.1756194531917572, | |
| "learning_rate": 7.0584465388988e-06, | |
| "loss": 0.2441, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.1491075659453482, | |
| "grad_norm": 0.18751130998134613, | |
| "learning_rate": 7.038318332028326e-06, | |
| "loss": 0.2442, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.1554256831464222, | |
| "grad_norm": 0.16298574209213257, | |
| "learning_rate": 7.018150430408394e-06, | |
| "loss": 0.2447, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.1617438003474962, | |
| "grad_norm": 0.20823705196380615, | |
| "learning_rate": 6.997943226794051e-06, | |
| "loss": 0.2441, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.1680619175485707, | |
| "grad_norm": 0.19422686100006104, | |
| "learning_rate": 6.97769711470571e-06, | |
| "loss": 0.2432, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.1743800347496447, | |
| "grad_norm": 0.16952840983867645, | |
| "learning_rate": 6.95741248842151e-06, | |
| "loss": 0.2443, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.1806981519507187, | |
| "grad_norm": 0.17325712740421295, | |
| "learning_rate": 6.937089742969615e-06, | |
| "loss": 0.2441, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.1870162691517927, | |
| "grad_norm": 0.1852918565273285, | |
| "learning_rate": 6.916729274120539e-06, | |
| "loss": 0.2465, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.1933343863528667, | |
| "grad_norm": 0.16571369767189026, | |
| "learning_rate": 6.896331478379429e-06, | |
| "loss": 0.2434, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.199652503553941, | |
| "grad_norm": 0.18638812005519867, | |
| "learning_rate": 6.875896752978345e-06, | |
| "loss": 0.2461, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.205970620755015, | |
| "grad_norm": 0.18144486844539642, | |
| "learning_rate": 6.855425495868524e-06, | |
| "loss": 0.2438, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.212288737956089, | |
| "grad_norm": 0.1876654475927353, | |
| "learning_rate": 6.834918105712638e-06, | |
| "loss": 0.244, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.218606855157163, | |
| "grad_norm": 0.18819020688533783, | |
| "learning_rate": 6.814374981877013e-06, | |
| "loss": 0.2432, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.224924972358237, | |
| "grad_norm": 0.1788501888513565, | |
| "learning_rate": 6.793796524423868e-06, | |
| "loss": 0.245, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.231243089559311, | |
| "grad_norm": 0.19036491215229034, | |
| "learning_rate": 6.773183134103522e-06, | |
| "loss": 0.2428, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.2375612067603856, | |
| "grad_norm": 0.18438424170017242, | |
| "learning_rate": 6.752535212346576e-06, | |
| "loss": 0.2422, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.2438793239614596, | |
| "grad_norm": 0.16770315170288086, | |
| "learning_rate": 6.7318531612561145e-06, | |
| "loss": 0.2426, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.2501974411625336, | |
| "grad_norm": 0.1698455810546875, | |
| "learning_rate": 6.711137383599859e-06, | |
| "loss": 0.2441, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.2565155583636076, | |
| "grad_norm": 0.16267286241054535, | |
| "learning_rate": 6.690388282802338e-06, | |
| "loss": 0.2435, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.2628336755646816, | |
| "grad_norm": 0.19407695531845093, | |
| "learning_rate": 6.6696062629370155e-06, | |
| "loss": 0.2417, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.2691517927657556, | |
| "grad_norm": 0.20387399196624756, | |
| "learning_rate": 6.648791728718436e-06, | |
| "loss": 0.2407, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.27546990996683, | |
| "grad_norm": 0.17418253421783447, | |
| "learning_rate": 6.627945085494335e-06, | |
| "loss": 0.2451, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.281788027167904, | |
| "grad_norm": 0.1878381371498108, | |
| "learning_rate": 6.607066739237748e-06, | |
| "loss": 0.2442, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.288106144368978, | |
| "grad_norm": 0.16501325368881226, | |
| "learning_rate": 6.586157096539105e-06, | |
| "loss": 0.2427, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.294424261570052, | |
| "grad_norm": 0.17008960247039795, | |
| "learning_rate": 6.565216564598307e-06, | |
| "loss": 0.2459, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.300742378771126, | |
| "grad_norm": 0.167978435754776, | |
| "learning_rate": 6.544245551216804e-06, | |
| "loss": 0.2416, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.3070604959722, | |
| "grad_norm": 0.17641465365886688, | |
| "learning_rate": 6.5232444647896465e-06, | |
| "loss": 0.2435, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.3133786131732745, | |
| "grad_norm": 0.1629774123430252, | |
| "learning_rate": 6.50221371429754e-06, | |
| "loss": 0.244, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.3196967303743485, | |
| "grad_norm": 0.1710384041070938, | |
| "learning_rate": 6.481153709298872e-06, | |
| "loss": 0.2437, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.3260148475754225, | |
| "grad_norm": 0.1770370900630951, | |
| "learning_rate": 6.4600648599217394e-06, | |
| "loss": 0.2421, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.3323329647764965, | |
| "grad_norm": 0.17405395209789276, | |
| "learning_rate": 6.4389475768559675e-06, | |
| "loss": 0.2414, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.3386510819775705, | |
| "grad_norm": 0.1998765915632248, | |
| "learning_rate": 6.417802271345102e-06, | |
| "loss": 0.2416, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.344969199178645, | |
| "grad_norm": 0.18685515224933624, | |
| "learning_rate": 6.3966293551784035e-06, | |
| "loss": 0.2431, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.351287316379719, | |
| "grad_norm": 0.17079129815101624, | |
| "learning_rate": 6.375429240682837e-06, | |
| "loss": 0.2423, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.357605433580793, | |
| "grad_norm": 0.18592600524425507, | |
| "learning_rate": 6.354202340715027e-06, | |
| "loss": 0.2419, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.363923550781867, | |
| "grad_norm": 0.17736919224262238, | |
| "learning_rate": 6.332949068653229e-06, | |
| "loss": 0.2424, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.370241667982941, | |
| "grad_norm": 0.1869024783372879, | |
| "learning_rate": 6.311669838389279e-06, | |
| "loss": 0.2446, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.376559785184015, | |
| "grad_norm": 0.17358314990997314, | |
| "learning_rate": 6.290365064320521e-06, | |
| "loss": 0.2425, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.382877902385089, | |
| "grad_norm": 0.16948603093624115, | |
| "learning_rate": 6.2690351613417545e-06, | |
| "loss": 0.2441, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.3891960195861635, | |
| "grad_norm": 0.16800999641418457, | |
| "learning_rate": 6.247680544837142e-06, | |
| "loss": 0.2425, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.3955141367872375, | |
| "grad_norm": 0.17783384025096893, | |
| "learning_rate": 6.226301630672127e-06, | |
| "loss": 0.2437, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.4018322539883115, | |
| "grad_norm": 0.16958226263523102, | |
| "learning_rate": 6.204898835185325e-06, | |
| "loss": 0.2435, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.4081503711893855, | |
| "grad_norm": 0.19137728214263916, | |
| "learning_rate": 6.18347257518043e-06, | |
| "loss": 0.2442, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.4144684883904595, | |
| "grad_norm": 0.1784157156944275, | |
| "learning_rate": 6.162023267918086e-06, | |
| "loss": 0.2421, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.420786605591534, | |
| "grad_norm": 0.15680409967899323, | |
| "learning_rate": 6.140551331107767e-06, | |
| "loss": 0.2421, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.427104722792608, | |
| "grad_norm": 0.18923278152942657, | |
| "learning_rate": 6.1190571828996425e-06, | |
| "loss": 0.241, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.433422839993682, | |
| "grad_norm": 0.2097504884004593, | |
| "learning_rate": 6.097541241876428e-06, | |
| "loss": 0.243, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.439740957194756, | |
| "grad_norm": 0.18435165286064148, | |
| "learning_rate": 6.076003927045242e-06, | |
| "loss": 0.2427, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.44605907439583, | |
| "grad_norm": 0.181401789188385, | |
| "learning_rate": 6.05444565782944e-06, | |
| "loss": 0.2416, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.452377191596904, | |
| "grad_norm": 0.17077374458312988, | |
| "learning_rate": 6.032866854060451e-06, | |
| "loss": 0.2435, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.4586953087979784, | |
| "grad_norm": 0.18238386511802673, | |
| "learning_rate": 6.011267935969596e-06, | |
| "loss": 0.2424, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.4650134259990524, | |
| "grad_norm": 0.18740853667259216, | |
| "learning_rate": 5.9896493241799115e-06, | |
| "loss": 0.2415, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.4713315432001264, | |
| "grad_norm": 0.1816156506538391, | |
| "learning_rate": 5.968011439697951e-06, | |
| "loss": 0.2432, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.4776496604012004, | |
| "grad_norm": 0.16910015046596527, | |
| "learning_rate": 5.946354703905591e-06, | |
| "loss": 0.243, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.4839677776022744, | |
| "grad_norm": 0.1906070113182068, | |
| "learning_rate": 5.924679538551825e-06, | |
| "loss": 0.2416, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.490285894803349, | |
| "grad_norm": 0.1867346614599228, | |
| "learning_rate": 5.902986365744544e-06, | |
| "loss": 0.2437, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.496604012004423, | |
| "grad_norm": 0.187602698802948, | |
| "learning_rate": 5.881275607942325e-06, | |
| "loss": 0.2408, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.502922129205497, | |
| "grad_norm": 0.1724424809217453, | |
| "learning_rate": 5.859547687946199e-06, | |
| "loss": 0.2426, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.509240246406571, | |
| "grad_norm": 0.1793140023946762, | |
| "learning_rate": 5.837803028891418e-06, | |
| "loss": 0.2425, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.515558363607645, | |
| "grad_norm": 0.17329296469688416, | |
| "learning_rate": 5.816042054239212e-06, | |
| "loss": 0.2441, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.521876480808719, | |
| "grad_norm": 0.22843770682811737, | |
| "learning_rate": 5.794265187768551e-06, | |
| "loss": 0.241, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.528194598009793, | |
| "grad_norm": 0.1654650717973709, | |
| "learning_rate": 5.772472853567882e-06, | |
| "loss": 0.2426, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.5345127152108673, | |
| "grad_norm": 0.17043884098529816, | |
| "learning_rate": 5.750665476026875e-06, | |
| "loss": 0.2406, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.5408308324119413, | |
| "grad_norm": 0.16985023021697998, | |
| "learning_rate": 5.728843479828161e-06, | |
| "loss": 0.2401, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.5471489496130153, | |
| "grad_norm": 0.17778819799423218, | |
| "learning_rate": 5.707007289939055e-06, | |
| "loss": 0.2441, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.5534670668140893, | |
| "grad_norm": 0.1612013876438141, | |
| "learning_rate": 5.6851573316032845e-06, | |
| "loss": 0.2399, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.5597851840151638, | |
| "grad_norm": 0.17063820362091064, | |
| "learning_rate": 5.66329403033271e-06, | |
| "loss": 0.2412, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.5661033012162378, | |
| "grad_norm": 0.16587677597999573, | |
| "learning_rate": 5.641417811899033e-06, | |
| "loss": 0.239, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.5724214184173118, | |
| "grad_norm": 0.17766372859477997, | |
| "learning_rate": 5.619529102325507e-06, | |
| "loss": 0.2411, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.5787395356183858, | |
| "grad_norm": 0.18175509572029114, | |
| "learning_rate": 5.597628327878645e-06, | |
| "loss": 0.242, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.5850576528194598, | |
| "grad_norm": 0.16519029438495636, | |
| "learning_rate": 5.575715915059909e-06, | |
| "loss": 0.2425, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.5913757700205338, | |
| "grad_norm": 0.17657625675201416, | |
| "learning_rate": 5.553792290597414e-06, | |
| "loss": 0.2406, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.5976938872216078, | |
| "grad_norm": 0.17835581302642822, | |
| "learning_rate": 5.531857881437612e-06, | |
| "loss": 0.2412, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.6040120044226818, | |
| "grad_norm": 0.2040930986404419, | |
| "learning_rate": 5.509913114736981e-06, | |
| "loss": 0.2389, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.610330121623756, | |
| "grad_norm": 0.17634861171245575, | |
| "learning_rate": 5.487958417853699e-06, | |
| "loss": 0.2409, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.61664823882483, | |
| "grad_norm": 0.16980887949466705, | |
| "learning_rate": 5.465994218339333e-06, | |
| "loss": 0.2397, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.6229663560259042, | |
| "grad_norm": 0.16278938949108124, | |
| "learning_rate": 5.444020943930506e-06, | |
| "loss": 0.2419, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.6292844732269782, | |
| "grad_norm": 0.18307939171791077, | |
| "learning_rate": 5.4220390225405606e-06, | |
| "loss": 0.241, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.6356025904280527, | |
| "grad_norm": 0.16562727093696594, | |
| "learning_rate": 5.400048882251245e-06, | |
| "loss": 0.2391, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.6419207076291267, | |
| "grad_norm": 0.18560691177845, | |
| "learning_rate": 5.378050951304356e-06, | |
| "loss": 0.2417, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.6482388248302007, | |
| "grad_norm": 0.18558987975120544, | |
| "learning_rate": 5.3560456580934085e-06, | |
| "loss": 0.2415, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.6545569420312747, | |
| "grad_norm": 0.16538389027118683, | |
| "learning_rate": 5.334033431155294e-06, | |
| "loss": 0.2423, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.6608750592323487, | |
| "grad_norm": 0.17581807076931, | |
| "learning_rate": 5.312014699161935e-06, | |
| "loss": 0.2402, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.6671931764334227, | |
| "grad_norm": 0.18032985925674438, | |
| "learning_rate": 5.289989890911928e-06, | |
| "loss": 0.2421, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.6735112936344967, | |
| "grad_norm": 0.18549709022045135, | |
| "learning_rate": 5.267959435322209e-06, | |
| "loss": 0.2413, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.679829410835571, | |
| "grad_norm": 0.1603822559118271, | |
| "learning_rate": 5.245923761419688e-06, | |
| "loss": 0.2407, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.686147528036645, | |
| "grad_norm": 0.17524629831314087, | |
| "learning_rate": 5.223883298332894e-06, | |
| "loss": 0.2395, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.692465645237719, | |
| "grad_norm": 0.16933143138885498, | |
| "learning_rate": 5.20183847528363e-06, | |
| "loss": 0.2387, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.698783762438793, | |
| "grad_norm": 0.17397332191467285, | |
| "learning_rate": 5.179789721578597e-06, | |
| "loss": 0.2392, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.7051018796398676, | |
| "grad_norm": 0.1716376543045044, | |
| "learning_rate": 5.157737466601049e-06, | |
| "loss": 0.2412, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.7114199968409416, | |
| "grad_norm": 0.17333756387233734, | |
| "learning_rate": 5.135682139802422e-06, | |
| "loss": 0.241, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.7177381140420156, | |
| "grad_norm": 0.1601376235485077, | |
| "learning_rate": 5.113624170693977e-06, | |
| "loss": 0.2423, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.7240562312430896, | |
| "grad_norm": 0.18671631813049316, | |
| "learning_rate": 5.091563988838425e-06, | |
| "loss": 0.2396, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.7303743484441636, | |
| "grad_norm": 0.17103099822998047, | |
| "learning_rate": 5.069502023841576e-06, | |
| "loss": 0.2399, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.7366924656452376, | |
| "grad_norm": 0.17045724391937256, | |
| "learning_rate": 5.047438705343961e-06, | |
| "loss": 0.2407, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.7430105828463116, | |
| "grad_norm": 0.171345517039299, | |
| "learning_rate": 5.025374463012472e-06, | |
| "loss": 0.2411, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.7493287000473856, | |
| "grad_norm": 0.16573168337345123, | |
| "learning_rate": 5.00330972653199e-06, | |
| "loss": 0.2394, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.75564681724846, | |
| "grad_norm": 0.16506439447402954, | |
| "learning_rate": 4.981244925597018e-06, | |
| "loss": 0.24, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.761964934449534, | |
| "grad_norm": 0.17510944604873657, | |
| "learning_rate": 4.959180489903318e-06, | |
| "loss": 0.2406, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.768283051650608, | |
| "grad_norm": 0.17315103113651276, | |
| "learning_rate": 4.937116849139538e-06, | |
| "loss": 0.2407, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.774601168851682, | |
| "grad_norm": 0.17643538117408752, | |
| "learning_rate": 4.915054432978842e-06, | |
| "loss": 0.2407, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.7809192860527565, | |
| "grad_norm": 0.1600533127784729, | |
| "learning_rate": 4.89299367107055e-06, | |
| "loss": 0.2407, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.7872374032538305, | |
| "grad_norm": 0.1802552044391632, | |
| "learning_rate": 4.870934993031763e-06, | |
| "loss": 0.2419, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.7935555204549045, | |
| "grad_norm": 0.1862618327140808, | |
| "learning_rate": 4.848878828439008e-06, | |
| "loss": 0.2411, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.7998736376559785, | |
| "grad_norm": 0.17863595485687256, | |
| "learning_rate": 4.8268256068198525e-06, | |
| "loss": 0.242, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.8061917548570525, | |
| "grad_norm": 0.1779400110244751, | |
| "learning_rate": 4.804775757644558e-06, | |
| "loss": 0.241, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.8125098720581265, | |
| "grad_norm": 0.16401080787181854, | |
| "learning_rate": 4.782729710317713e-06, | |
| "loss": 0.2412, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.8188279892592005, | |
| "grad_norm": 0.16927611827850342, | |
| "learning_rate": 4.760687894169867e-06, | |
| "loss": 0.2385, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.825146106460275, | |
| "grad_norm": 0.1770433932542801, | |
| "learning_rate": 4.738650738449161e-06, | |
| "loss": 0.2379, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.831464223661349, | |
| "grad_norm": 0.16366536915302277, | |
| "learning_rate": 4.7166186723129895e-06, | |
| "loss": 0.2409, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.837782340862423, | |
| "grad_norm": 0.15729236602783203, | |
| "learning_rate": 4.694592124819628e-06, | |
| "loss": 0.2408, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.844100458063497, | |
| "grad_norm": 0.16710855066776276, | |
| "learning_rate": 4.672571524919875e-06, | |
| "loss": 0.2404, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.8504185752645714, | |
| "grad_norm": 0.15631146728992462, | |
| "learning_rate": 4.65055730144871e-06, | |
| "loss": 0.2385, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.8567366924656454, | |
| "grad_norm": 0.15912912786006927, | |
| "learning_rate": 4.628549883116933e-06, | |
| "loss": 0.2404, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.8630548096667194, | |
| "grad_norm": 0.1697085201740265, | |
| "learning_rate": 4.606549698502824e-06, | |
| "loss": 0.238, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.8693729268677934, | |
| "grad_norm": 0.1617184579372406, | |
| "learning_rate": 4.584557176043782e-06, | |
| "loss": 0.2386, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.8756910440688674, | |
| "grad_norm": 0.16806644201278687, | |
| "learning_rate": 4.562572744028e-06, | |
| "loss": 0.2396, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.8820091612699414, | |
| "grad_norm": 0.15261489152908325, | |
| "learning_rate": 4.540596830586113e-06, | |
| "loss": 0.2398, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.8883272784710154, | |
| "grad_norm": 0.16236743330955505, | |
| "learning_rate": 4.518629863682861e-06, | |
| "loss": 0.2404, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.8946453956720894, | |
| "grad_norm": 0.16257306933403015, | |
| "learning_rate": 4.496672271108758e-06, | |
| "loss": 0.2381, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.900963512873164, | |
| "grad_norm": 0.1624518632888794, | |
| "learning_rate": 4.474724480471762e-06, | |
| "loss": 0.2422, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.907281630074238, | |
| "grad_norm": 0.1868724673986435, | |
| "learning_rate": 4.452786919188943e-06, | |
| "loss": 0.2387, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.913599747275312, | |
| "grad_norm": 0.15944679081439972, | |
| "learning_rate": 4.430860014478162e-06, | |
| "loss": 0.2375, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.919917864476386, | |
| "grad_norm": 0.16003431379795074, | |
| "learning_rate": 4.40894419334975e-06, | |
| "loss": 0.2397, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.9262359816774604, | |
| "grad_norm": 0.17390407621860504, | |
| "learning_rate": 4.387039882598198e-06, | |
| "loss": 0.2399, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.9325540988785344, | |
| "grad_norm": 0.17524614930152893, | |
| "learning_rate": 4.365147508793839e-06, | |
| "loss": 0.2387, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.9388722160796084, | |
| "grad_norm": 0.17224489152431488, | |
| "learning_rate": 4.343267498274535e-06, | |
| "loss": 0.2399, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.9451903332806824, | |
| "grad_norm": 0.17266203463077545, | |
| "learning_rate": 4.321400277137395e-06, | |
| "loss": 0.2376, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.9515084504817564, | |
| "grad_norm": 0.15991342067718506, | |
| "learning_rate": 4.299546271230457e-06, | |
| "loss": 0.2367, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.9578265676828304, | |
| "grad_norm": 0.15629249811172485, | |
| "learning_rate": 4.277705906144399e-06, | |
| "loss": 0.2386, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.9641446848839044, | |
| "grad_norm": 0.16798162460327148, | |
| "learning_rate": 4.255879607204262e-06, | |
| "loss": 0.2387, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.970462802084979, | |
| "grad_norm": 0.16710205376148224, | |
| "learning_rate": 4.234067799461153e-06, | |
| "loss": 0.24, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.976780919286053, | |
| "grad_norm": 0.16920699179172516, | |
| "learning_rate": 4.212270907683979e-06, | |
| "loss": 0.2415, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.983099036487127, | |
| "grad_norm": 0.1665589064359665, | |
| "learning_rate": 4.190489356351163e-06, | |
| "loss": 0.2395, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.989417153688201, | |
| "grad_norm": 0.1775292009115219, | |
| "learning_rate": 4.168723569642388e-06, | |
| "loss": 0.2377, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.9957352708892753, | |
| "grad_norm": 0.1743878573179245, | |
| "learning_rate": 4.146973971430333e-06, | |
| "loss": 0.2384, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 3.0025272468804296, | |
| "grad_norm": 0.17591196298599243, | |
| "learning_rate": 4.125240985272419e-06, | |
| "loss": 0.2507, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.0088453640815036, | |
| "grad_norm": 0.17899581789970398, | |
| "learning_rate": 4.103525034402554e-06, | |
| "loss": 0.2312, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 3.0151634812825776, | |
| "grad_norm": 0.16859206557273865, | |
| "learning_rate": 4.0818265417228995e-06, | |
| "loss": 0.2318, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 3.021481598483652, | |
| "grad_norm": 0.16476421058177948, | |
| "learning_rate": 4.060145929795635e-06, | |
| "loss": 0.2291, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 3.027799715684726, | |
| "grad_norm": 0.16536416113376617, | |
| "learning_rate": 4.03848362083472e-06, | |
| "loss": 0.2316, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 3.0341178328858, | |
| "grad_norm": 0.1791021227836609, | |
| "learning_rate": 4.01684003669768e-06, | |
| "loss": 0.2297, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.040435950086874, | |
| "grad_norm": 0.16363908350467682, | |
| "learning_rate": 3.9952155988773876e-06, | |
| "loss": 0.2309, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 3.046754067287948, | |
| "grad_norm": 0.16255658864974976, | |
| "learning_rate": 3.973610728493859e-06, | |
| "loss": 0.2297, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 3.053072184489022, | |
| "grad_norm": 0.16152887046337128, | |
| "learning_rate": 3.952025846286039e-06, | |
| "loss": 0.2297, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 3.0593903016900965, | |
| "grad_norm": 0.16641443967819214, | |
| "learning_rate": 3.930461372603627e-06, | |
| "loss": 0.2331, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 3.0657084188911705, | |
| "grad_norm": 0.1551787257194519, | |
| "learning_rate": 3.9089177273988776e-06, | |
| "loss": 0.2297, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.0720265360922445, | |
| "grad_norm": 0.16921547055244446, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.2312, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 3.0783446532933185, | |
| "grad_norm": 0.16538488864898682, | |
| "learning_rate": 3.865894600195123e-06, | |
| "loss": 0.2292, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 3.0846627704943925, | |
| "grad_norm": 0.16358362138271332, | |
| "learning_rate": 3.844415956039856e-06, | |
| "loss": 0.2314, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 3.0909808876954665, | |
| "grad_norm": 0.161546528339386, | |
| "learning_rate": 3.822959816033417e-06, | |
| "loss": 0.2298, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 3.097299004896541, | |
| "grad_norm": 0.16105768084526062, | |
| "learning_rate": 3.80152659801834e-06, | |
| "loss": 0.2309, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.103617122097615, | |
| "grad_norm": 0.16659840941429138, | |
| "learning_rate": 3.7801167193907746e-06, | |
| "loss": 0.232, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 3.109935239298689, | |
| "grad_norm": 0.1748570203781128, | |
| "learning_rate": 3.7587305970923495e-06, | |
| "loss": 0.2314, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 3.116253356499763, | |
| "grad_norm": 0.18045374751091003, | |
| "learning_rate": 3.73736864760206e-06, | |
| "loss": 0.2298, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 3.122571473700837, | |
| "grad_norm": 0.1649467945098877, | |
| "learning_rate": 3.7160312869281476e-06, | |
| "loss": 0.2317, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 3.1288895909019114, | |
| "grad_norm": 0.16685360670089722, | |
| "learning_rate": 3.694718930600012e-06, | |
| "loss": 0.2282, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.1352077081029854, | |
| "grad_norm": 0.1727149933576584, | |
| "learning_rate": 3.673431993660106e-06, | |
| "loss": 0.2291, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 3.1415258253040594, | |
| "grad_norm": 0.17158806324005127, | |
| "learning_rate": 3.6521708906558653e-06, | |
| "loss": 0.2308, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 3.1478439425051334, | |
| "grad_norm": 0.16147060692310333, | |
| "learning_rate": 3.6309360356316183e-06, | |
| "loss": 0.2297, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 3.1541620597062074, | |
| "grad_norm": 0.1733555942773819, | |
| "learning_rate": 3.6097278421205408e-06, | |
| "loss": 0.2293, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 3.1604801769072814, | |
| "grad_norm": 0.15878255665302277, | |
| "learning_rate": 3.588546723136598e-06, | |
| "loss": 0.2309, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.166798294108356, | |
| "grad_norm": 0.1642056703567505, | |
| "learning_rate": 3.567393091166489e-06, | |
| "loss": 0.2292, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 3.17311641130943, | |
| "grad_norm": 0.16453072428703308, | |
| "learning_rate": 3.5462673581616298e-06, | |
| "loss": 0.2314, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 3.179434528510504, | |
| "grad_norm": 0.16374921798706055, | |
| "learning_rate": 3.5251699355301253e-06, | |
| "loss": 0.2314, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 3.185752645711578, | |
| "grad_norm": 0.1619606912136078, | |
| "learning_rate": 3.504101234128757e-06, | |
| "loss": 0.2321, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 3.192070762912652, | |
| "grad_norm": 0.16570039093494415, | |
| "learning_rate": 3.4830616642549734e-06, | |
| "loss": 0.231, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.198388880113726, | |
| "grad_norm": 0.15959931910037994, | |
| "learning_rate": 3.462051635638919e-06, | |
| "loss": 0.2316, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 3.2047069973148004, | |
| "grad_norm": 0.16513067483901978, | |
| "learning_rate": 3.441071557435438e-06, | |
| "loss": 0.2317, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 3.2110251145158744, | |
| "grad_norm": 0.15392282605171204, | |
| "learning_rate": 3.420121838216114e-06, | |
| "loss": 0.2305, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 3.2173432317169484, | |
| "grad_norm": 0.1638430505990982, | |
| "learning_rate": 3.39920288596131e-06, | |
| "loss": 0.2308, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 3.2236613489180224, | |
| "grad_norm": 0.15217792987823486, | |
| "learning_rate": 3.378315108052227e-06, | |
| "loss": 0.2322, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.2299794661190964, | |
| "grad_norm": 0.17944923043251038, | |
| "learning_rate": 3.3574589112629683e-06, | |
| "loss": 0.2319, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 3.2362975833201704, | |
| "grad_norm": 0.16493919491767883, | |
| "learning_rate": 3.3366347017526162e-06, | |
| "loss": 0.2314, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 3.242615700521245, | |
| "grad_norm": 0.15931478142738342, | |
| "learning_rate": 3.3158428850573273e-06, | |
| "loss": 0.2308, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 3.248933817722319, | |
| "grad_norm": 0.16134731471538544, | |
| "learning_rate": 3.295083866082429e-06, | |
| "loss": 0.2298, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 3.255251934923393, | |
| "grad_norm": 0.1602196991443634, | |
| "learning_rate": 3.274358049094541e-06, | |
| "loss": 0.231, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.261570052124467, | |
| "grad_norm": 0.15763860940933228, | |
| "learning_rate": 3.253665837713694e-06, | |
| "loss": 0.2296, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 3.267888169325541, | |
| "grad_norm": 0.15692386031150818, | |
| "learning_rate": 3.2330076349054767e-06, | |
| "loss": 0.2301, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 3.2742062865266153, | |
| "grad_norm": 0.17300792038440704, | |
| "learning_rate": 3.2123838429731858e-06, | |
| "loss": 0.2297, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 3.2805244037276893, | |
| "grad_norm": 0.17287859320640564, | |
| "learning_rate": 3.1917948635499956e-06, | |
| "loss": 0.2301, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 3.2868425209287633, | |
| "grad_norm": 0.169038787484169, | |
| "learning_rate": 3.1712410975911224e-06, | |
| "loss": 0.2293, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.2931606381298373, | |
| "grad_norm": 0.1676977425813675, | |
| "learning_rate": 3.150722945366035e-06, | |
| "loss": 0.2307, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 3.2994787553309113, | |
| "grad_norm": 0.1699369102716446, | |
| "learning_rate": 3.1302408064506496e-06, | |
| "loss": 0.2288, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 3.3057968725319853, | |
| "grad_norm": 0.16887900233268738, | |
| "learning_rate": 3.109795079719544e-06, | |
| "loss": 0.2329, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 3.3121149897330597, | |
| "grad_norm": 0.17086876928806305, | |
| "learning_rate": 3.0893861633382015e-06, | |
| "loss": 0.2297, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 3.3184331069341337, | |
| "grad_norm": 0.19851693511009216, | |
| "learning_rate": 3.0690144547552513e-06, | |
| "loss": 0.2309, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.3247512241352077, | |
| "grad_norm": 0.18008504807949066, | |
| "learning_rate": 3.048680350694724e-06, | |
| "loss": 0.234, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 3.3310693413362817, | |
| "grad_norm": 0.18023867905139923, | |
| "learning_rate": 3.0283842471483314e-06, | |
| "loss": 0.2299, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 3.3373874585373557, | |
| "grad_norm": 0.17149996757507324, | |
| "learning_rate": 3.008126539367754e-06, | |
| "loss": 0.2309, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 3.34370557573843, | |
| "grad_norm": 0.1631331443786621, | |
| "learning_rate": 2.9879076218569426e-06, | |
| "loss": 0.2304, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 3.350023692939504, | |
| "grad_norm": 0.1726110428571701, | |
| "learning_rate": 2.9677278883644367e-06, | |
| "loss": 0.2289, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.356341810140578, | |
| "grad_norm": 0.16877932846546173, | |
| "learning_rate": 2.9475877318756928e-06, | |
| "loss": 0.2307, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 3.362659927341652, | |
| "grad_norm": 0.1572154462337494, | |
| "learning_rate": 2.9274875446054397e-06, | |
| "loss": 0.2307, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 3.368978044542726, | |
| "grad_norm": 0.16440938413143158, | |
| "learning_rate": 2.9074277179900324e-06, | |
| "loss": 0.2302, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 3.3752961617438, | |
| "grad_norm": 0.16500261425971985, | |
| "learning_rate": 2.887408642679825e-06, | |
| "loss": 0.2307, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 3.381614278944874, | |
| "grad_norm": 0.16005001962184906, | |
| "learning_rate": 2.867430708531585e-06, | |
| "loss": 0.2293, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.3879323961459487, | |
| "grad_norm": 0.1674973964691162, | |
| "learning_rate": 2.847494304600874e-06, | |
| "loss": 0.2301, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 3.3942505133470227, | |
| "grad_norm": 0.16450868546962738, | |
| "learning_rate": 2.827599819134489e-06, | |
| "loss": 0.23, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 3.4005686305480967, | |
| "grad_norm": 0.1648285835981369, | |
| "learning_rate": 2.807747639562889e-06, | |
| "loss": 0.2305, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 3.4068867477491707, | |
| "grad_norm": 0.17181555926799774, | |
| "learning_rate": 2.7879381524926635e-06, | |
| "loss": 0.2311, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 3.4132048649502447, | |
| "grad_norm": 0.16114503145217896, | |
| "learning_rate": 2.7681717436989954e-06, | |
| "loss": 0.2307, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.419522982151319, | |
| "grad_norm": 0.15842120349407196, | |
| "learning_rate": 2.748448798118149e-06, | |
| "loss": 0.2301, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 3.425841099352393, | |
| "grad_norm": 0.16943858563899994, | |
| "learning_rate": 2.728769699839975e-06, | |
| "loss": 0.2305, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 3.432159216553467, | |
| "grad_norm": 0.1570242941379547, | |
| "learning_rate": 2.7091348321004286e-06, | |
| "loss": 0.2286, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 3.438477333754541, | |
| "grad_norm": 0.16255582869052887, | |
| "learning_rate": 2.689544577274113e-06, | |
| "loss": 0.2305, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 3.444795450955615, | |
| "grad_norm": 0.16005097329616547, | |
| "learning_rate": 2.669999316866819e-06, | |
| "loss": 0.2303, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.451113568156689, | |
| "grad_norm": 0.1680128127336502, | |
| "learning_rate": 2.6504994315081114e-06, | |
| "loss": 0.2295, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 3.4574316853577636, | |
| "grad_norm": 0.1641710102558136, | |
| "learning_rate": 2.631045300943904e-06, | |
| "loss": 0.2318, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 3.4637498025588376, | |
| "grad_norm": 0.1590966135263443, | |
| "learning_rate": 2.61163730402908e-06, | |
| "loss": 0.2298, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 3.4700679197599116, | |
| "grad_norm": 0.16159506142139435, | |
| "learning_rate": 2.5922758187200893e-06, | |
| "loss": 0.2292, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 3.4763860369609856, | |
| "grad_norm": 0.1627105474472046, | |
| "learning_rate": 2.572961222067612e-06, | |
| "loss": 0.2287, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.4827041541620596, | |
| "grad_norm": 0.1647382527589798, | |
| "learning_rate": 2.5536938902092056e-06, | |
| "loss": 0.2297, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 3.489022271363134, | |
| "grad_norm": 0.17726825177669525, | |
| "learning_rate": 2.5344741983619734e-06, | |
| "loss": 0.2275, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 3.495340388564208, | |
| "grad_norm": 0.18429596722126007, | |
| "learning_rate": 2.515302520815275e-06, | |
| "loss": 0.2304, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 3.501658505765282, | |
| "grad_norm": 0.16635169088840485, | |
| "learning_rate": 2.4961792309234194e-06, | |
| "loss": 0.2301, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 3.507976622966356, | |
| "grad_norm": 0.17560289800167084, | |
| "learning_rate": 2.4771047010984066e-06, | |
| "loss": 0.2303, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.51429474016743, | |
| "grad_norm": 0.16308391094207764, | |
| "learning_rate": 2.4580793028026636e-06, | |
| "loss": 0.2283, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 3.520612857368504, | |
| "grad_norm": 0.16081936657428741, | |
| "learning_rate": 2.439103406541821e-06, | |
| "loss": 0.2323, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 3.526930974569578, | |
| "grad_norm": 0.15498140454292297, | |
| "learning_rate": 2.4201773818574956e-06, | |
| "loss": 0.2305, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 3.5332490917706525, | |
| "grad_norm": 0.16058135032653809, | |
| "learning_rate": 2.4013015973200895e-06, | |
| "loss": 0.2308, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 3.5395672089717265, | |
| "grad_norm": 0.16022346913814545, | |
| "learning_rate": 2.3824764205216144e-06, | |
| "loss": 0.2308, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.5458853261728005, | |
| "grad_norm": 0.1624903380870819, | |
| "learning_rate": 2.363702218068535e-06, | |
| "loss": 0.2316, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 3.5522034433738745, | |
| "grad_norm": 0.15978513658046722, | |
| "learning_rate": 2.344979355574629e-06, | |
| "loss": 0.2279, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 3.5585215605749485, | |
| "grad_norm": 0.15280455350875854, | |
| "learning_rate": 2.326308197653862e-06, | |
| "loss": 0.2283, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 3.564839677776023, | |
| "grad_norm": 0.16099567711353302, | |
| "learning_rate": 2.307689107913295e-06, | |
| "loss": 0.2289, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 3.571157794977097, | |
| "grad_norm": 0.15736475586891174, | |
| "learning_rate": 2.289122448945997e-06, | |
| "loss": 0.2293, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.577475912178171, | |
| "grad_norm": 0.15528954565525055, | |
| "learning_rate": 2.270608582323992e-06, | |
| "loss": 0.2283, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 3.583794029379245, | |
| "grad_norm": 0.1545080840587616, | |
| "learning_rate": 2.2521478685912027e-06, | |
| "loss": 0.2279, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 3.590112146580319, | |
| "grad_norm": 0.17268432676792145, | |
| "learning_rate": 2.233740667256446e-06, | |
| "loss": 0.2264, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 3.596430263781393, | |
| "grad_norm": 0.17080992460250854, | |
| "learning_rate": 2.2153873367864203e-06, | |
| "loss": 0.2307, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 3.602748380982467, | |
| "grad_norm": 0.16012567281723022, | |
| "learning_rate": 2.19708823459873e-06, | |
| "loss": 0.2304, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.6090664981835414, | |
| "grad_norm": 0.1589348316192627, | |
| "learning_rate": 2.178843717054923e-06, | |
| "loss": 0.229, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 3.6153846153846154, | |
| "grad_norm": 0.16951771080493927, | |
| "learning_rate": 2.1606541394535528e-06, | |
| "loss": 0.2276, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 3.6217027325856894, | |
| "grad_norm": 0.1633329540491104, | |
| "learning_rate": 2.1425198560232585e-06, | |
| "loss": 0.2286, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 3.6280208497867634, | |
| "grad_norm": 0.15090343356132507, | |
| "learning_rate": 2.12444121991586e-06, | |
| "loss": 0.2299, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 3.634338966987838, | |
| "grad_norm": 0.15929211676120758, | |
| "learning_rate": 2.106418583199493e-06, | |
| "loss": 0.231, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.640657084188912, | |
| "grad_norm": 0.16133394837379456, | |
| "learning_rate": 2.088452296851744e-06, | |
| "loss": 0.2299, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 3.646975201389986, | |
| "grad_norm": 0.15688304603099823, | |
| "learning_rate": 2.070542710752818e-06, | |
| "loss": 0.2282, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 3.65329331859106, | |
| "grad_norm": 0.1701997071504593, | |
| "learning_rate": 2.052690173678724e-06, | |
| "loss": 0.2287, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 3.659611435792134, | |
| "grad_norm": 0.16671252250671387, | |
| "learning_rate": 2.034895033294483e-06, | |
| "loss": 0.2299, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 3.665929552993208, | |
| "grad_norm": 0.16977478563785553, | |
| "learning_rate": 2.0171576361473587e-06, | |
| "loss": 0.2282, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.672247670194282, | |
| "grad_norm": 0.1764647513628006, | |
| "learning_rate": 1.999478327660109e-06, | |
| "loss": 0.2294, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 3.6785657873953563, | |
| "grad_norm": 0.16209015250205994, | |
| "learning_rate": 1.9818574521242507e-06, | |
| "loss": 0.2306, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 3.6848839045964303, | |
| "grad_norm": 0.16386057436466217, | |
| "learning_rate": 1.9642953526933685e-06, | |
| "loss": 0.2273, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 3.6912020217975043, | |
| "grad_norm": 0.20157091319561005, | |
| "learning_rate": 1.9467923713764296e-06, | |
| "loss": 0.2285, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 3.6975201389985783, | |
| "grad_norm": 0.14894433319568634, | |
| "learning_rate": 1.9293488490311085e-06, | |
| "loss": 0.2297, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.7038382561996523, | |
| "grad_norm": 0.16043171286582947, | |
| "learning_rate": 1.9119651253571676e-06, | |
| "loss": 0.2301, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 3.710156373400727, | |
| "grad_norm": 0.15590202808380127, | |
| "learning_rate": 1.894641538889832e-06, | |
| "loss": 0.2303, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 3.716474490601801, | |
| "grad_norm": 0.15428245067596436, | |
| "learning_rate": 1.877378426993201e-06, | |
| "loss": 0.2268, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 3.722792607802875, | |
| "grad_norm": 0.15511804819107056, | |
| "learning_rate": 1.86017612585367e-06, | |
| "loss": 0.2293, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 3.729110725003949, | |
| "grad_norm": 0.15739892423152924, | |
| "learning_rate": 1.843034970473398e-06, | |
| "loss": 0.2307, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.735428842205023, | |
| "grad_norm": 0.1598675698041916, | |
| "learning_rate": 1.82595529466377e-06, | |
| "loss": 0.2292, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 3.741746959406097, | |
| "grad_norm": 0.1549026221036911, | |
| "learning_rate": 1.8089374310389052e-06, | |
| "loss": 0.2306, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 3.748065076607171, | |
| "grad_norm": 0.16567422449588776, | |
| "learning_rate": 1.7919817110091691e-06, | |
| "loss": 0.2314, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 3.7543831938082453, | |
| "grad_norm": 0.16314323246479034, | |
| "learning_rate": 1.775088464774734e-06, | |
| "loss": 0.231, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 3.7607013110093193, | |
| "grad_norm": 0.15875166654586792, | |
| "learning_rate": 1.7582580213191381e-06, | |
| "loss": 0.2281, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.7670194282103933, | |
| "grad_norm": 0.15357348322868347, | |
| "learning_rate": 1.7414907084028804e-06, | |
| "loss": 0.2265, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 3.7733375454114673, | |
| "grad_norm": 0.16420722007751465, | |
| "learning_rate": 1.724786852557041e-06, | |
| "loss": 0.2307, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 3.7796556626125417, | |
| "grad_norm": 0.1632334589958191, | |
| "learning_rate": 1.70814677907692e-06, | |
| "loss": 0.2309, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 3.7859737798136157, | |
| "grad_norm": 0.16144877672195435, | |
| "learning_rate": 1.6915708120157042e-06, | |
| "loss": 0.2283, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 3.7922918970146897, | |
| "grad_norm": 0.1612851768732071, | |
| "learning_rate": 1.6750592741781496e-06, | |
| "loss": 0.2284, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.7986100142157637, | |
| "grad_norm": 0.1625714898109436, | |
| "learning_rate": 1.6586124871143062e-06, | |
| "loss": 0.2307, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 3.8049281314168377, | |
| "grad_norm": 0.15983229875564575, | |
| "learning_rate": 1.6422307711132462e-06, | |
| "loss": 0.23, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 3.8112462486179117, | |
| "grad_norm": 0.16138029098510742, | |
| "learning_rate": 1.6259144451968383e-06, | |
| "loss": 0.2293, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 3.8175643658189857, | |
| "grad_norm": 0.15706180036067963, | |
| "learning_rate": 1.6096638271135172e-06, | |
| "loss": 0.2293, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 3.82388248302006, | |
| "grad_norm": 0.15325595438480377, | |
| "learning_rate": 1.593479233332112e-06, | |
| "loss": 0.2276, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.830200600221134, | |
| "grad_norm": 0.1517479419708252, | |
| "learning_rate": 1.577360979035678e-06, | |
| "loss": 0.2296, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 3.836518717422208, | |
| "grad_norm": 0.15618766844272614, | |
| "learning_rate": 1.5613093781153503e-06, | |
| "loss": 0.2292, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 3.842836834623282, | |
| "grad_norm": 0.1522364616394043, | |
| "learning_rate": 1.5453247431642493e-06, | |
| "loss": 0.2286, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 3.849154951824356, | |
| "grad_norm": 0.1619284451007843, | |
| "learning_rate": 1.5294073854713754e-06, | |
| "loss": 0.2302, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 3.8554730690254306, | |
| "grad_norm": 0.15237174928188324, | |
| "learning_rate": 1.5135576150155567e-06, | |
| "loss": 0.2303, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.8617911862265046, | |
| "grad_norm": 0.15762847661972046, | |
| "learning_rate": 1.4977757404594063e-06, | |
| "loss": 0.2282, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 3.8681093034275786, | |
| "grad_norm": 0.15904614329338074, | |
| "learning_rate": 1.4820620691433175e-06, | |
| "loss": 0.2298, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 3.8744274206286526, | |
| "grad_norm": 0.159016951918602, | |
| "learning_rate": 1.4664169070794753e-06, | |
| "loss": 0.2301, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 3.8807455378297266, | |
| "grad_norm": 0.15268373489379883, | |
| "learning_rate": 1.4508405589458968e-06, | |
| "loss": 0.2299, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 3.8870636550308006, | |
| "grad_norm": 0.16221952438354492, | |
| "learning_rate": 1.4353333280805e-06, | |
| "loss": 0.2263, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.8933817722318746, | |
| "grad_norm": 0.1568318009376526, | |
| "learning_rate": 1.419895516475192e-06, | |
| "loss": 0.2285, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 3.899699889432949, | |
| "grad_norm": 0.15674127638339996, | |
| "learning_rate": 1.4045274247699957e-06, | |
| "loss": 0.2315, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 3.906018006634023, | |
| "grad_norm": 0.15392176806926727, | |
| "learning_rate": 1.3892293522471834e-06, | |
| "loss": 0.2304, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 3.912336123835097, | |
| "grad_norm": 0.15840460360050201, | |
| "learning_rate": 1.374001596825461e-06, | |
| "loss": 0.2272, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 3.918654241036171, | |
| "grad_norm": 0.15263865888118744, | |
| "learning_rate": 1.3588444550541568e-06, | |
| "loss": 0.2313, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.9249723582372456, | |
| "grad_norm": 0.14992570877075195, | |
| "learning_rate": 1.3437582221074574e-06, | |
| "loss": 0.2289, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 3.9312904754383196, | |
| "grad_norm": 0.14820538461208344, | |
| "learning_rate": 1.3287431917786426e-06, | |
| "loss": 0.2302, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 3.9376085926393936, | |
| "grad_norm": 0.15514026582241058, | |
| "learning_rate": 1.3137996564743783e-06, | |
| "loss": 0.2286, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 3.9439267098404676, | |
| "grad_norm": 0.15012729167938232, | |
| "learning_rate": 1.2989279072090184e-06, | |
| "loss": 0.2301, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 3.9502448270415416, | |
| "grad_norm": 0.15299195051193237, | |
| "learning_rate": 1.2841282335989363e-06, | |
| "loss": 0.2308, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.9565629442426156, | |
| "grad_norm": 0.1466607302427292, | |
| "learning_rate": 1.2694009238568794e-06, | |
| "loss": 0.2291, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 3.9628810614436896, | |
| "grad_norm": 0.15444868803024292, | |
| "learning_rate": 1.2547462647863711e-06, | |
| "loss": 0.2296, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 3.969199178644764, | |
| "grad_norm": 0.15740527212619781, | |
| "learning_rate": 1.2401645417761126e-06, | |
| "loss": 0.2298, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 3.975517295845838, | |
| "grad_norm": 0.1578647792339325, | |
| "learning_rate": 1.225656038794425e-06, | |
| "loss": 0.2321, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 3.981835413046912, | |
| "grad_norm": 0.15657520294189453, | |
| "learning_rate": 1.211221038383728e-06, | |
| "loss": 0.2285, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.988153530247986, | |
| "grad_norm": 0.1587335765361786, | |
| "learning_rate": 1.1968598216550315e-06, | |
| "loss": 0.2278, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 3.99447164744906, | |
| "grad_norm": 0.15161466598510742, | |
| "learning_rate": 1.182572668282463e-06, | |
| "loss": 0.2261, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 4.001263623440215, | |
| "grad_norm": 0.15584523975849152, | |
| "learning_rate": 1.1683598564978188e-06, | |
| "loss": 0.2443, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 4.007581740641289, | |
| "grad_norm": 0.15158313512802124, | |
| "learning_rate": 1.15422166308515e-06, | |
| "loss": 0.2254, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 4.013899857842363, | |
| "grad_norm": 0.15783625841140747, | |
| "learning_rate": 1.1401583633753683e-06, | |
| "loss": 0.2218, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 4.020217975043437, | |
| "grad_norm": 0.151853546500206, | |
| "learning_rate": 1.1261702312408867e-06, | |
| "loss": 0.223, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 4.026536092244511, | |
| "grad_norm": 0.14669708907604218, | |
| "learning_rate": 1.1122575390902824e-06, | |
| "loss": 0.2233, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 4.032854209445585, | |
| "grad_norm": 0.1561277061700821, | |
| "learning_rate": 1.0984205578629958e-06, | |
| "loss": 0.2262, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 4.03917232664666, | |
| "grad_norm": 0.15337461233139038, | |
| "learning_rate": 1.084659557024057e-06, | |
| "loss": 0.2248, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 4.045490443847734, | |
| "grad_norm": 0.15551766753196716, | |
| "learning_rate": 1.0709748045588269e-06, | |
| "loss": 0.2248, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 4.051808561048808, | |
| "grad_norm": 0.1567201167345047, | |
| "learning_rate": 1.057366566967789e-06, | |
| "loss": 0.2246, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 4.058126678249882, | |
| "grad_norm": 0.14856794476509094, | |
| "learning_rate": 1.043835109261357e-06, | |
| "loss": 0.2241, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 4.064444795450956, | |
| "grad_norm": 0.1545330137014389, | |
| "learning_rate": 1.0303806949547118e-06, | |
| "loss": 0.224, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 4.07076291265203, | |
| "grad_norm": 0.1541059911251068, | |
| "learning_rate": 1.0170035860626676e-06, | |
| "loss": 0.2262, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 4.077081029853104, | |
| "grad_norm": 0.15895813703536987, | |
| "learning_rate": 1.0037040430945782e-06, | |
| "loss": 0.2254, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 4.083399147054178, | |
| "grad_norm": 0.15541358292102814, | |
| "learning_rate": 9.904823250492546e-07, | |
| "loss": 0.2258, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 4.089717264255252, | |
| "grad_norm": 0.16455316543579102, | |
| "learning_rate": 9.773386894099269e-07, | |
| "loss": 0.2234, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 4.096035381456326, | |
| "grad_norm": 0.15118283033370972, | |
| "learning_rate": 9.642733921392233e-07, | |
| "loss": 0.2252, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 4.1023534986574, | |
| "grad_norm": 0.14733092486858368, | |
| "learning_rate": 9.512866876741949e-07, | |
| "loss": 0.2231, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 4.108671615858475, | |
| "grad_norm": 0.15276247262954712, | |
| "learning_rate": 9.383788289213541e-07, | |
| "loss": 0.225, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.114989733059549, | |
| "grad_norm": 0.1504809558391571, | |
| "learning_rate": 9.255500672517497e-07, | |
| "loss": 0.2242, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 4.121307850260623, | |
| "grad_norm": 0.1528443992137909, | |
| "learning_rate": 9.128006524960747e-07, | |
| "loss": 0.2249, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 4.127625967461697, | |
| "grad_norm": 0.147428497672081, | |
| "learning_rate": 9.001308329397996e-07, | |
| "loss": 0.2214, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 4.133944084662771, | |
| "grad_norm": 0.1520494669675827, | |
| "learning_rate": 8.875408553183357e-07, | |
| "loss": 0.2249, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 4.140262201863845, | |
| "grad_norm": 0.16425903141498566, | |
| "learning_rate": 8.750309648122307e-07, | |
| "loss": 0.2264, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 4.146580319064919, | |
| "grad_norm": 0.15226700901985168, | |
| "learning_rate": 8.62601405042397e-07, | |
| "loss": 0.2245, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 4.152898436265993, | |
| "grad_norm": 0.15050509572029114, | |
| "learning_rate": 8.502524180653632e-07, | |
| "loss": 0.2272, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 4.159216553467067, | |
| "grad_norm": 0.15115346014499664, | |
| "learning_rate": 8.379842443685626e-07, | |
| "loss": 0.2259, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 4.165534670668141, | |
| "grad_norm": 0.14687852561473846, | |
| "learning_rate": 8.257971228656502e-07, | |
| "loss": 0.224, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 4.171852787869215, | |
| "grad_norm": 0.15161781013011932, | |
| "learning_rate": 8.136912908918482e-07, | |
| "loss": 0.2251, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 4.178170905070289, | |
| "grad_norm": 0.15190783143043518, | |
| "learning_rate": 8.016669841993258e-07, | |
| "loss": 0.2241, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 4.1844890222713635, | |
| "grad_norm": 0.14926742017269135, | |
| "learning_rate": 7.897244369526036e-07, | |
| "loss": 0.2249, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 4.1908071394724375, | |
| "grad_norm": 0.16139356791973114, | |
| "learning_rate": 7.778638817240042e-07, | |
| "loss": 0.2264, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 4.1971252566735116, | |
| "grad_norm": 0.15186412632465363, | |
| "learning_rate": 7.660855494891107e-07, | |
| "loss": 0.222, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 4.2034433738745856, | |
| "grad_norm": 0.14822430908679962, | |
| "learning_rate": 7.543896696222763e-07, | |
| "loss": 0.2247, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 4.20976149107566, | |
| "grad_norm": 0.15279975533485413, | |
| "learning_rate": 7.427764698921519e-07, | |
| "loss": 0.2239, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 4.216079608276734, | |
| "grad_norm": 0.1534373015165329, | |
| "learning_rate": 7.312461764572571e-07, | |
| "loss": 0.2231, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 4.222397725477808, | |
| "grad_norm": 0.15967117249965668, | |
| "learning_rate": 7.197990138615712e-07, | |
| "loss": 0.2247, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 4.228715842678882, | |
| "grad_norm": 0.16143904626369476, | |
| "learning_rate": 7.084352050301607e-07, | |
| "loss": 0.2257, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 4.235033959879956, | |
| "grad_norm": 0.1529376357793808, | |
| "learning_rate": 6.971549712648401e-07, | |
| "loss": 0.2242, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 4.24135207708103, | |
| "grad_norm": 0.15520897507667542, | |
| "learning_rate": 6.859585322398605e-07, | |
| "loss": 0.2238, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 4.247670194282104, | |
| "grad_norm": 0.1552317589521408, | |
| "learning_rate": 6.74846105997633e-07, | |
| "loss": 0.2235, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 4.2539883114831785, | |
| "grad_norm": 0.15890224277973175, | |
| "learning_rate": 6.638179089444791e-07, | |
| "loss": 0.2253, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 4.2603064286842525, | |
| "grad_norm": 0.15153637528419495, | |
| "learning_rate": 6.528741558464207e-07, | |
| "loss": 0.2232, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 4.2666245458853265, | |
| "grad_norm": 0.15470515191555023, | |
| "learning_rate": 6.420150598249947e-07, | |
| "loss": 0.2244, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 4.2729426630864005, | |
| "grad_norm": 0.1615689992904663, | |
| "learning_rate": 6.312408323531083e-07, | |
| "loss": 0.2246, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 4.2792607802874745, | |
| "grad_norm": 0.1515345424413681, | |
| "learning_rate": 6.205516832509089e-07, | |
| "loss": 0.2239, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 4.2855788974885485, | |
| "grad_norm": 0.14731772243976593, | |
| "learning_rate": 6.0994782068171e-07, | |
| "loss": 0.2249, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 4.2918970146896225, | |
| "grad_norm": 0.15626221895217896, | |
| "learning_rate": 5.99429451147932e-07, | |
| "loss": 0.2264, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 4.2982151318906965, | |
| "grad_norm": 0.15102536976337433, | |
| "learning_rate": 5.889967794870794e-07, | |
| "loss": 0.2244, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.3045332490917705, | |
| "grad_norm": 0.150013267993927, | |
| "learning_rate": 5.786500088677543e-07, | |
| "loss": 0.2262, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 4.3108513662928445, | |
| "grad_norm": 0.15447860956192017, | |
| "learning_rate": 5.683893407857027e-07, | |
| "loss": 0.2234, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 4.3171694834939185, | |
| "grad_norm": 0.15842311084270477, | |
| "learning_rate": 5.582149750598842e-07, | |
| "loss": 0.2229, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 4.3234876006949925, | |
| "grad_norm": 0.16022993624210358, | |
| "learning_rate": 5.481271098285818e-07, | |
| "loss": 0.2262, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 4.329805717896067, | |
| "grad_norm": 0.15244120359420776, | |
| "learning_rate": 5.381259415455475e-07, | |
| "loss": 0.2241, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 4.336123835097141, | |
| "grad_norm": 0.154579758644104, | |
| "learning_rate": 5.282116649761738e-07, | |
| "loss": 0.2225, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 4.342441952298215, | |
| "grad_norm": 0.1539810597896576, | |
| "learning_rate": 5.183844731937004e-07, | |
| "loss": 0.2238, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 4.348760069499289, | |
| "grad_norm": 0.15259358286857605, | |
| "learning_rate": 5.086445575754551e-07, | |
| "loss": 0.2249, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 4.355078186700363, | |
| "grad_norm": 0.15488529205322266, | |
| "learning_rate": 4.989921077991272e-07, | |
| "loss": 0.2253, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 4.361396303901437, | |
| "grad_norm": 0.15208259224891663, | |
| "learning_rate": 4.89427311839073e-07, | |
| "loss": 0.2232, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.367714421102511, | |
| "grad_norm": 0.15295451879501343, | |
| "learning_rate": 4.799503559626528e-07, | |
| "loss": 0.2231, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 4.374032538303585, | |
| "grad_norm": 0.14501479268074036, | |
| "learning_rate": 4.7056142472660993e-07, | |
| "loss": 0.2226, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 4.380350655504659, | |
| "grad_norm": 0.15459899604320526, | |
| "learning_rate": 4.6126070097346933e-07, | |
| "loss": 0.2242, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 4.386668772705733, | |
| "grad_norm": 0.14847847819328308, | |
| "learning_rate": 4.520483658279817e-07, | |
| "loss": 0.2223, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 4.392986889906807, | |
| "grad_norm": 0.15139150619506836, | |
| "learning_rate": 4.4292459869359484e-07, | |
| "loss": 0.2253, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 4.399305007107882, | |
| "grad_norm": 0.1547953188419342, | |
| "learning_rate": 4.3388957724895874e-07, | |
| "loss": 0.2246, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 4.405623124308956, | |
| "grad_norm": 0.15533864498138428, | |
| "learning_rate": 4.249434774444672e-07, | |
| "loss": 0.2235, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 4.41194124151003, | |
| "grad_norm": 0.15549246966838837, | |
| "learning_rate": 4.1608647349883123e-07, | |
| "loss": 0.2234, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 4.418259358711104, | |
| "grad_norm": 0.15497823059558868, | |
| "learning_rate": 4.073187378956811e-07, | |
| "loss": 0.2258, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 4.424577475912178, | |
| "grad_norm": 0.14938384294509888, | |
| "learning_rate": 3.9864044138021915e-07, | |
| "loss": 0.2238, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.430895593113252, | |
| "grad_norm": 0.1605786234140396, | |
| "learning_rate": 3.9005175295588227e-07, | |
| "loss": 0.2269, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 4.437213710314326, | |
| "grad_norm": 0.14927615225315094, | |
| "learning_rate": 3.815528398810553e-07, | |
| "loss": 0.2239, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 4.4435318275154, | |
| "grad_norm": 0.15817302465438843, | |
| "learning_rate": 3.7314386766581725e-07, | |
| "loss": 0.2245, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 4.449849944716474, | |
| "grad_norm": 0.14866997301578522, | |
| "learning_rate": 3.6482500006871315e-07, | |
| "loss": 0.2235, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 4.456168061917548, | |
| "grad_norm": 0.15268754959106445, | |
| "learning_rate": 3.5659639909356725e-07, | |
| "loss": 0.2238, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 4.462486179118622, | |
| "grad_norm": 0.15132968127727509, | |
| "learning_rate": 3.4845822498632773e-07, | |
| "loss": 0.2255, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 4.468804296319696, | |
| "grad_norm": 0.15053577721118927, | |
| "learning_rate": 3.4041063623194705e-07, | |
| "loss": 0.2244, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 4.475122413520771, | |
| "grad_norm": 0.15395694971084595, | |
| "learning_rate": 3.3245378955129306e-07, | |
| "loss": 0.2248, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 4.481440530721845, | |
| "grad_norm": 0.15405914187431335, | |
| "learning_rate": 3.245878398980995e-07, | |
| "loss": 0.2238, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 4.487758647922919, | |
| "grad_norm": 0.1462317258119583, | |
| "learning_rate": 3.168129404559467e-07, | |
| "loss": 0.2232, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.494076765123993, | |
| "grad_norm": 0.15240703523159027, | |
| "learning_rate": 3.0912924263527934e-07, | |
| "loss": 0.2226, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 4.500394882325067, | |
| "grad_norm": 0.1582237184047699, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.2238, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 4.506712999526141, | |
| "grad_norm": 0.14886438846588135, | |
| "learning_rate": 2.940360486168453e-07, | |
| "loss": 0.2245, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 4.513031116727215, | |
| "grad_norm": 0.15885640680789948, | |
| "learning_rate": 2.8662684634792436e-07, | |
| "loss": 0.2261, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 4.519349233928289, | |
| "grad_norm": 0.1518273651599884, | |
| "learning_rate": 2.793094335524571e-07, | |
| "loss": 0.2236, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 4.525667351129363, | |
| "grad_norm": 0.14824968576431274, | |
| "learning_rate": 2.7208395273167376e-07, | |
| "loss": 0.2243, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 4.531985468330437, | |
| "grad_norm": 0.16252179443836212, | |
| "learning_rate": 2.6495054459649285e-07, | |
| "loss": 0.224, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 4.538303585531511, | |
| "grad_norm": 0.1533941775560379, | |
| "learning_rate": 2.5790934806479095e-07, | |
| "loss": 0.2241, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 4.544621702732586, | |
| "grad_norm": 0.1516910344362259, | |
| "learning_rate": 2.5096050025868734e-07, | |
| "loss": 0.2233, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 4.55093981993366, | |
| "grad_norm": 0.15374279022216797, | |
| "learning_rate": 2.4410413650188035e-07, | |
| "loss": 0.2251, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.557257937134734, | |
| "grad_norm": 0.15357162058353424, | |
| "learning_rate": 2.3734039031700684e-07, | |
| "loss": 0.2246, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 4.563576054335808, | |
| "grad_norm": 0.1557237058877945, | |
| "learning_rate": 2.3066939342304696e-07, | |
| "loss": 0.2216, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 4.569894171536882, | |
| "grad_norm": 0.15244849026203156, | |
| "learning_rate": 2.240912757327557e-07, | |
| "loss": 0.2248, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 4.576212288737956, | |
| "grad_norm": 0.15775157511234283, | |
| "learning_rate": 2.176061653501338e-07, | |
| "loss": 0.2242, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 4.58253040593903, | |
| "grad_norm": 0.15207399427890778, | |
| "learning_rate": 2.1121418856793363e-07, | |
| "loss": 0.2245, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 4.588848523140104, | |
| "grad_norm": 0.1468561291694641, | |
| "learning_rate": 2.0491546986519896e-07, | |
| "loss": 0.2229, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 4.595166640341178, | |
| "grad_norm": 0.15533696115016937, | |
| "learning_rate": 1.987101319048418e-07, | |
| "loss": 0.2245, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 4.601484757542252, | |
| "grad_norm": 0.15302863717079163, | |
| "learning_rate": 1.925982955312511e-07, | |
| "loss": 0.2221, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 4.607802874743326, | |
| "grad_norm": 0.15562526881694794, | |
| "learning_rate": 1.8658007976794235e-07, | |
| "loss": 0.226, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 4.6141209919444, | |
| "grad_norm": 0.14804142713546753, | |
| "learning_rate": 1.8065560181523889e-07, | |
| "loss": 0.2225, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 4.620439109145474, | |
| "grad_norm": 0.144419863820076, | |
| "learning_rate": 1.748249770479893e-07, | |
| "loss": 0.2246, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 4.626757226346549, | |
| "grad_norm": 0.15179699659347534, | |
| "learning_rate": 1.6908831901331968e-07, | |
| "loss": 0.2279, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 4.633075343547623, | |
| "grad_norm": 0.15003693103790283, | |
| "learning_rate": 1.6344573942842333e-07, | |
| "loss": 0.2248, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 4.639393460748697, | |
| "grad_norm": 0.14552009105682373, | |
| "learning_rate": 1.5789734817838577e-07, | |
| "loss": 0.2237, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 4.645711577949771, | |
| "grad_norm": 0.15160489082336426, | |
| "learning_rate": 1.5244325331404242e-07, | |
| "loss": 0.2232, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 4.652029695150845, | |
| "grad_norm": 0.15886756777763367, | |
| "learning_rate": 1.470835610498761e-07, | |
| "loss": 0.2217, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 4.658347812351919, | |
| "grad_norm": 0.1517808735370636, | |
| "learning_rate": 1.4181837576195179e-07, | |
| "loss": 0.2235, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 4.664665929552993, | |
| "grad_norm": 0.14629001915454865, | |
| "learning_rate": 1.366477999858773e-07, | |
| "loss": 0.2251, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 4.670984046754067, | |
| "grad_norm": 0.15069714188575745, | |
| "learning_rate": 1.315719344148092e-07, | |
| "loss": 0.2233, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 4.677302163955141, | |
| "grad_norm": 0.15253259241580963, | |
| "learning_rate": 1.2659087789749557e-07, | |
| "loss": 0.2238, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 4.683620281156215, | |
| "grad_norm": 0.15447266399860382, | |
| "learning_rate": 1.2170472743634588e-07, | |
| "loss": 0.2218, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 4.68993839835729, | |
| "grad_norm": 0.15333184599876404, | |
| "learning_rate": 1.1691357818554405e-07, | |
| "loss": 0.226, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 4.696256515558364, | |
| "grad_norm": 0.1548086404800415, | |
| "learning_rate": 1.1221752344919679e-07, | |
| "loss": 0.2252, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 4.702574632759438, | |
| "grad_norm": 0.14932510256767273, | |
| "learning_rate": 1.0761665467951321e-07, | |
| "loss": 0.2232, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 4.708892749960512, | |
| "grad_norm": 0.15253929793834686, | |
| "learning_rate": 1.0311106147502747e-07, | |
| "loss": 0.223, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 4.715210867161586, | |
| "grad_norm": 0.1560056507587433, | |
| "learning_rate": 9.870083157885068e-08, | |
| "loss": 0.2248, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 4.72152898436266, | |
| "grad_norm": 0.1542298048734665, | |
| "learning_rate": 9.43860508769645e-08, | |
| "loss": 0.2223, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 4.727847101563734, | |
| "grad_norm": 0.1528465300798416, | |
| "learning_rate": 9.01668033965486e-08, | |
| "loss": 0.223, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 4.734165218764808, | |
| "grad_norm": 0.14702333509922028, | |
| "learning_rate": 8.604317130434137e-08, | |
| "loss": 0.2242, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 4.740483335965882, | |
| "grad_norm": 0.1520882397890091, | |
| "learning_rate": 8.201523490504404e-08, | |
| "loss": 0.2232, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.746801453166956, | |
| "grad_norm": 0.14876076579093933, | |
| "learning_rate": 7.808307263975301e-08, | |
| "loss": 0.2244, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 4.75311957036803, | |
| "grad_norm": 0.14352434873580933, | |
| "learning_rate": 7.424676108443551e-08, | |
| "loss": 0.2248, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 4.759437687569104, | |
| "grad_norm": 0.14998690783977509, | |
| "learning_rate": 7.050637494843526e-08, | |
| "loss": 0.225, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 4.765755804770178, | |
| "grad_norm": 0.15730910003185272, | |
| "learning_rate": 6.686198707301861e-08, | |
| "loss": 0.2256, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 4.772073921971253, | |
| "grad_norm": 0.15142279863357544, | |
| "learning_rate": 6.331366842995901e-08, | |
| "loss": 0.2251, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 4.778392039172327, | |
| "grad_norm": 0.15004810690879822, | |
| "learning_rate": 5.986148812015036e-08, | |
| "loss": 0.2271, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 4.784710156373401, | |
| "grad_norm": 0.15459021925926208, | |
| "learning_rate": 5.650551337226362e-08, | |
| "loss": 0.2247, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 4.791028273574475, | |
| "grad_norm": 0.1535128951072693, | |
| "learning_rate": 5.324580954143621e-08, | |
| "loss": 0.225, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 4.797346390775549, | |
| "grad_norm": 0.14755982160568237, | |
| "learning_rate": 5.008244010800245e-08, | |
| "loss": 0.2244, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 4.803664507976623, | |
| "grad_norm": 0.15288862586021423, | |
| "learning_rate": 4.701546667625401e-08, | |
| "loss": 0.2242, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 4.809982625177697, | |
| "grad_norm": 0.14481881260871887, | |
| "learning_rate": 4.4044948973240855e-08, | |
| "loss": 0.2241, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 4.816300742378771, | |
| "grad_norm": 0.1468980610370636, | |
| "learning_rate": 4.117094484760942e-08, | |
| "loss": 0.2223, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 4.822618859579845, | |
| "grad_norm": 0.1565941572189331, | |
| "learning_rate": 3.8393510268475155e-08, | |
| "loss": 0.2245, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 4.828936976780919, | |
| "grad_norm": 0.15280728042125702, | |
| "learning_rate": 3.5712699324331745e-08, | |
| "loss": 0.2237, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 4.835255093981994, | |
| "grad_norm": 0.14640846848487854, | |
| "learning_rate": 3.312856422200028e-08, | |
| "loss": 0.2249, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 4.841573211183068, | |
| "grad_norm": 0.15066128969192505, | |
| "learning_rate": 3.064115528561007e-08, | |
| "loss": 0.224, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 4.847891328384142, | |
| "grad_norm": 0.14239919185638428, | |
| "learning_rate": 2.8250520955618864e-08, | |
| "loss": 0.2206, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 4.854209445585216, | |
| "grad_norm": 0.1439589262008667, | |
| "learning_rate": 2.595670778787196e-08, | |
| "loss": 0.2254, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 4.86052756278629, | |
| "grad_norm": 0.15104269981384277, | |
| "learning_rate": 2.3759760452691794e-08, | |
| "loss": 0.2249, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 4.866845679987364, | |
| "grad_norm": 0.1463245004415512, | |
| "learning_rate": 2.165972173401143e-08, | |
| "loss": 0.2237, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 4.873163797188438, | |
| "grad_norm": 0.14985409379005432, | |
| "learning_rate": 1.965663252853911e-08, | |
| "loss": 0.2246, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 4.879481914389512, | |
| "grad_norm": 0.1502169370651245, | |
| "learning_rate": 1.7750531844963335e-08, | |
| "loss": 0.2245, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 4.885800031590586, | |
| "grad_norm": 0.15175861120224, | |
| "learning_rate": 1.5941456803191812e-08, | |
| "loss": 0.2221, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 4.89211814879166, | |
| "grad_norm": 0.15319672226905823, | |
| "learning_rate": 1.4229442633630353e-08, | |
| "loss": 0.2224, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 4.898436265992734, | |
| "grad_norm": 0.14487479627132416, | |
| "learning_rate": 1.2614522676493435e-08, | |
| "loss": 0.2217, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 4.904754383193808, | |
| "grad_norm": 0.14722299575805664, | |
| "learning_rate": 1.1096728381160271e-08, | |
| "loss": 0.2236, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 4.911072500394882, | |
| "grad_norm": 0.16063782572746277, | |
| "learning_rate": 9.676089305557523e-09, | |
| "loss": 0.2244, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 4.917390617595957, | |
| "grad_norm": 0.15409326553344727, | |
| "learning_rate": 8.352633115584764e-09, | |
| "loss": 0.2257, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 4.923708734797031, | |
| "grad_norm": 0.1531437486410141, | |
| "learning_rate": 7.1263855845782325e-09, | |
| "loss": 0.2231, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 4.930026851998105, | |
| "grad_norm": 0.15183156728744507, | |
| "learning_rate": 5.997370592806251e-09, | |
| "loss": 0.2241, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 4.936344969199179, | |
| "grad_norm": 0.1480141282081604, | |
| "learning_rate": 4.965610127004028e-09, | |
| "loss": 0.2229, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 4.942663086400253, | |
| "grad_norm": 0.15001103281974792, | |
| "learning_rate": 4.031124279948451e-09, | |
| "loss": 0.2231, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 4.948981203601327, | |
| "grad_norm": 0.15596900880336761, | |
| "learning_rate": 3.193931250062843e-09, | |
| "loss": 0.2249, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 4.955299320802401, | |
| "grad_norm": 0.15520991384983063, | |
| "learning_rate": 2.45404734106558e-09, | |
| "loss": 0.2253, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 4.961617438003475, | |
| "grad_norm": 0.15404628217220306, | |
| "learning_rate": 1.811486961650899e-09, | |
| "loss": 0.2246, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 4.967935555204549, | |
| "grad_norm": 0.1471293717622757, | |
| "learning_rate": 1.266262625210235e-09, | |
| "loss": 0.2225, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 4.974253672405623, | |
| "grad_norm": 0.1584838181734085, | |
| "learning_rate": 8.183849495851937e-10, | |
| "loss": 0.2237, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 4.980571789606698, | |
| "grad_norm": 0.14231979846954346, | |
| "learning_rate": 4.678626568649369e-10, | |
| "loss": 0.223, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 4.986889906807772, | |
| "grad_norm": 0.15368352830410004, | |
| "learning_rate": 2.1470257321298815e-10, | |
| "loss": 0.2237, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 4.993208024008846, | |
| "grad_norm": 0.1479187160730362, | |
| "learning_rate": 5.890962873456029e-11, | |
| "loss": 0.2235, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 4.99952614120992, | |
| "grad_norm": 0.14893342554569244, | |
| "learning_rate": 4.868573838523461e-13, | |
| "loss": 0.2249, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 4.99952614120992, | |
| "step": 7910, | |
| "total_flos": 3.246606278526417e+20, | |
| "train_loss": 0.08344055705064467, | |
| "train_runtime": 26260.8831, | |
| "train_samples_per_second": 308.537, | |
| "train_steps_per_second": 0.301 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7910, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.246606278526417e+20, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
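
The object above appears to be a standard Hugging Face Trainer state file (`trainer_state.json` is the Trainer's default filename). As a quick aid for working with it, the sketch below (plain Python, standard library only) loads the file, pulls the per-step loss and learning-rate series out of `log_history`, and cross-checks the summary throughput figures against `global_step` and `train_runtime`. The local path, and the effective-batch-size inference in the last lines, are assumptions for illustration, not something stated in the file itself.

```python
# Minimal sketch: parse a Trainer state file like the one above.
# Assumes the JSON has been saved (without the table wrapper) as
# "trainer_state.json" in the working directory -- the filename matches
# the Trainer's default, but the path here is an assumption.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step log entries; the final summary entry has no "loss" key,
# so filtering on it cleanly excludes the summary.
history = [e for e in state["log_history"] if "loss" in e]
steps  = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs    = [e["learning_rate"] for e in history]

print(f"logged entries : {len(history)}")
print(f"first/last loss: {losses[0]:.4f} -> {losses[-1]:.4f}")
print(f"peak LR        : {max(lrs):.3e} (decays to {lrs[-1]:.3e})")

# Consistency check: reported steps/sec vs. global_step / train_runtime.
summary = state["log_history"][-1]
derived_sps = state["global_step"] / summary["train_runtime"]
print(f"steps/sec      : reported {summary['train_steps_per_second']}, "
      f"derived {derived_sps:.3f}")

# samples/sec divided by steps/sec gives the effective global batch size;
# with train_batch_size = 1 per device this would imply heavy data
# parallelism and/or gradient accumulation (an inference, not a stated fact).
eff_batch = summary["train_samples_per_second"] / summary["train_steps_per_second"]
print(f"effective batch: ~{eff_batch:.0f} samples/step")
```

On this file the derived rate (7910 / 26260.8831 ≈ 0.301 steps/s) agrees with the reported `train_steps_per_second`, and 308.537 / 0.301 works out to roughly 1024 samples per optimizer step.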