| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.99692914763958, | |
| "eval_steps": 500, | |
| "global_step": 13748, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005091568680488154, | |
| "grad_norm": 0.7564847469329834, | |
| "learning_rate": 4.9999857936989376e-05, | |
| "loss": 0.6957, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010183137360976308, | |
| "grad_norm": 0.3457886576652527, | |
| "learning_rate": 4.999971587397875e-05, | |
| "loss": 0.6941, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.015274706041464463, | |
| "grad_norm": 0.2739073932170868, | |
| "learning_rate": 4.999957381096812e-05, | |
| "loss": 0.6921, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.020366274721952616, | |
| "grad_norm": 0.2826838195323944, | |
| "learning_rate": 4.9999431747957495e-05, | |
| "loss": 0.6932, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02545784340244077, | |
| "grad_norm": 0.1553473025560379, | |
| "learning_rate": 4.999928968494687e-05, | |
| "loss": 0.6933, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.030549412082928926, | |
| "grad_norm": 0.17130891978740692, | |
| "learning_rate": 4.9999147621936234e-05, | |
| "loss": 0.6916, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03564098076341708, | |
| "grad_norm": 0.1938512623310089, | |
| "learning_rate": 4.999900555892561e-05, | |
| "loss": 0.6919, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04073254944390523, | |
| "grad_norm": 0.2490479201078415, | |
| "learning_rate": 4.999886349591498e-05, | |
| "loss": 0.6907, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04582411812439339, | |
| "grad_norm": 0.20010128617286682, | |
| "learning_rate": 4.9998721432904354e-05, | |
| "loss": 0.691, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05091568680488154, | |
| "grad_norm": 0.16262651979923248, | |
| "learning_rate": 4.999857936989373e-05, | |
| "loss": 0.6917, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.056007255485369695, | |
| "grad_norm": 0.21441149711608887, | |
| "learning_rate": 4.999843730688309e-05, | |
| "loss": 0.6872, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06109882416585785, | |
| "grad_norm": 0.20518313348293304, | |
| "learning_rate": 4.9998295243872466e-05, | |
| "loss": 0.6899, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06619039284634601, | |
| "grad_norm": 0.2965710759162903, | |
| "learning_rate": 4.999815318086184e-05, | |
| "loss": 0.6869, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07128196152683416, | |
| "grad_norm": 0.269436776638031, | |
| "learning_rate": 4.999801111785121e-05, | |
| "loss": 0.6893, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07637353020732231, | |
| "grad_norm": 0.2355806529521942, | |
| "learning_rate": 4.9997869054840585e-05, | |
| "loss": 0.688, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08146509888781046, | |
| "grad_norm": 0.2859913408756256, | |
| "learning_rate": 4.999772699182996e-05, | |
| "loss": 0.6882, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08655666756829862, | |
| "grad_norm": 0.22986085712909698, | |
| "learning_rate": 4.999758492881933e-05, | |
| "loss": 0.6892, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09164823624878678, | |
| "grad_norm": 0.1777602881193161, | |
| "learning_rate": 4.9997442865808705e-05, | |
| "loss": 0.6891, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.09673980492927493, | |
| "grad_norm": 0.24697603285312653, | |
| "learning_rate": 4.999730080279808e-05, | |
| "loss": 0.6901, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.10183137360976308, | |
| "grad_norm": 0.24384862184524536, | |
| "learning_rate": 4.9997158739787444e-05, | |
| "loss": 0.6864, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10692294229025123, | |
| "grad_norm": 0.22532877326011658, | |
| "learning_rate": 4.999701667677682e-05, | |
| "loss": 0.6925, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11201451097073939, | |
| "grad_norm": 0.33894965052604675, | |
| "learning_rate": 4.999687461376619e-05, | |
| "loss": 0.69, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11710607965122755, | |
| "grad_norm": 0.1438864767551422, | |
| "learning_rate": 4.9996732550755563e-05, | |
| "loss": 0.6905, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1221976483317157, | |
| "grad_norm": 0.1356513947248459, | |
| "learning_rate": 4.9996590487744937e-05, | |
| "loss": 0.6895, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12728921701220386, | |
| "grad_norm": 0.11220791190862656, | |
| "learning_rate": 4.999644842473431e-05, | |
| "loss": 0.6889, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13238078569269202, | |
| "grad_norm": 0.18669378757476807, | |
| "learning_rate": 4.999630636172368e-05, | |
| "loss": 0.6877, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.13747235437318017, | |
| "grad_norm": 0.24448029696941376, | |
| "learning_rate": 4.9996164298713056e-05, | |
| "loss": 0.6821, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14256392305366833, | |
| "grad_norm": 0.33900442719459534, | |
| "learning_rate": 4.999602223570243e-05, | |
| "loss": 0.6825, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14765549173415646, | |
| "grad_norm": 0.20542432367801666, | |
| "learning_rate": 4.99958801726918e-05, | |
| "loss": 0.6835, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.15274706041464461, | |
| "grad_norm": 0.17819932103157043, | |
| "learning_rate": 4.9995738109681175e-05, | |
| "loss": 0.6875, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15783862909513277, | |
| "grad_norm": 0.16522936522960663, | |
| "learning_rate": 4.999559604667055e-05, | |
| "loss": 0.6868, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16293019777562093, | |
| "grad_norm": 0.1356010138988495, | |
| "learning_rate": 4.9995453983659915e-05, | |
| "loss": 0.6888, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.16802176645610908, | |
| "grad_norm": 0.20955336093902588, | |
| "learning_rate": 4.999531192064929e-05, | |
| "loss": 0.6827, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17311333513659724, | |
| "grad_norm": 0.14871099591255188, | |
| "learning_rate": 4.9995169857638654e-05, | |
| "loss": 0.687, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1782049038170854, | |
| "grad_norm": 0.1207965835928917, | |
| "learning_rate": 4.999502779462803e-05, | |
| "loss": 0.6871, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18329647249757355, | |
| "grad_norm": 0.25459375977516174, | |
| "learning_rate": 4.99948857316174e-05, | |
| "loss": 0.6879, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1883880411780617, | |
| "grad_norm": 0.12454749643802643, | |
| "learning_rate": 4.9994743668606773e-05, | |
| "loss": 0.689, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.19347960985854987, | |
| "grad_norm": 0.13212984800338745, | |
| "learning_rate": 4.9994601605596147e-05, | |
| "loss": 0.6827, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.19857117853903802, | |
| "grad_norm": 0.15493592619895935, | |
| "learning_rate": 4.999445954258552e-05, | |
| "loss": 0.6916, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.20366274721952615, | |
| "grad_norm": 0.1339859962463379, | |
| "learning_rate": 4.999431747957489e-05, | |
| "loss": 0.6862, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2087543159000143, | |
| "grad_norm": 0.17246641218662262, | |
| "learning_rate": 4.9994175416564266e-05, | |
| "loss": 0.6915, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21384588458050247, | |
| "grad_norm": 0.09907015413045883, | |
| "learning_rate": 4.999403335355364e-05, | |
| "loss": 0.6884, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.21893745326099062, | |
| "grad_norm": 0.13688722252845764, | |
| "learning_rate": 4.999389129054301e-05, | |
| "loss": 0.6894, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22402902194147878, | |
| "grad_norm": 0.1572660207748413, | |
| "learning_rate": 4.9993749227532385e-05, | |
| "loss": 0.6857, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.22912059062196694, | |
| "grad_norm": 0.22315748035907745, | |
| "learning_rate": 4.999360716452176e-05, | |
| "loss": 0.6819, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2342121593024551, | |
| "grad_norm": 0.10592522472143173, | |
| "learning_rate": 4.9993465101511125e-05, | |
| "loss": 0.6886, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.23930372798294325, | |
| "grad_norm": 0.10022767633199692, | |
| "learning_rate": 4.99933230385005e-05, | |
| "loss": 0.687, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2443952966634314, | |
| "grad_norm": 0.12280535697937012, | |
| "learning_rate": 4.999318097548987e-05, | |
| "loss": 0.6862, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.24948686534391956, | |
| "grad_norm": 0.11044813692569733, | |
| "learning_rate": 4.9993038912479244e-05, | |
| "loss": 0.6889, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2545784340244077, | |
| "grad_norm": 0.13826850056648254, | |
| "learning_rate": 4.999289684946862e-05, | |
| "loss": 0.6875, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2596700027048959, | |
| "grad_norm": 0.10267098248004913, | |
| "learning_rate": 4.999275478645799e-05, | |
| "loss": 0.6877, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26476157138538403, | |
| "grad_norm": 0.08445768803358078, | |
| "learning_rate": 4.999261272344736e-05, | |
| "loss": 0.6879, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2698531400658722, | |
| "grad_norm": 0.1590685397386551, | |
| "learning_rate": 4.999247066043673e-05, | |
| "loss": 0.6872, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27494470874636034, | |
| "grad_norm": 0.1537754386663437, | |
| "learning_rate": 4.99923285974261e-05, | |
| "loss": 0.6812, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2800362774268485, | |
| "grad_norm": 0.12273769080638885, | |
| "learning_rate": 4.9992186534415476e-05, | |
| "loss": 0.6855, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.28512784610733666, | |
| "grad_norm": 0.19177380204200745, | |
| "learning_rate": 4.999204447140485e-05, | |
| "loss": 0.6857, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.29021941478782476, | |
| "grad_norm": 0.1194639578461647, | |
| "learning_rate": 4.999190240839422e-05, | |
| "loss": 0.6851, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2953109834683129, | |
| "grad_norm": 0.12458167970180511, | |
| "learning_rate": 4.9991760345383595e-05, | |
| "loss": 0.6875, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.30040255214880107, | |
| "grad_norm": 0.15139921009540558, | |
| "learning_rate": 4.999161828237297e-05, | |
| "loss": 0.6843, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.30549412082928923, | |
| "grad_norm": 0.13201646506786346, | |
| "learning_rate": 4.9991476219362335e-05, | |
| "loss": 0.688, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3105856895097774, | |
| "grad_norm": 0.10855768620967865, | |
| "learning_rate": 4.999133415635171e-05, | |
| "loss": 0.6859, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.31567725819026554, | |
| "grad_norm": 0.14113789796829224, | |
| "learning_rate": 4.999119209334108e-05, | |
| "loss": 0.6858, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3207688268707537, | |
| "grad_norm": 0.19037926197052002, | |
| "learning_rate": 4.9991050030330454e-05, | |
| "loss": 0.6851, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.32586039555124185, | |
| "grad_norm": 0.18522581458091736, | |
| "learning_rate": 4.999090796731983e-05, | |
| "loss": 0.6854, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.33095196423173, | |
| "grad_norm": 0.24384431540966034, | |
| "learning_rate": 4.99907659043092e-05, | |
| "loss": 0.6852, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.33604353291221817, | |
| "grad_norm": 0.21666169166564941, | |
| "learning_rate": 4.999062384129857e-05, | |
| "loss": 0.6836, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3411351015927063, | |
| "grad_norm": 0.1427813023328781, | |
| "learning_rate": 4.9990481778287946e-05, | |
| "loss": 0.6899, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3462266702731945, | |
| "grad_norm": 0.09287853538990021, | |
| "learning_rate": 4.999033971527732e-05, | |
| "loss": 0.6861, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.35131823895368264, | |
| "grad_norm": 0.1490527093410492, | |
| "learning_rate": 4.999019765226669e-05, | |
| "loss": 0.6859, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3564098076341708, | |
| "grad_norm": 0.0858352780342102, | |
| "learning_rate": 4.9990055589256066e-05, | |
| "loss": 0.6888, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.36150137631465895, | |
| "grad_norm": 0.15133963525295258, | |
| "learning_rate": 4.998991352624544e-05, | |
| "loss": 0.6837, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3665929449951471, | |
| "grad_norm": 0.14562425017356873, | |
| "learning_rate": 4.9989771463234805e-05, | |
| "loss": 0.6862, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.37168451367563526, | |
| "grad_norm": 0.15240037441253662, | |
| "learning_rate": 4.998962940022418e-05, | |
| "loss": 0.6839, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3767760823561234, | |
| "grad_norm": 0.1231294646859169, | |
| "learning_rate": 4.9989487337213544e-05, | |
| "loss": 0.6868, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3818676510366116, | |
| "grad_norm": 0.14511612057685852, | |
| "learning_rate": 4.998934527420292e-05, | |
| "loss": 0.6816, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.38695921971709973, | |
| "grad_norm": 0.1693543940782547, | |
| "learning_rate": 4.998920321119229e-05, | |
| "loss": 0.6843, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3920507883975879, | |
| "grad_norm": 0.09580985456705093, | |
| "learning_rate": 4.9989061148181664e-05, | |
| "loss": 0.6875, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.39714235707807605, | |
| "grad_norm": 0.1047905758023262, | |
| "learning_rate": 4.998891908517104e-05, | |
| "loss": 0.6859, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4022339257585642, | |
| "grad_norm": 0.1090409904718399, | |
| "learning_rate": 4.998877702216041e-05, | |
| "loss": 0.6852, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4073254944390523, | |
| "grad_norm": 0.12578189373016357, | |
| "learning_rate": 4.998863495914978e-05, | |
| "loss": 0.6867, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.41241706311954046, | |
| "grad_norm": 0.11900747567415237, | |
| "learning_rate": 4.9988492896139156e-05, | |
| "loss": 0.6814, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4175086318000286, | |
| "grad_norm": 0.11401454359292984, | |
| "learning_rate": 4.998835083312853e-05, | |
| "loss": 0.6899, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.4226002004805168, | |
| "grad_norm": 0.1015952005982399, | |
| "learning_rate": 4.99882087701179e-05, | |
| "loss": 0.6865, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.42769176916100493, | |
| "grad_norm": 0.16676318645477295, | |
| "learning_rate": 4.9988066707107276e-05, | |
| "loss": 0.6891, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4327833378414931, | |
| "grad_norm": 0.10982430726289749, | |
| "learning_rate": 4.998792464409665e-05, | |
| "loss": 0.6863, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.43787490652198124, | |
| "grad_norm": 0.13861846923828125, | |
| "learning_rate": 4.9987782581086015e-05, | |
| "loss": 0.6898, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.4429664752024694, | |
| "grad_norm": 0.09421814233064651, | |
| "learning_rate": 4.998764051807539e-05, | |
| "loss": 0.6861, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.44805804388295756, | |
| "grad_norm": 0.14085189998149872, | |
| "learning_rate": 4.998749845506476e-05, | |
| "loss": 0.683, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4531496125634457, | |
| "grad_norm": 0.1671237349510193, | |
| "learning_rate": 4.9987356392054134e-05, | |
| "loss": 0.6835, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.45824118124393387, | |
| "grad_norm": 0.13570742309093475, | |
| "learning_rate": 4.998721432904351e-05, | |
| "loss": 0.685, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.463332749924422, | |
| "grad_norm": 0.10402018576860428, | |
| "learning_rate": 4.998707226603288e-05, | |
| "loss": 0.6872, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.4684243186049102, | |
| "grad_norm": 0.10226580500602722, | |
| "learning_rate": 4.9986930203022254e-05, | |
| "loss": 0.6871, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.47351588728539834, | |
| "grad_norm": 0.10132193565368652, | |
| "learning_rate": 4.998678814001163e-05, | |
| "loss": 0.6902, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4786074559658865, | |
| "grad_norm": 0.11389295756816864, | |
| "learning_rate": 4.9986646077001e-05, | |
| "loss": 0.6818, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.48369902464637465, | |
| "grad_norm": 0.13445314764976501, | |
| "learning_rate": 4.9986504013990366e-05, | |
| "loss": 0.6882, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4887905933268628, | |
| "grad_norm": 0.08756639063358307, | |
| "learning_rate": 4.998636195097974e-05, | |
| "loss": 0.6893, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.49388216200735097, | |
| "grad_norm": 0.08042973279953003, | |
| "learning_rate": 4.998621988796911e-05, | |
| "loss": 0.6891, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.4989737306878391, | |
| "grad_norm": 0.10082978755235672, | |
| "learning_rate": 4.9986077824958485e-05, | |
| "loss": 0.6903, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5040652993683272, | |
| "grad_norm": 0.0936272069811821, | |
| "learning_rate": 4.998593576194786e-05, | |
| "loss": 0.6864, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5091568680488154, | |
| "grad_norm": 0.12317179143428802, | |
| "learning_rate": 4.9985793698937225e-05, | |
| "loss": 0.6856, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5142484367293035, | |
| "grad_norm": 0.12630991637706757, | |
| "learning_rate": 4.99856516359266e-05, | |
| "loss": 0.6843, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5193400054097917, | |
| "grad_norm": 0.13727591931819916, | |
| "learning_rate": 4.998550957291597e-05, | |
| "loss": 0.6844, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5244315740902799, | |
| "grad_norm": 0.20466050505638123, | |
| "learning_rate": 4.9985367509905344e-05, | |
| "loss": 0.6826, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5295231427707681, | |
| "grad_norm": 0.18256479501724243, | |
| "learning_rate": 4.998522544689472e-05, | |
| "loss": 0.6779, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5346147114512562, | |
| "grad_norm": 0.20778831839561462, | |
| "learning_rate": 4.998508338388409e-05, | |
| "loss": 0.6853, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5397062801317444, | |
| "grad_norm": 0.08918287605047226, | |
| "learning_rate": 4.9984941320873464e-05, | |
| "loss": 0.6867, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5447978488122325, | |
| "grad_norm": 0.08476213365793228, | |
| "learning_rate": 4.9984799257862837e-05, | |
| "loss": 0.6843, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5498894174927207, | |
| "grad_norm": 0.11851644515991211, | |
| "learning_rate": 4.998465719485221e-05, | |
| "loss": 0.6831, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5549809861732088, | |
| "grad_norm": 0.18159732222557068, | |
| "learning_rate": 4.998451513184158e-05, | |
| "loss": 0.6827, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.560072554853697, | |
| "grad_norm": 0.1184081956744194, | |
| "learning_rate": 4.9984373068830956e-05, | |
| "loss": 0.6835, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5651641235341851, | |
| "grad_norm": 0.13530392944812775, | |
| "learning_rate": 4.998423100582032e-05, | |
| "loss": 0.6832, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5702556922146733, | |
| "grad_norm": 0.17794091999530792, | |
| "learning_rate": 4.9984088942809695e-05, | |
| "loss": 0.6844, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5753472608951614, | |
| "grad_norm": 0.18658983707427979, | |
| "learning_rate": 4.998394687979907e-05, | |
| "loss": 0.682, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5804388295756495, | |
| "grad_norm": 0.11487103253602982, | |
| "learning_rate": 4.998380481678844e-05, | |
| "loss": 0.687, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5855303982561377, | |
| "grad_norm": 0.09985563158988953, | |
| "learning_rate": 4.9983662753777815e-05, | |
| "loss": 0.6855, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5906219669366258, | |
| "grad_norm": 0.1510723978281021, | |
| "learning_rate": 4.998352069076718e-05, | |
| "loss": 0.684, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.595713535617114, | |
| "grad_norm": 0.15650640428066254, | |
| "learning_rate": 4.9983378627756554e-05, | |
| "loss": 0.6807, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6008051042976021, | |
| "grad_norm": 0.3100273311138153, | |
| "learning_rate": 4.998323656474593e-05, | |
| "loss": 0.6837, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6058966729780904, | |
| "grad_norm": 0.09822337329387665, | |
| "learning_rate": 4.99830945017353e-05, | |
| "loss": 0.6851, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6109882416585785, | |
| "grad_norm": 0.16111738979816437, | |
| "learning_rate": 4.9982952438724673e-05, | |
| "loss": 0.6827, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6160798103390667, | |
| "grad_norm": 0.1878943145275116, | |
| "learning_rate": 4.9982810375714047e-05, | |
| "loss": 0.6871, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6211713790195548, | |
| "grad_norm": 0.1281467080116272, | |
| "learning_rate": 4.998266831270342e-05, | |
| "loss": 0.6866, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.626262947700043, | |
| "grad_norm": 0.1051391065120697, | |
| "learning_rate": 4.998252624969279e-05, | |
| "loss": 0.6869, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6313545163805311, | |
| "grad_norm": 0.138059601187706, | |
| "learning_rate": 4.9982384186682166e-05, | |
| "loss": 0.6825, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6364460850610193, | |
| "grad_norm": 0.10719313472509384, | |
| "learning_rate": 4.998224212367153e-05, | |
| "loss": 0.6837, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6415376537415074, | |
| "grad_norm": 0.09252595156431198, | |
| "learning_rate": 4.9982100060660905e-05, | |
| "loss": 0.689, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6466292224219956, | |
| "grad_norm": 0.12894387543201447, | |
| "learning_rate": 4.998195799765028e-05, | |
| "loss": 0.6833, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6517207911024837, | |
| "grad_norm": 0.10794473439455032, | |
| "learning_rate": 4.998181593463965e-05, | |
| "loss": 0.6866, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6568123597829719, | |
| "grad_norm": 0.11546550691127777, | |
| "learning_rate": 4.9981673871629025e-05, | |
| "loss": 0.6861, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.66190392846346, | |
| "grad_norm": 0.10733726620674133, | |
| "learning_rate": 4.99815318086184e-05, | |
| "loss": 0.683, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6669954971439482, | |
| "grad_norm": 0.17388881742954254, | |
| "learning_rate": 4.998138974560777e-05, | |
| "loss": 0.686, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6720870658244363, | |
| "grad_norm": 0.15069304406642914, | |
| "learning_rate": 4.9981247682597144e-05, | |
| "loss": 0.6828, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6771786345049245, | |
| "grad_norm": 0.14276649057865143, | |
| "learning_rate": 4.998110561958652e-05, | |
| "loss": 0.6814, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6822702031854126, | |
| "grad_norm": 0.12937600910663605, | |
| "learning_rate": 4.998096355657589e-05, | |
| "loss": 0.6868, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6873617718659009, | |
| "grad_norm": 0.1466054916381836, | |
| "learning_rate": 4.998082149356526e-05, | |
| "loss": 0.6848, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.692453340546389, | |
| "grad_norm": 0.14180545508861542, | |
| "learning_rate": 4.9980679430554636e-05, | |
| "loss": 0.6847, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.6975449092268771, | |
| "grad_norm": 0.11979173868894577, | |
| "learning_rate": 4.9980537367544e-05, | |
| "loss": 0.6809, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7026364779073653, | |
| "grad_norm": 0.15614405274391174, | |
| "learning_rate": 4.9980395304533376e-05, | |
| "loss": 0.6802, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7077280465878534, | |
| "grad_norm": 0.16178403794765472, | |
| "learning_rate": 4.998025324152274e-05, | |
| "loss": 0.6766, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7128196152683416, | |
| "grad_norm": 0.11734528839588165, | |
| "learning_rate": 4.9980111178512115e-05, | |
| "loss": 0.6853, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7179111839488297, | |
| "grad_norm": 0.09437315165996552, | |
| "learning_rate": 4.997996911550149e-05, | |
| "loss": 0.6859, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7230027526293179, | |
| "grad_norm": 0.08119911700487137, | |
| "learning_rate": 4.997982705249086e-05, | |
| "loss": 0.6902, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.728094321309806, | |
| "grad_norm": 0.14570364356040955, | |
| "learning_rate": 4.9979684989480235e-05, | |
| "loss": 0.6841, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7331858899902942, | |
| "grad_norm": 0.12333963066339493, | |
| "learning_rate": 4.997954292646961e-05, | |
| "loss": 0.6819, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7382774586707823, | |
| "grad_norm": 0.11946499347686768, | |
| "learning_rate": 4.997940086345898e-05, | |
| "loss": 0.6847, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7433690273512705, | |
| "grad_norm": 0.12417126446962357, | |
| "learning_rate": 4.9979258800448354e-05, | |
| "loss": 0.6826, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7484605960317586, | |
| "grad_norm": 0.11672031134366989, | |
| "learning_rate": 4.997911673743773e-05, | |
| "loss": 0.6831, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7535521647122468, | |
| "grad_norm": 0.1273321509361267, | |
| "learning_rate": 4.99789746744271e-05, | |
| "loss": 0.6828, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7586437333927349, | |
| "grad_norm": 0.1056080237030983, | |
| "learning_rate": 4.997883261141647e-05, | |
| "loss": 0.6868, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7637353020732232, | |
| "grad_norm": 0.12784817814826965, | |
| "learning_rate": 4.9978690548405846e-05, | |
| "loss": 0.6819, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7688268707537113, | |
| "grad_norm": 0.16047458350658417, | |
| "learning_rate": 4.997854848539521e-05, | |
| "loss": 0.6825, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.7739184394341995, | |
| "grad_norm": 0.11385879665613174, | |
| "learning_rate": 4.9978406422384586e-05, | |
| "loss": 0.686, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7790100081146876, | |
| "grad_norm": 0.13264243304729462, | |
| "learning_rate": 4.997826435937396e-05, | |
| "loss": 0.6799, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7841015767951758, | |
| "grad_norm": 0.2524195611476898, | |
| "learning_rate": 4.997812229636333e-05, | |
| "loss": 0.6771, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7891931454756639, | |
| "grad_norm": 0.14071324467658997, | |
| "learning_rate": 4.9977980233352705e-05, | |
| "loss": 0.6833, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.7942847141561521, | |
| "grad_norm": 0.12755858898162842, | |
| "learning_rate": 4.997783817034208e-05, | |
| "loss": 0.6831, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7993762828366402, | |
| "grad_norm": 0.17357097566127777, | |
| "learning_rate": 4.997769610733145e-05, | |
| "loss": 0.6829, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8044678515171284, | |
| "grad_norm": 0.13588126003742218, | |
| "learning_rate": 4.997755404432082e-05, | |
| "loss": 0.6896, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8095594201976165, | |
| "grad_norm": 0.0981392115354538, | |
| "learning_rate": 4.997741198131019e-05, | |
| "loss": 0.6859, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8146509888781046, | |
| "grad_norm": 0.13461001217365265, | |
| "learning_rate": 4.9977269918299564e-05, | |
| "loss": 0.6811, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8197425575585928, | |
| "grad_norm": 0.25011003017425537, | |
| "learning_rate": 4.997712785528894e-05, | |
| "loss": 0.6801, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8248341262390809, | |
| "grad_norm": 0.19415059685707092, | |
| "learning_rate": 4.997698579227831e-05, | |
| "loss": 0.68, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8299256949195691, | |
| "grad_norm": 0.1742919236421585, | |
| "learning_rate": 4.997684372926768e-05, | |
| "loss": 0.684, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8350172636000572, | |
| "grad_norm": 0.12717512249946594, | |
| "learning_rate": 4.9976701666257056e-05, | |
| "loss": 0.6864, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8401088322805454, | |
| "grad_norm": 0.09787630289793015, | |
| "learning_rate": 4.997655960324642e-05, | |
| "loss": 0.6865, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8452004009610335, | |
| "grad_norm": 0.11440135538578033, | |
| "learning_rate": 4.9976417540235796e-05, | |
| "loss": 0.6846, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.8502919696415218, | |
| "grad_norm": 0.13598810136318207, | |
| "learning_rate": 4.997627547722517e-05, | |
| "loss": 0.6861, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8553835383220099, | |
| "grad_norm": 0.1623242348432541, | |
| "learning_rate": 4.997613341421454e-05, | |
| "loss": 0.6797, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8604751070024981, | |
| "grad_norm": 0.12565261125564575, | |
| "learning_rate": 4.9975991351203915e-05, | |
| "loss": 0.6847, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.8655666756829862, | |
| "grad_norm": 0.11019585281610489, | |
| "learning_rate": 4.997584928819329e-05, | |
| "loss": 0.6802, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8706582443634744, | |
| "grad_norm": 0.10026270151138306, | |
| "learning_rate": 4.997570722518266e-05, | |
| "loss": 0.6863, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8757498130439625, | |
| "grad_norm": 0.10043281316757202, | |
| "learning_rate": 4.9975565162172034e-05, | |
| "loss": 0.6843, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8808413817244507, | |
| "grad_norm": 0.0944572165608406, | |
| "learning_rate": 4.997542309916141e-05, | |
| "loss": 0.684, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.8859329504049388, | |
| "grad_norm": 0.12859639525413513, | |
| "learning_rate": 4.997528103615078e-05, | |
| "loss": 0.6856, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.891024519085427, | |
| "grad_norm": 0.11585383117198944, | |
| "learning_rate": 4.9975138973140154e-05, | |
| "loss": 0.6807, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8961160877659151, | |
| "grad_norm": 0.13746441900730133, | |
| "learning_rate": 4.997499691012953e-05, | |
| "loss": 0.6846, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9012076564464033, | |
| "grad_norm": 0.09316791594028473, | |
| "learning_rate": 4.997485484711889e-05, | |
| "loss": 0.6848, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9062992251268914, | |
| "grad_norm": 0.07422750443220139, | |
| "learning_rate": 4.9974712784108266e-05, | |
| "loss": 0.688, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9113907938073796, | |
| "grad_norm": 0.08577447384595871, | |
| "learning_rate": 4.997457072109763e-05, | |
| "loss": 0.6856, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9164823624878677, | |
| "grad_norm": 0.09143663942813873, | |
| "learning_rate": 4.9974428658087006e-05, | |
| "loss": 0.6848, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.921573931168356, | |
| "grad_norm": 0.10064688324928284, | |
| "learning_rate": 4.997428659507638e-05, | |
| "loss": 0.6889, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.926665499848844, | |
| "grad_norm": 0.0921172946691513, | |
| "learning_rate": 4.997414453206575e-05, | |
| "loss": 0.6824, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9317570685293322, | |
| "grad_norm": 0.12253455817699432, | |
| "learning_rate": 4.9974002469055125e-05, | |
| "loss": 0.686, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9368486372098204, | |
| "grad_norm": 0.16046911478042603, | |
| "learning_rate": 4.99738604060445e-05, | |
| "loss": 0.6833, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9419402058903085, | |
| "grad_norm": 0.1947670727968216, | |
| "learning_rate": 4.997371834303387e-05, | |
| "loss": 0.676, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9470317745707967, | |
| "grad_norm": 0.17206092178821564, | |
| "learning_rate": 4.9973576280023244e-05, | |
| "loss": 0.6832, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9521233432512848, | |
| "grad_norm": 0.16195142269134521, | |
| "learning_rate": 4.997343421701262e-05, | |
| "loss": 0.681, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.957214911931773, | |
| "grad_norm": 0.1436363011598587, | |
| "learning_rate": 4.997329215400199e-05, | |
| "loss": 0.6847, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.9623064806122611, | |
| "grad_norm": 0.11514883488416672, | |
| "learning_rate": 4.9973150090991364e-05, | |
| "loss": 0.6823, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.9673980492927493, | |
| "grad_norm": 0.11169356852769852, | |
| "learning_rate": 4.997300802798074e-05, | |
| "loss": 0.6859, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9724896179732374, | |
| "grad_norm": 0.09914061427116394, | |
| "learning_rate": 4.99728659649701e-05, | |
| "loss": 0.6848, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.9775811866537256, | |
| "grad_norm": 0.10717228055000305, | |
| "learning_rate": 4.9972723901959476e-05, | |
| "loss": 0.6833, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.9826727553342137, | |
| "grad_norm": 0.11209560185670853, | |
| "learning_rate": 4.997258183894885e-05, | |
| "loss": 0.6861, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.9877643240147019, | |
| "grad_norm": 0.1293047070503235, | |
| "learning_rate": 4.997243977593822e-05, | |
| "loss": 0.6833, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.99285589269519, | |
| "grad_norm": 0.13042615354061127, | |
| "learning_rate": 4.9972297712927595e-05, | |
| "loss": 0.6837, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.9979474613756782, | |
| "grad_norm": 0.1377701610326767, | |
| "learning_rate": 4.997215564991697e-05, | |
| "loss": 0.6858, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.002545784340244, | |
| "grad_norm": 0.13352081179618835, | |
| "learning_rate": 4.997201358690634e-05, | |
| "loss": 0.6173, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.0076373530207323, | |
| "grad_norm": 0.12459533661603928, | |
| "learning_rate": 4.9971871523895715e-05, | |
| "loss": 0.6836, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0127289217012203, | |
| "grad_norm": 0.10235695540904999, | |
| "learning_rate": 4.997172946088509e-05, | |
| "loss": 0.6867, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.0178204903817085, | |
| "grad_norm": 0.0960664227604866, | |
| "learning_rate": 4.9971587397874454e-05, | |
| "loss": 0.6847, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0229120590621967, | |
| "grad_norm": 0.12099937349557877, | |
| "learning_rate": 4.997144533486383e-05, | |
| "loss": 0.6828, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.028003627742685, | |
| "grad_norm": 0.10949967801570892, | |
| "learning_rate": 4.99713032718532e-05, | |
| "loss": 0.6828, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.033095196423173, | |
| "grad_norm": 0.09417010843753815, | |
| "learning_rate": 4.9971161208842573e-05, | |
| "loss": 0.6866, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.0381867651036611, | |
| "grad_norm": 0.08539358526468277, | |
| "learning_rate": 4.9971019145831947e-05, | |
| "loss": 0.6869, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.0432783337841494, | |
| "grad_norm": 0.10147163271903992, | |
| "learning_rate": 4.997087708282131e-05, | |
| "loss": 0.68, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.0483699024646376, | |
| "grad_norm": 0.15451493859291077, | |
| "learning_rate": 4.9970735019810686e-05, | |
| "loss": 0.6826, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.0534614711451256, | |
| "grad_norm": 0.09405049681663513, | |
| "learning_rate": 4.997059295680006e-05, | |
| "loss": 0.6868, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.0585530398256138, | |
| "grad_norm": 0.12649065256118774, | |
| "learning_rate": 4.997045089378943e-05, | |
| "loss": 0.6845, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.063644608506102, | |
| "grad_norm": 0.12368927896022797, | |
| "learning_rate": 4.9970308830778805e-05, | |
| "loss": 0.6804, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.0687361771865902, | |
| "grad_norm": 0.15372063219547272, | |
| "learning_rate": 4.997016676776818e-05, | |
| "loss": 0.6841, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.0738277458670782, | |
| "grad_norm": 0.14659467339515686, | |
| "learning_rate": 4.997002470475755e-05, | |
| "loss": 0.6808, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.0789193145475664, | |
| "grad_norm": 0.15990343689918518, | |
| "learning_rate": 4.9969882641746925e-05, | |
| "loss": 0.6824, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.0840108832280546, | |
| "grad_norm": 0.1342097967863083, | |
| "learning_rate": 4.99697405787363e-05, | |
| "loss": 0.6819, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.0891024519085426, | |
| "grad_norm": 0.1691288948059082, | |
| "learning_rate": 4.996959851572567e-05, | |
| "loss": 0.6803, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.0941940205890308, | |
| "grad_norm": 0.12277499586343765, | |
| "learning_rate": 4.9969456452715044e-05, | |
| "loss": 0.683, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.099285589269519, | |
| "grad_norm": 0.19435428082942963, | |
| "learning_rate": 4.996931438970441e-05, | |
| "loss": 0.6824, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.1043771579500072, | |
| "grad_norm": 0.0905819982290268, | |
| "learning_rate": 4.996917232669378e-05, | |
| "loss": 0.6894, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.1094687266304952, | |
| "grad_norm": 0.07771383225917816, | |
| "learning_rate": 4.9969030263683156e-05, | |
| "loss": 0.6878, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1145602953109834, | |
| "grad_norm": 0.10115786641836166, | |
| "learning_rate": 4.996888820067253e-05, | |
| "loss": 0.6818, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.1196518639914717, | |
| "grad_norm": 0.10046926885843277, | |
| "learning_rate": 4.99687461376619e-05, | |
| "loss": 0.6872, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.1247434326719599, | |
| "grad_norm": 0.1584903746843338, | |
| "learning_rate": 4.996860407465127e-05, | |
| "loss": 0.6772, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.1298350013524479, | |
| "grad_norm": 0.19328419864177704, | |
| "learning_rate": 4.996846201164064e-05, | |
| "loss": 0.6814, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.134926570032936, | |
| "grad_norm": 0.12247782945632935, | |
| "learning_rate": 4.9968319948630015e-05, | |
| "loss": 0.6817, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.1400181387134243, | |
| "grad_norm": 0.13911662995815277, | |
| "learning_rate": 4.996817788561939e-05, | |
| "loss": 0.678, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.1451097073939125, | |
| "grad_norm": 0.16294950246810913, | |
| "learning_rate": 4.996803582260876e-05, | |
| "loss": 0.6825, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.1502012760744005, | |
| "grad_norm": 0.15820349752902985, | |
| "learning_rate": 4.9967893759598135e-05, | |
| "loss": 0.6781, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.1552928447548887, | |
| "grad_norm": 0.13467393815517426, | |
| "learning_rate": 4.996775169658751e-05, | |
| "loss": 0.6862, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.160384413435377, | |
| "grad_norm": 0.11259343475103378, | |
| "learning_rate": 4.996760963357688e-05, | |
| "loss": 0.6809, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.165475982115865, | |
| "grad_norm": 0.13340143859386444, | |
| "learning_rate": 4.9967467570566254e-05, | |
| "loss": 0.6805, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.170567550796353, | |
| "grad_norm": 0.12365837395191193, | |
| "learning_rate": 4.996732550755562e-05, | |
| "loss": 0.6802, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.1756591194768413, | |
| "grad_norm": 0.09431267529726028, | |
| "learning_rate": 4.996718344454499e-05, | |
| "loss": 0.6903, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.1807506881573295, | |
| "grad_norm": 0.08034133911132812, | |
| "learning_rate": 4.9967041381534366e-05, | |
| "loss": 0.6837, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.1858422568378177, | |
| "grad_norm": 0.07523014396429062, | |
| "learning_rate": 4.996689931852374e-05, | |
| "loss": 0.6836, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.1909338255183057, | |
| "grad_norm": 0.1202087476849556, | |
| "learning_rate": 4.996675725551311e-05, | |
| "loss": 0.6771, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.196025394198794, | |
| "grad_norm": 0.13852781057357788, | |
| "learning_rate": 4.9966615192502486e-05, | |
| "loss": 0.6813, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.2011169628792822, | |
| "grad_norm": 0.1234976053237915, | |
| "learning_rate": 4.996647312949186e-05, | |
| "loss": 0.6799, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.2062085315597701, | |
| "grad_norm": 0.1308223158121109, | |
| "learning_rate": 4.996633106648123e-05, | |
| "loss": 0.6839, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.2113001002402584, | |
| "grad_norm": 0.11719993501901627, | |
| "learning_rate": 4.9966189003470605e-05, | |
| "loss": 0.6826, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2163916689207466, | |
| "grad_norm": 0.13690686225891113, | |
| "learning_rate": 4.996604694045998e-05, | |
| "loss": 0.6804, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.2214832376012348, | |
| "grad_norm": 0.12480480223894119, | |
| "learning_rate": 4.996590487744935e-05, | |
| "loss": 0.6831, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.2265748062817228, | |
| "grad_norm": 0.12938359379768372, | |
| "learning_rate": 4.9965762814438724e-05, | |
| "loss": 0.6821, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.231666374962211, | |
| "grad_norm": 0.14791236817836761, | |
| "learning_rate": 4.996562075142809e-05, | |
| "loss": 0.6813, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.2367579436426992, | |
| "grad_norm": 0.1450764387845993, | |
| "learning_rate": 4.9965478688417464e-05, | |
| "loss": 0.6835, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.2418495123231874, | |
| "grad_norm": 0.12077004462480545, | |
| "learning_rate": 4.996533662540683e-05, | |
| "loss": 0.6843, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.2469410810036754, | |
| "grad_norm": 0.0918821468949318, | |
| "learning_rate": 4.99651945623962e-05, | |
| "loss": 0.6836, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.2520326496841636, | |
| "grad_norm": 0.09863133728504181, | |
| "learning_rate": 4.9965052499385576e-05, | |
| "loss": 0.683, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.2571242183646518, | |
| "grad_norm": 0.10029463469982147, | |
| "learning_rate": 4.996491043637495e-05, | |
| "loss": 0.6839, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.26221578704514, | |
| "grad_norm": 0.11962137371301651, | |
| "learning_rate": 4.996476837336432e-05, | |
| "loss": 0.677, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.267307355725628, | |
| "grad_norm": 0.11363455653190613, | |
| "learning_rate": 4.9964626310353696e-05, | |
| "loss": 0.6843, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.2723989244061162, | |
| "grad_norm": 0.08753272145986557, | |
| "learning_rate": 4.996448424734307e-05, | |
| "loss": 0.6857, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.2774904930866045, | |
| "grad_norm": 0.10698520392179489, | |
| "learning_rate": 4.996434218433244e-05, | |
| "loss": 0.6855, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.2825820617670924, | |
| "grad_norm": 0.09481139481067657, | |
| "learning_rate": 4.9964200121321815e-05, | |
| "loss": 0.683, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.2876736304475807, | |
| "grad_norm": 0.15638168156147003, | |
| "learning_rate": 4.996405805831119e-05, | |
| "loss": 0.6842, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.2927651991280689, | |
| "grad_norm": 0.1133870854973793, | |
| "learning_rate": 4.996391599530056e-05, | |
| "loss": 0.6831, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.297856767808557, | |
| "grad_norm": 0.1086922213435173, | |
| "learning_rate": 4.9963773932289934e-05, | |
| "loss": 0.6832, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.3029483364890453, | |
| "grad_norm": 0.11375133693218231, | |
| "learning_rate": 4.99636318692793e-05, | |
| "loss": 0.6871, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.3080399051695333, | |
| "grad_norm": 0.11502251029014587, | |
| "learning_rate": 4.9963489806268674e-05, | |
| "loss": 0.6844, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.3131314738500215, | |
| "grad_norm": 0.13333244621753693, | |
| "learning_rate": 4.996334774325805e-05, | |
| "loss": 0.6821, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.3182230425305097, | |
| "grad_norm": 0.17771519720554352, | |
| "learning_rate": 4.996320568024742e-05, | |
| "loss": 0.6817, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.3233146112109977, | |
| "grad_norm": 0.10856124758720398, | |
| "learning_rate": 4.996306361723679e-05, | |
| "loss": 0.6832, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.328406179891486, | |
| "grad_norm": 0.13525483012199402, | |
| "learning_rate": 4.9962921554226166e-05, | |
| "loss": 0.6848, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.3334977485719741, | |
| "grad_norm": 0.14420652389526367, | |
| "learning_rate": 4.996277949121553e-05, | |
| "loss": 0.6831, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.3385893172524623, | |
| "grad_norm": 0.10660698264837265, | |
| "learning_rate": 4.9962637428204906e-05, | |
| "loss": 0.686, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.3436808859329505, | |
| "grad_norm": 0.16599448025226593, | |
| "learning_rate": 4.996249536519428e-05, | |
| "loss": 0.6826, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.3487724546134385, | |
| "grad_norm": 0.13518887758255005, | |
| "learning_rate": 4.996235330218365e-05, | |
| "loss": 0.6854, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.3538640232939267, | |
| "grad_norm": 0.1113041415810585, | |
| "learning_rate": 4.9962211239173025e-05, | |
| "loss": 0.6832, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.3589555919744147, | |
| "grad_norm": 0.13242138922214508, | |
| "learning_rate": 4.99620691761624e-05, | |
| "loss": 0.6814, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.364047160654903, | |
| "grad_norm": 0.18434931337833405, | |
| "learning_rate": 4.996192711315177e-05, | |
| "loss": 0.6808, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.3691387293353912, | |
| "grad_norm": 0.11528836935758591, | |
| "learning_rate": 4.9961785050141144e-05, | |
| "loss": 0.685, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.3742302980158794, | |
| "grad_norm": 0.1295492947101593, | |
| "learning_rate": 4.996164298713051e-05, | |
| "loss": 0.6825, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.3793218666963676, | |
| "grad_norm": 0.09657806158065796, | |
| "learning_rate": 4.9961500924119884e-05, | |
| "loss": 0.6825, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.3844134353768556, | |
| "grad_norm": 0.08716735243797302, | |
| "learning_rate": 4.996135886110926e-05, | |
| "loss": 0.6872, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.3895050040573438, | |
| "grad_norm": 0.0896734893321991, | |
| "learning_rate": 4.996121679809863e-05, | |
| "loss": 0.6817, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.394596572737832, | |
| "grad_norm": 0.10860587656497955, | |
| "learning_rate": 4.9961074735088e-05, | |
| "loss": 0.6815, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.39968814141832, | |
| "grad_norm": 0.1187656968832016, | |
| "learning_rate": 4.9960932672077376e-05, | |
| "loss": 0.686, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.4047797100988082, | |
| "grad_norm": 0.11682062596082687, | |
| "learning_rate": 4.996079060906675e-05, | |
| "loss": 0.6799, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.4098712787792964, | |
| "grad_norm": 0.14465422928333282, | |
| "learning_rate": 4.996064854605612e-05, | |
| "loss": 0.6823, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.4149628474597846, | |
| "grad_norm": 0.13644230365753174, | |
| "learning_rate": 4.9960506483045495e-05, | |
| "loss": 0.6828, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4200544161402728, | |
| "grad_norm": 0.09552885591983795, | |
| "learning_rate": 4.996036442003487e-05, | |
| "loss": 0.6828, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.4251459848207608, | |
| "grad_norm": 0.1287170648574829, | |
| "learning_rate": 4.996022235702424e-05, | |
| "loss": 0.6846, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.430237553501249, | |
| "grad_norm": 0.11409243196249008, | |
| "learning_rate": 4.9960080294013615e-05, | |
| "loss": 0.6844, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.4353291221817372, | |
| "grad_norm": 0.16463157534599304, | |
| "learning_rate": 4.995993823100298e-05, | |
| "loss": 0.6842, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.4404206908622252, | |
| "grad_norm": 0.12962253391742706, | |
| "learning_rate": 4.9959796167992354e-05, | |
| "loss": 0.6831, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.4455122595427135, | |
| "grad_norm": 0.1317017823457718, | |
| "learning_rate": 4.995965410498172e-05, | |
| "loss": 0.6825, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.4506038282232017, | |
| "grad_norm": 0.1253054440021515, | |
| "learning_rate": 4.9959512041971094e-05, | |
| "loss": 0.6869, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.4556953969036899, | |
| "grad_norm": 0.10968417674303055, | |
| "learning_rate": 4.995936997896047e-05, | |
| "loss": 0.6837, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.460786965584178, | |
| "grad_norm": 0.15329721570014954, | |
| "learning_rate": 4.995922791594984e-05, | |
| "loss": 0.6856, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.465878534264666, | |
| "grad_norm": 0.1338498741388321, | |
| "learning_rate": 4.995908585293921e-05, | |
| "loss": 0.6862, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.4709701029451543, | |
| "grad_norm": 0.10569129139184952, | |
| "learning_rate": 4.9958943789928586e-05, | |
| "loss": 0.6834, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.4760616716256423, | |
| "grad_norm": 0.14210055768489838, | |
| "learning_rate": 4.995880172691796e-05, | |
| "loss": 0.682, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.4811532403061305, | |
| "grad_norm": 0.13887226581573486, | |
| "learning_rate": 4.995865966390733e-05, | |
| "loss": 0.6813, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.4862448089866187, | |
| "grad_norm": 0.14252229034900665, | |
| "learning_rate": 4.9958517600896705e-05, | |
| "loss": 0.6819, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.491336377667107, | |
| "grad_norm": 0.1889895647764206, | |
| "learning_rate": 4.995837553788608e-05, | |
| "loss": 0.6833, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.4964279463475951, | |
| "grad_norm": 0.14179687201976776, | |
| "learning_rate": 4.995823347487545e-05, | |
| "loss": 0.6774, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.5015195150280833, | |
| "grad_norm": 0.1529311090707779, | |
| "learning_rate": 4.9958091411864825e-05, | |
| "loss": 0.6839, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.5066110837085713, | |
| "grad_norm": 0.11903861910104752, | |
| "learning_rate": 4.995794934885419e-05, | |
| "loss": 0.6828, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.5117026523890595, | |
| "grad_norm": 0.11958228051662445, | |
| "learning_rate": 4.9957807285843564e-05, | |
| "loss": 0.6809, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.5167942210695475, | |
| "grad_norm": 0.10305473953485489, | |
| "learning_rate": 4.995766522283294e-05, | |
| "loss": 0.6834, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.5218857897500357, | |
| "grad_norm": 0.11478529125452042, | |
| "learning_rate": 4.995752315982231e-05, | |
| "loss": 0.6835, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.526977358430524, | |
| "grad_norm": 0.13605500757694244, | |
| "learning_rate": 4.995738109681168e-05, | |
| "loss": 0.6804, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.5320689271110122, | |
| "grad_norm": 0.12643195688724518, | |
| "learning_rate": 4.9957239033801056e-05, | |
| "loss": 0.6763, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.5371604957915004, | |
| "grad_norm": 0.22794055938720703, | |
| "learning_rate": 4.995709697079043e-05, | |
| "loss": 0.6822, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.5422520644719884, | |
| "grad_norm": 0.1283722072839737, | |
| "learning_rate": 4.99569549077798e-05, | |
| "loss": 0.6823, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.5473436331524766, | |
| "grad_norm": 0.12796291708946228, | |
| "learning_rate": 4.995681284476917e-05, | |
| "loss": 0.6789, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.5524352018329646, | |
| "grad_norm": 0.20063504576683044, | |
| "learning_rate": 4.995667078175854e-05, | |
| "loss": 0.6782, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.5575267705134528, | |
| "grad_norm": 0.10560201853513718, | |
| "learning_rate": 4.9956528718747915e-05, | |
| "loss": 0.6899, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.562618339193941, | |
| "grad_norm": 0.09931265562772751, | |
| "learning_rate": 4.995638665573729e-05, | |
| "loss": 0.6857, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.5677099078744292, | |
| "grad_norm": 0.09285406023263931, | |
| "learning_rate": 4.995624459272666e-05, | |
| "loss": 0.6865, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.5728014765549174, | |
| "grad_norm": 0.11098553240299225, | |
| "learning_rate": 4.9956102529716035e-05, | |
| "loss": 0.6837, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.5778930452354056, | |
| "grad_norm": 0.12747269868850708, | |
| "learning_rate": 4.99559604667054e-05, | |
| "loss": 0.6805, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.5829846139158936, | |
| "grad_norm": 0.19148799777030945, | |
| "learning_rate": 4.9955818403694774e-05, | |
| "loss": 0.6809, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.5880761825963818, | |
| "grad_norm": 0.11333976686000824, | |
| "learning_rate": 4.995567634068415e-05, | |
| "loss": 0.6834, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.5931677512768698, | |
| "grad_norm": 0.12725278735160828, | |
| "learning_rate": 4.995553427767352e-05, | |
| "loss": 0.682, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.598259319957358, | |
| "grad_norm": 0.08800658583641052, | |
| "learning_rate": 4.995539221466289e-05, | |
| "loss": 0.6863, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.6033508886378462, | |
| "grad_norm": 0.1162288561463356, | |
| "learning_rate": 4.9955250151652266e-05, | |
| "loss": 0.6816, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.6084424573183345, | |
| "grad_norm": 0.11001812666654587, | |
| "learning_rate": 4.995510808864164e-05, | |
| "loss": 0.6834, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.6135340259988227, | |
| "grad_norm": 0.17772439122200012, | |
| "learning_rate": 4.995496602563101e-05, | |
| "loss": 0.6839, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.6186255946793109, | |
| "grad_norm": 0.14124180376529694, | |
| "learning_rate": 4.9954823962620386e-05, | |
| "loss": 0.6817, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.6237171633597989, | |
| "grad_norm": 0.12131723016500473, | |
| "learning_rate": 4.995468189960976e-05, | |
| "loss": 0.68, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.628808732040287, | |
| "grad_norm": 0.1277320384979248, | |
| "learning_rate": 4.995453983659913e-05, | |
| "loss": 0.6761, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.633900300720775, | |
| "grad_norm": 0.11980846524238586, | |
| "learning_rate": 4.99543977735885e-05, | |
| "loss": 0.6845, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.6389918694012633, | |
| "grad_norm": 0.18087904155254364, | |
| "learning_rate": 4.995425571057787e-05, | |
| "loss": 0.6815, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.6440834380817515, | |
| "grad_norm": 0.1640498787164688, | |
| "learning_rate": 4.9954113647567244e-05, | |
| "loss": 0.6799, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.6491750067622397, | |
| "grad_norm": 0.14339861273765564, | |
| "learning_rate": 4.995397158455662e-05, | |
| "loss": 0.684, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.654266575442728, | |
| "grad_norm": 0.11472135037183762, | |
| "learning_rate": 4.9953829521545984e-05, | |
| "loss": 0.6825, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.659358144123216, | |
| "grad_norm": 0.12307639420032501, | |
| "learning_rate": 4.995368745853536e-05, | |
| "loss": 0.6892, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.6644497128037041, | |
| "grad_norm": 0.09782890975475311, | |
| "learning_rate": 4.995354539552473e-05, | |
| "loss": 0.6823, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.6695412814841921, | |
| "grad_norm": 0.1154768094420433, | |
| "learning_rate": 4.99534033325141e-05, | |
| "loss": 0.6842, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.6746328501646803, | |
| "grad_norm": 0.18311528861522675, | |
| "learning_rate": 4.9953261269503476e-05, | |
| "loss": 0.6833, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.6797244188451685, | |
| "grad_norm": 0.11727318912744522, | |
| "learning_rate": 4.995311920649285e-05, | |
| "loss": 0.6819, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.6848159875256568, | |
| "grad_norm": 0.12900975346565247, | |
| "learning_rate": 4.995297714348222e-05, | |
| "loss": 0.6849, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.689907556206145, | |
| "grad_norm": 0.12244871258735657, | |
| "learning_rate": 4.9952835080471596e-05, | |
| "loss": 0.684, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.6949991248866332, | |
| "grad_norm": 0.11466418206691742, | |
| "learning_rate": 4.995269301746097e-05, | |
| "loss": 0.6833, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.7000906935671212, | |
| "grad_norm": 0.12230440229177475, | |
| "learning_rate": 4.995255095445034e-05, | |
| "loss": 0.6831, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.7051822622476094, | |
| "grad_norm": 0.13069362938404083, | |
| "learning_rate": 4.995240889143971e-05, | |
| "loss": 0.6842, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.7102738309280974, | |
| "grad_norm": 0.11203134804964066, | |
| "learning_rate": 4.995226682842908e-05, | |
| "loss": 0.6858, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.7153653996085856, | |
| "grad_norm": 0.1311291605234146, | |
| "learning_rate": 4.9952124765418454e-05, | |
| "loss": 0.6838, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.7204569682890738, | |
| "grad_norm": 0.1232665479183197, | |
| "learning_rate": 4.995198270240783e-05, | |
| "loss": 0.6841, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.725548536969562, | |
| "grad_norm": 0.1445329189300537, | |
| "learning_rate": 4.99518406393972e-05, | |
| "loss": 0.6801, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.7306401056500502, | |
| "grad_norm": 0.1403801292181015, | |
| "learning_rate": 4.9951698576386574e-05, | |
| "loss": 0.6863, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.7357316743305384, | |
| "grad_norm": 0.09926485270261765, | |
| "learning_rate": 4.995155651337595e-05, | |
| "loss": 0.6839, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.7408232430110264, | |
| "grad_norm": 0.10609301924705505, | |
| "learning_rate": 4.995141445036532e-05, | |
| "loss": 0.6851, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.7459148116915146, | |
| "grad_norm": 0.1048160120844841, | |
| "learning_rate": 4.995127238735469e-05, | |
| "loss": 0.6875, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.7510063803720026, | |
| "grad_norm": 0.10706604272127151, | |
| "learning_rate": 4.9951130324344066e-05, | |
| "loss": 0.684, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.7560979490524908, | |
| "grad_norm": 0.1004481241106987, | |
| "learning_rate": 4.995098826133344e-05, | |
| "loss": 0.6847, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.761189517732979, | |
| "grad_norm": 0.13026846945285797, | |
| "learning_rate": 4.9950846198322806e-05, | |
| "loss": 0.6822, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.7662810864134673, | |
| "grad_norm": 0.20907576382160187, | |
| "learning_rate": 4.995070413531218e-05, | |
| "loss": 0.6808, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.7713726550939555, | |
| "grad_norm": 0.14915932714939117, | |
| "learning_rate": 4.995056207230155e-05, | |
| "loss": 0.6803, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.7764642237744435, | |
| "grad_norm": 0.12906627357006073, | |
| "learning_rate": 4.995042000929092e-05, | |
| "loss": 0.6868, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.7815557924549317, | |
| "grad_norm": 0.10379557311534882, | |
| "learning_rate": 4.995027794628029e-05, | |
| "loss": 0.6853, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.7866473611354197, | |
| "grad_norm": 0.10871709883213043, | |
| "learning_rate": 4.9950135883269664e-05, | |
| "loss": 0.6863, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.7917389298159079, | |
| "grad_norm": 0.1513502597808838, | |
| "learning_rate": 4.994999382025904e-05, | |
| "loss": 0.6816, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.796830498496396, | |
| "grad_norm": 0.13802939653396606, | |
| "learning_rate": 4.994985175724841e-05, | |
| "loss": 0.6838, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.8019220671768843, | |
| "grad_norm": 0.13514472544193268, | |
| "learning_rate": 4.9949709694237784e-05, | |
| "loss": 0.6794, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.8070136358573725, | |
| "grad_norm": 0.16484974324703217, | |
| "learning_rate": 4.994956763122716e-05, | |
| "loss": 0.682, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.8121052045378607, | |
| "grad_norm": 0.11142993718385696, | |
| "learning_rate": 4.994942556821653e-05, | |
| "loss": 0.6816, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.8171967732183487, | |
| "grad_norm": 0.14259152114391327, | |
| "learning_rate": 4.99492835052059e-05, | |
| "loss": 0.6781, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.822288341898837, | |
| "grad_norm": 0.15921758115291595, | |
| "learning_rate": 4.9949141442195276e-05, | |
| "loss": 0.6855, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.827379910579325, | |
| "grad_norm": 0.09428475797176361, | |
| "learning_rate": 4.994899937918465e-05, | |
| "loss": 0.6833, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.8324714792598131, | |
| "grad_norm": 0.1155981793999672, | |
| "learning_rate": 4.994885731617402e-05, | |
| "loss": 0.6841, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.8375630479403013, | |
| "grad_norm": 0.10845302045345306, | |
| "learning_rate": 4.994871525316339e-05, | |
| "loss": 0.6846, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.8426546166207896, | |
| "grad_norm": 0.12848089635372162, | |
| "learning_rate": 4.994857319015276e-05, | |
| "loss": 0.682, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.8477461853012778, | |
| "grad_norm": 0.10348972678184509, | |
| "learning_rate": 4.9948431127142135e-05, | |
| "loss": 0.6842, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.852837753981766, | |
| "grad_norm": 0.13845866918563843, | |
| "learning_rate": 4.994828906413151e-05, | |
| "loss": 0.6812, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.857929322662254, | |
| "grad_norm": 0.11816728860139847, | |
| "learning_rate": 4.994814700112088e-05, | |
| "loss": 0.6812, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.8630208913427422, | |
| "grad_norm": 0.13902199268341064, | |
| "learning_rate": 4.9948004938110254e-05, | |
| "loss": 0.6871, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.8681124600232302, | |
| "grad_norm": 0.12729224562644958, | |
| "learning_rate": 4.994786287509962e-05, | |
| "loss": 0.6802, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.8732040287037184, | |
| "grad_norm": 0.14033198356628418, | |
| "learning_rate": 4.9947720812088994e-05, | |
| "loss": 0.6843, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.8782955973842066, | |
| "grad_norm": 0.12836380302906036, | |
| "learning_rate": 4.994757874907837e-05, | |
| "loss": 0.6833, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.8833871660646948, | |
| "grad_norm": 0.1290048211812973, | |
| "learning_rate": 4.994743668606774e-05, | |
| "loss": 0.6832, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.888478734745183, | |
| "grad_norm": 0.1284429430961609, | |
| "learning_rate": 4.994729462305711e-05, | |
| "loss": 0.6843, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.893570303425671, | |
| "grad_norm": 0.13112841546535492, | |
| "learning_rate": 4.9947152560046486e-05, | |
| "loss": 0.6811, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.8986618721061592, | |
| "grad_norm": 0.14525501430034637, | |
| "learning_rate": 4.994701049703586e-05, | |
| "loss": 0.6793, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.9037534407866472, | |
| "grad_norm": 0.1803501546382904, | |
| "learning_rate": 4.994686843402523e-05, | |
| "loss": 0.6791, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.9088450094671354, | |
| "grad_norm": 0.1837460994720459, | |
| "learning_rate": 4.99467263710146e-05, | |
| "loss": 0.6771, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.9139365781476236, | |
| "grad_norm": 0.12087200582027435, | |
| "learning_rate": 4.994658430800397e-05, | |
| "loss": 0.6836, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.9190281468281118, | |
| "grad_norm": 0.1253005713224411, | |
| "learning_rate": 4.9946442244993345e-05, | |
| "loss": 0.6822, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.9241197155086, | |
| "grad_norm": 0.11462333053350449, | |
| "learning_rate": 4.994630018198272e-05, | |
| "loss": 0.684, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.9292112841890883, | |
| "grad_norm": 0.1458183377981186, | |
| "learning_rate": 4.994615811897209e-05, | |
| "loss": 0.6812, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.9343028528695763, | |
| "grad_norm": 0.13514210283756256, | |
| "learning_rate": 4.9946016055961464e-05, | |
| "loss": 0.6883, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.9393944215500645, | |
| "grad_norm": 0.10077164322137833, | |
| "learning_rate": 4.994587399295084e-05, | |
| "loss": 0.6841, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.9444859902305525, | |
| "grad_norm": 0.1145828515291214, | |
| "learning_rate": 4.994573192994021e-05, | |
| "loss": 0.6794, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.9495775589110407, | |
| "grad_norm": 0.12171609699726105, | |
| "learning_rate": 4.994558986692958e-05, | |
| "loss": 0.6801, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.9546691275915289, | |
| "grad_norm": 0.1296948492527008, | |
| "learning_rate": 4.9945447803918956e-05, | |
| "loss": 0.6836, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.959760696272017, | |
| "grad_norm": 0.13795003294944763, | |
| "learning_rate": 4.994530574090833e-05, | |
| "loss": 0.6814, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.9648522649525053, | |
| "grad_norm": 0.10949226468801498, | |
| "learning_rate": 4.99451636778977e-05, | |
| "loss": 0.6851, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.9699438336329935, | |
| "grad_norm": 0.09504687041044235, | |
| "learning_rate": 4.994502161488707e-05, | |
| "loss": 0.6847, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.9750354023134815, | |
| "grad_norm": 0.12004721909761429, | |
| "learning_rate": 4.994487955187644e-05, | |
| "loss": 0.6853, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.9801269709939697, | |
| "grad_norm": 0.15672442317008972, | |
| "learning_rate": 4.994473748886581e-05, | |
| "loss": 0.678, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.9852185396744577, | |
| "grad_norm": 0.1672324538230896, | |
| "learning_rate": 4.994459542585518e-05, | |
| "loss": 0.6801, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.990310108354946, | |
| "grad_norm": 0.13963304460048676, | |
| "learning_rate": 4.9944453362844555e-05, | |
| "loss": 0.6896, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.9954016770354341, | |
| "grad_norm": 0.11426424980163574, | |
| "learning_rate": 4.994431129983393e-05, | |
| "loss": 0.6802, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.01827167347073555, | |
| "learning_rate": 4.99441692368233e-05, | |
| "loss": 0.6174, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.005091568680488, | |
| "grad_norm": 0.17911553382873535, | |
| "learning_rate": 4.9944027173812674e-05, | |
| "loss": 0.6826, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.0101831373609764, | |
| "grad_norm": 0.11154532432556152, | |
| "learning_rate": 4.994388511080205e-05, | |
| "loss": 0.6843, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.0152747060414646, | |
| "grad_norm": 0.09386030584573746, | |
| "learning_rate": 4.994374304779142e-05, | |
| "loss": 0.686, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.0203662747219524, | |
| "grad_norm": 0.09608808904886246, | |
| "learning_rate": 4.994360098478079e-05, | |
| "loss": 0.6791, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.0254578434024406, | |
| "grad_norm": 0.12537717819213867, | |
| "learning_rate": 4.9943458921770166e-05, | |
| "loss": 0.6842, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.030549412082929, | |
| "grad_norm": 0.09800703823566437, | |
| "learning_rate": 4.994331685875954e-05, | |
| "loss": 0.6859, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.035640980763417, | |
| "grad_norm": 0.07934601604938507, | |
| "learning_rate": 4.994317479574891e-05, | |
| "loss": 0.6846, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.0407325494439053, | |
| "grad_norm": 0.10269072651863098, | |
| "learning_rate": 4.994303273273828e-05, | |
| "loss": 0.6852, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.0458241181243935, | |
| "grad_norm": 0.09138213843107224, | |
| "learning_rate": 4.994289066972765e-05, | |
| "loss": 0.682, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.0509156868048817, | |
| "grad_norm": 0.1062936782836914, | |
| "learning_rate": 4.9942748606717025e-05, | |
| "loss": 0.6863, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.05600725548537, | |
| "grad_norm": 0.13446182012557983, | |
| "learning_rate": 4.99426065437064e-05, | |
| "loss": 0.68, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.0610988241658577, | |
| "grad_norm": 0.1352904587984085, | |
| "learning_rate": 4.994246448069577e-05, | |
| "loss": 0.68, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.066190392846346, | |
| "grad_norm": 0.14259877800941467, | |
| "learning_rate": 4.9942322417685144e-05, | |
| "loss": 0.684, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.071281961526834, | |
| "grad_norm": 0.1194225326180458, | |
| "learning_rate": 4.994218035467452e-05, | |
| "loss": 0.6845, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.0763735302073223, | |
| "grad_norm": 0.10988787561655045, | |
| "learning_rate": 4.994203829166389e-05, | |
| "loss": 0.6834, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.0814650988878105, | |
| "grad_norm": 0.10374101996421814, | |
| "learning_rate": 4.994189622865326e-05, | |
| "loss": 0.6833, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.0865566675682987, | |
| "grad_norm": 0.11888198554515839, | |
| "learning_rate": 4.994175416564263e-05, | |
| "loss": 0.683, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.091648236248787, | |
| "grad_norm": 0.11808530986309052, | |
| "learning_rate": 4.9941612102632e-05, | |
| "loss": 0.6813, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.096739804929275, | |
| "grad_norm": 0.12874440848827362, | |
| "learning_rate": 4.9941470039621376e-05, | |
| "loss": 0.6809, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.101831373609763, | |
| "grad_norm": 0.1372908353805542, | |
| "learning_rate": 4.994132797661075e-05, | |
| "loss": 0.6793, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.106922942290251, | |
| "grad_norm": 0.15299095213413239, | |
| "learning_rate": 4.994118591360012e-05, | |
| "loss": 0.6785, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.1120145109707393, | |
| "grad_norm": 0.1464032679796219, | |
| "learning_rate": 4.994104385058949e-05, | |
| "loss": 0.6804, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.1171060796512275, | |
| "grad_norm": 0.10995624214410782, | |
| "learning_rate": 4.994090178757886e-05, | |
| "loss": 0.6854, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.1221976483317158, | |
| "grad_norm": 0.1125839501619339, | |
| "learning_rate": 4.9940759724568235e-05, | |
| "loss": 0.6802, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.127289217012204, | |
| "grad_norm": 0.15469452738761902, | |
| "learning_rate": 4.994061766155761e-05, | |
| "loss": 0.6805, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.132380785692692, | |
| "grad_norm": 0.15448547899723053, | |
| "learning_rate": 4.994047559854698e-05, | |
| "loss": 0.6826, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.1374723543731804, | |
| "grad_norm": 0.12687282264232635, | |
| "learning_rate": 4.9940333535536354e-05, | |
| "loss": 0.681, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.142563923053668, | |
| "grad_norm": 0.13330869376659393, | |
| "learning_rate": 4.994019147252573e-05, | |
| "loss": 0.6811, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.1476554917341564, | |
| "grad_norm": 0.13483920693397522, | |
| "learning_rate": 4.99400494095151e-05, | |
| "loss": 0.6834, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.1527470604146446, | |
| "grad_norm": 0.08532749861478806, | |
| "learning_rate": 4.9939907346504474e-05, | |
| "loss": 0.6867, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.157838629095133, | |
| "grad_norm": 0.12028615176677704, | |
| "learning_rate": 4.993976528349385e-05, | |
| "loss": 0.6849, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.162930197775621, | |
| "grad_norm": 0.10255931317806244, | |
| "learning_rate": 4.993962322048322e-05, | |
| "loss": 0.6816, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.1680217664561092, | |
| "grad_norm": 0.16485556960105896, | |
| "learning_rate": 4.9939481157472586e-05, | |
| "loss": 0.6791, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.1731133351365974, | |
| "grad_norm": 0.1411302089691162, | |
| "learning_rate": 4.993933909446196e-05, | |
| "loss": 0.6788, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.178204903817085, | |
| "grad_norm": 0.18721655011177063, | |
| "learning_rate": 4.993919703145133e-05, | |
| "loss": 0.6854, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.1832964724975734, | |
| "grad_norm": 0.0997004359960556, | |
| "learning_rate": 4.9939054968440706e-05, | |
| "loss": 0.6842, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.1883880411780616, | |
| "grad_norm": 0.11703092604875565, | |
| "learning_rate": 4.993891290543007e-05, | |
| "loss": 0.68, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.19347960985855, | |
| "grad_norm": 0.13729970157146454, | |
| "learning_rate": 4.9938770842419445e-05, | |
| "loss": 0.6832, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.198571178539038, | |
| "grad_norm": 0.12172706425189972, | |
| "learning_rate": 4.993862877940882e-05, | |
| "loss": 0.6827, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.2036627472195263, | |
| "grad_norm": 0.12669777870178223, | |
| "learning_rate": 4.993848671639819e-05, | |
| "loss": 0.6829, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.2087543159000145, | |
| "grad_norm": 0.13186220824718475, | |
| "learning_rate": 4.9938344653387564e-05, | |
| "loss": 0.6824, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.2138458845805022, | |
| "grad_norm": 0.13194870948791504, | |
| "learning_rate": 4.993820259037694e-05, | |
| "loss": 0.6803, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.2189374532609905, | |
| "grad_norm": 0.14057835936546326, | |
| "learning_rate": 4.993806052736631e-05, | |
| "loss": 0.68, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.2240290219414787, | |
| "grad_norm": 0.12043063342571259, | |
| "learning_rate": 4.9937918464355684e-05, | |
| "loss": 0.6849, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.229120590621967, | |
| "grad_norm": 0.11859495937824249, | |
| "learning_rate": 4.993777640134506e-05, | |
| "loss": 0.6831, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.234212159302455, | |
| "grad_norm": 0.12299305200576782, | |
| "learning_rate": 4.993763433833443e-05, | |
| "loss": 0.6803, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.2393037279829433, | |
| "grad_norm": 0.12101336568593979, | |
| "learning_rate": 4.9937492275323796e-05, | |
| "loss": 0.6812, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.2443952966634315, | |
| "grad_norm": 0.10430170595645905, | |
| "learning_rate": 4.993735021231317e-05, | |
| "loss": 0.6866, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.2494868653439197, | |
| "grad_norm": 0.08973786234855652, | |
| "learning_rate": 4.993720814930254e-05, | |
| "loss": 0.6861, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.254578434024408, | |
| "grad_norm": 0.09560893476009369, | |
| "learning_rate": 4.9937066086291916e-05, | |
| "loss": 0.6821, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.2596700027048957, | |
| "grad_norm": 0.12744377553462982, | |
| "learning_rate": 4.993692402328129e-05, | |
| "loss": 0.6861, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.264761571385384, | |
| "grad_norm": 0.09390248358249664, | |
| "learning_rate": 4.993678196027066e-05, | |
| "loss": 0.6837, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.269853140065872, | |
| "grad_norm": 0.10652091354131699, | |
| "learning_rate": 4.9936639897260035e-05, | |
| "loss": 0.6821, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.2749447087463603, | |
| "grad_norm": 0.14594070613384247, | |
| "learning_rate": 4.993649783424941e-05, | |
| "loss": 0.6831, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.2800362774268486, | |
| "grad_norm": 0.11480095237493515, | |
| "learning_rate": 4.993635577123878e-05, | |
| "loss": 0.676, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.2851278461073368, | |
| "grad_norm": 0.15268968045711517, | |
| "learning_rate": 4.9936213708228154e-05, | |
| "loss": 0.6802, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.290219414787825, | |
| "grad_norm": 0.12245162576436996, | |
| "learning_rate": 4.993607164521753e-05, | |
| "loss": 0.6816, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.2953109834683127, | |
| "grad_norm": 0.09671120345592499, | |
| "learning_rate": 4.9935929582206894e-05, | |
| "loss": 0.6808, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.300402552148801, | |
| "grad_norm": 0.10151444375514984, | |
| "learning_rate": 4.993578751919627e-05, | |
| "loss": 0.6862, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.305494120829289, | |
| "grad_norm": 0.10020536184310913, | |
| "learning_rate": 4.993564545618564e-05, | |
| "loss": 0.6809, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.3105856895097774, | |
| "grad_norm": 0.20587410032749176, | |
| "learning_rate": 4.9935503393175006e-05, | |
| "loss": 0.6839, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.3156772581902656, | |
| "grad_norm": 0.12846329808235168, | |
| "learning_rate": 4.993536133016438e-05, | |
| "loss": 0.6814, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.320768826870754, | |
| "grad_norm": 0.12553255259990692, | |
| "learning_rate": 4.993521926715375e-05, | |
| "loss": 0.6828, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.325860395551242, | |
| "grad_norm": 0.11741780489683151, | |
| "learning_rate": 4.9935077204143125e-05, | |
| "loss": 0.6855, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.33095196423173, | |
| "grad_norm": 0.09674712270498276, | |
| "learning_rate": 4.99349351411325e-05, | |
| "loss": 0.6813, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.336043532912218, | |
| "grad_norm": 0.11124306917190552, | |
| "learning_rate": 4.993479307812187e-05, | |
| "loss": 0.6763, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.341135101592706, | |
| "grad_norm": 0.1364033818244934, | |
| "learning_rate": 4.9934651015111245e-05, | |
| "loss": 0.6798, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.3462266702731944, | |
| "grad_norm": 0.14521688222885132, | |
| "learning_rate": 4.993450895210062e-05, | |
| "loss": 0.6824, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.3513182389536826, | |
| "grad_norm": 0.10061439126729965, | |
| "learning_rate": 4.993436688908999e-05, | |
| "loss": 0.6841, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.356409807634171, | |
| "grad_norm": 0.09391237050294876, | |
| "learning_rate": 4.9934224826079364e-05, | |
| "loss": 0.6827, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.361501376314659, | |
| "grad_norm": 0.12690366804599762, | |
| "learning_rate": 4.993408276306874e-05, | |
| "loss": 0.6798, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.3665929449951473, | |
| "grad_norm": 0.11659922450780869, | |
| "learning_rate": 4.993394070005811e-05, | |
| "loss": 0.6805, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.3716845136756355, | |
| "grad_norm": 0.1206756979227066, | |
| "learning_rate": 4.993379863704748e-05, | |
| "loss": 0.6792, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.3767760823561233, | |
| "grad_norm": 0.11938859522342682, | |
| "learning_rate": 4.993365657403685e-05, | |
| "loss": 0.6837, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.3818676510366115, | |
| "grad_norm": 0.10022424161434174, | |
| "learning_rate": 4.993351451102622e-05, | |
| "loss": 0.6807, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.3869592197170997, | |
| "grad_norm": 0.14838755130767822, | |
| "learning_rate": 4.9933372448015596e-05, | |
| "loss": 0.6795, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.392050788397588, | |
| "grad_norm": 0.131904736161232, | |
| "learning_rate": 4.993323038500497e-05, | |
| "loss": 0.6855, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.397142357078076, | |
| "grad_norm": 0.1132061704993248, | |
| "learning_rate": 4.993308832199434e-05, | |
| "loss": 0.6792, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.4022339257585643, | |
| "grad_norm": 0.10466153919696808, | |
| "learning_rate": 4.993294625898371e-05, | |
| "loss": 0.6856, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.4073254944390525, | |
| "grad_norm": 0.108913853764534, | |
| "learning_rate": 4.993280419597308e-05, | |
| "loss": 0.6787, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.4124170631195403, | |
| "grad_norm": 0.12613457441329956, | |
| "learning_rate": 4.9932662132962455e-05, | |
| "loss": 0.6804, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.4175086318000285, | |
| "grad_norm": 0.11993265151977539, | |
| "learning_rate": 4.993252006995183e-05, | |
| "loss": 0.6809, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.4226002004805167, | |
| "grad_norm": 0.13760647177696228, | |
| "learning_rate": 4.99323780069412e-05, | |
| "loss": 0.6762, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.427691769161005, | |
| "grad_norm": 0.15461039543151855, | |
| "learning_rate": 4.9932235943930574e-05, | |
| "loss": 0.6808, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.432783337841493, | |
| "grad_norm": 0.11814858764410019, | |
| "learning_rate": 4.993209388091995e-05, | |
| "loss": 0.6807, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.4378749065219814, | |
| "grad_norm": 0.12167418003082275, | |
| "learning_rate": 4.993195181790932e-05, | |
| "loss": 0.6838, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.4429664752024696, | |
| "grad_norm": 0.13912709057331085, | |
| "learning_rate": 4.9931809754898687e-05, | |
| "loss": 0.6814, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.4480580438829573, | |
| "grad_norm": 0.1079849898815155, | |
| "learning_rate": 4.993166769188806e-05, | |
| "loss": 0.6802, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.4531496125634455, | |
| "grad_norm": 0.1483919620513916, | |
| "learning_rate": 4.993152562887743e-05, | |
| "loss": 0.6809, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.4582411812439338, | |
| "grad_norm": 0.1411130726337433, | |
| "learning_rate": 4.9931383565866806e-05, | |
| "loss": 0.6819, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.463332749924422, | |
| "grad_norm": 0.13872161507606506, | |
| "learning_rate": 4.993124150285618e-05, | |
| "loss": 0.6814, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.46842431860491, | |
| "grad_norm": 0.13207204639911652, | |
| "learning_rate": 4.993109943984555e-05, | |
| "loss": 0.6819, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.4735158872853984, | |
| "grad_norm": 0.13904866576194763, | |
| "learning_rate": 4.9930957376834925e-05, | |
| "loss": 0.6799, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.4786074559658866, | |
| "grad_norm": 0.10088212043046951, | |
| "learning_rate": 4.99308153138243e-05, | |
| "loss": 0.6849, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.483699024646375, | |
| "grad_norm": 0.15108828246593475, | |
| "learning_rate": 4.993067325081367e-05, | |
| "loss": 0.6824, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.488790593326863, | |
| "grad_norm": 0.11093771457672119, | |
| "learning_rate": 4.9930531187803044e-05, | |
| "loss": 0.6848, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.493882162007351, | |
| "grad_norm": 0.10378114134073257, | |
| "learning_rate": 4.993038912479242e-05, | |
| "loss": 0.6824, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.498973730687839, | |
| "grad_norm": 0.1797563135623932, | |
| "learning_rate": 4.993024706178179e-05, | |
| "loss": 0.6805, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.5040652993683272, | |
| "grad_norm": 0.13369685411453247, | |
| "learning_rate": 4.993010499877116e-05, | |
| "loss": 0.6798, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.5091568680488154, | |
| "grad_norm": 0.11391709744930267, | |
| "learning_rate": 4.992996293576053e-05, | |
| "loss": 0.6769, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.5142484367293036, | |
| "grad_norm": 0.15841761231422424, | |
| "learning_rate": 4.9929820872749896e-05, | |
| "loss": 0.6813, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.519340005409792, | |
| "grad_norm": 0.1152459904551506, | |
| "learning_rate": 4.992967880973927e-05, | |
| "loss": 0.6825, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.52443157409028, | |
| "grad_norm": 0.1523844301700592, | |
| "learning_rate": 4.992953674672864e-05, | |
| "loss": 0.6795, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.529523142770768, | |
| "grad_norm": 0.15071742236614227, | |
| "learning_rate": 4.9929394683718016e-05, | |
| "loss": 0.6778, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.534614711451256, | |
| "grad_norm": 0.0915883481502533, | |
| "learning_rate": 4.992925262070739e-05, | |
| "loss": 0.689, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.5397062801317443, | |
| "grad_norm": 0.08719677478075027, | |
| "learning_rate": 4.992911055769676e-05, | |
| "loss": 0.6831, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.5447978488122325, | |
| "grad_norm": 0.10521717369556427, | |
| "learning_rate": 4.9928968494686135e-05, | |
| "loss": 0.6838, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.5498894174927207, | |
| "grad_norm": 0.14673079550266266, | |
| "learning_rate": 4.992882643167551e-05, | |
| "loss": 0.6777, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.554980986173209, | |
| "grad_norm": 0.1252555549144745, | |
| "learning_rate": 4.992868436866488e-05, | |
| "loss": 0.6774, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.560072554853697, | |
| "grad_norm": 0.17313307523727417, | |
| "learning_rate": 4.9928542305654254e-05, | |
| "loss": 0.6846, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.565164123534185, | |
| "grad_norm": 0.12619802355766296, | |
| "learning_rate": 4.992840024264363e-05, | |
| "loss": 0.6827, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.5702556922146735, | |
| "grad_norm": 0.11647044122219086, | |
| "learning_rate": 4.9928258179633e-05, | |
| "loss": 0.6779, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.5753472608951613, | |
| "grad_norm": 0.11227191984653473, | |
| "learning_rate": 4.992811611662237e-05, | |
| "loss": 0.6767, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.5804388295756495, | |
| "grad_norm": 0.12041344493627548, | |
| "learning_rate": 4.992797405361174e-05, | |
| "loss": 0.6784, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.5855303982561377, | |
| "grad_norm": 0.14506416022777557, | |
| "learning_rate": 4.992783199060111e-05, | |
| "loss": 0.6798, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.590621966936626, | |
| "grad_norm": 0.10675019025802612, | |
| "learning_rate": 4.9927689927590486e-05, | |
| "loss": 0.684, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.595713535617114, | |
| "grad_norm": 0.09595705568790436, | |
| "learning_rate": 4.992754786457986e-05, | |
| "loss": 0.68, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.600805104297602, | |
| "grad_norm": 0.12361190468072891, | |
| "learning_rate": 4.992740580156923e-05, | |
| "loss": 0.6813, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.6058966729780906, | |
| "grad_norm": 0.14116083085536957, | |
| "learning_rate": 4.9927263738558606e-05, | |
| "loss": 0.6791, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.6109882416585783, | |
| "grad_norm": 0.14521893858909607, | |
| "learning_rate": 4.992712167554798e-05, | |
| "loss": 0.6841, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.6160798103390666, | |
| "grad_norm": 0.08931027352809906, | |
| "learning_rate": 4.9926979612537345e-05, | |
| "loss": 0.6839, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.6211713790195548, | |
| "grad_norm": 0.15768922865390778, | |
| "learning_rate": 4.992683754952672e-05, | |
| "loss": 0.6837, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.626262947700043, | |
| "grad_norm": 0.11857085675001144, | |
| "learning_rate": 4.992669548651609e-05, | |
| "loss": 0.6791, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.631354516380531, | |
| "grad_norm": 0.12832790613174438, | |
| "learning_rate": 4.9926553423505464e-05, | |
| "loss": 0.6789, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.6364460850610194, | |
| "grad_norm": 0.1246199905872345, | |
| "learning_rate": 4.992641136049484e-05, | |
| "loss": 0.6834, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.6415376537415076, | |
| "grad_norm": 0.10562731325626373, | |
| "learning_rate": 4.992626929748421e-05, | |
| "loss": 0.681, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.6466292224219954, | |
| "grad_norm": 0.1098145917057991, | |
| "learning_rate": 4.992612723447358e-05, | |
| "loss": 0.68, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.6517207911024836, | |
| "grad_norm": 0.1007496640086174, | |
| "learning_rate": 4.992598517146295e-05, | |
| "loss": 0.6835, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.656812359782972, | |
| "grad_norm": 0.16250421106815338, | |
| "learning_rate": 4.992584310845232e-05, | |
| "loss": 0.6781, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.66190392846346, | |
| "grad_norm": 0.1358012706041336, | |
| "learning_rate": 4.9925701045441696e-05, | |
| "loss": 0.6838, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.6669954971439482, | |
| "grad_norm": 0.10603620857000351, | |
| "learning_rate": 4.992555898243107e-05, | |
| "loss": 0.6847, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.6720870658244364, | |
| "grad_norm": 0.12339074909687042, | |
| "learning_rate": 4.992541691942044e-05, | |
| "loss": 0.6809, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.6771786345049247, | |
| "grad_norm": 0.13252249360084534, | |
| "learning_rate": 4.9925274856409816e-05, | |
| "loss": 0.6809, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.6822702031854124, | |
| "grad_norm": 0.12156182527542114, | |
| "learning_rate": 4.992513279339919e-05, | |
| "loss": 0.6803, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.687361771865901, | |
| "grad_norm": 0.1240081861615181, | |
| "learning_rate": 4.992499073038856e-05, | |
| "loss": 0.6828, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.692453340546389, | |
| "grad_norm": 0.1085842102766037, | |
| "learning_rate": 4.9924848667377935e-05, | |
| "loss": 0.6809, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.697544909226877, | |
| "grad_norm": 0.10199875384569168, | |
| "learning_rate": 4.992470660436731e-05, | |
| "loss": 0.6799, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.7026364779073653, | |
| "grad_norm": 0.10421440750360489, | |
| "learning_rate": 4.9924564541356674e-05, | |
| "loss": 0.6774, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.7077280465878535, | |
| "grad_norm": 0.11737542599439621, | |
| "learning_rate": 4.992442247834605e-05, | |
| "loss": 0.6866, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.7128196152683417, | |
| "grad_norm": 0.1116197407245636, | |
| "learning_rate": 4.992428041533542e-05, | |
| "loss": 0.6826, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.7179111839488295, | |
| "grad_norm": 0.07906144112348557, | |
| "learning_rate": 4.9924138352324794e-05, | |
| "loss": 0.6833, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.723002752629318, | |
| "grad_norm": 0.09525004774332047, | |
| "learning_rate": 4.992399628931416e-05, | |
| "loss": 0.6846, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.728094321309806, | |
| "grad_norm": 0.10529020428657532, | |
| "learning_rate": 4.992385422630353e-05, | |
| "loss": 0.6805, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.733185889990294, | |
| "grad_norm": 0.1130564957857132, | |
| "learning_rate": 4.9923712163292906e-05, | |
| "loss": 0.6834, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.7382774586707823, | |
| "grad_norm": 0.1169043555855751, | |
| "learning_rate": 4.992357010028228e-05, | |
| "loss": 0.6791, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.7433690273512705, | |
| "grad_norm": 0.10529076308012009, | |
| "learning_rate": 4.992342803727165e-05, | |
| "loss": 0.6807, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.7484605960317587, | |
| "grad_norm": 0.11143583059310913, | |
| "learning_rate": 4.9923285974261025e-05, | |
| "loss": 0.6809, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.753552164712247, | |
| "grad_norm": 0.12018362432718277, | |
| "learning_rate": 4.99231439112504e-05, | |
| "loss": 0.6803, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.758643733392735, | |
| "grad_norm": 0.10221763700246811, | |
| "learning_rate": 4.992300184823977e-05, | |
| "loss": 0.6844, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.763735302073223, | |
| "grad_norm": 0.12819118797779083, | |
| "learning_rate": 4.9922859785229145e-05, | |
| "loss": 0.6802, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.768826870753711, | |
| "grad_norm": 0.11218137294054031, | |
| "learning_rate": 4.992271772221852e-05, | |
| "loss": 0.6816, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.7739184394341994, | |
| "grad_norm": 0.1787531077861786, | |
| "learning_rate": 4.9922575659207884e-05, | |
| "loss": 0.6815, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.7790100081146876, | |
| "grad_norm": 0.10983338207006454, | |
| "learning_rate": 4.992243359619726e-05, | |
| "loss": 0.6777, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.784101576795176, | |
| "grad_norm": 0.12096842378377914, | |
| "learning_rate": 4.992229153318663e-05, | |
| "loss": 0.6793, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.789193145475664, | |
| "grad_norm": 0.1177634447813034, | |
| "learning_rate": 4.9922149470176004e-05, | |
| "loss": 0.6845, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.794284714156152, | |
| "grad_norm": 0.09383808076381683, | |
| "learning_rate": 4.992200740716538e-05, | |
| "loss": 0.6816, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.79937628283664, | |
| "grad_norm": 0.11048846691846848, | |
| "learning_rate": 4.992186534415475e-05, | |
| "loss": 0.6812, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.8044678515171286, | |
| "grad_norm": 0.11928955465555191, | |
| "learning_rate": 4.992172328114412e-05, | |
| "loss": 0.6855, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.8095594201976164, | |
| "grad_norm": 0.10312807559967041, | |
| "learning_rate": 4.9921581218133496e-05, | |
| "loss": 0.6834, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.8146509888781046, | |
| "grad_norm": 0.14140763878822327, | |
| "learning_rate": 4.992143915512287e-05, | |
| "loss": 0.6826, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.819742557558593, | |
| "grad_norm": 0.12414680421352386, | |
| "learning_rate": 4.992129709211224e-05, | |
| "loss": 0.6778, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.824834126239081, | |
| "grad_norm": 0.18568123877048492, | |
| "learning_rate": 4.9921155029101615e-05, | |
| "loss": 0.6834, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.8299256949195692, | |
| "grad_norm": 0.09774978458881378, | |
| "learning_rate": 4.992101296609098e-05, | |
| "loss": 0.6826, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.835017263600057, | |
| "grad_norm": 0.11985506862401962, | |
| "learning_rate": 4.9920870903080355e-05, | |
| "loss": 0.6865, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.8401088322805457, | |
| "grad_norm": 0.09641832858324051, | |
| "learning_rate": 4.992072884006973e-05, | |
| "loss": 0.6821, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.8452004009610334, | |
| "grad_norm": 0.10907211899757385, | |
| "learning_rate": 4.9920586777059094e-05, | |
| "loss": 0.6847, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.8502919696415216, | |
| "grad_norm": 0.11031023412942886, | |
| "learning_rate": 4.992044471404847e-05, | |
| "loss": 0.682, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.85538353832201, | |
| "grad_norm": 0.12869343161582947, | |
| "learning_rate": 4.992030265103784e-05, | |
| "loss": 0.683, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.860475107002498, | |
| "grad_norm": 0.114951953291893, | |
| "learning_rate": 4.9920160588027213e-05, | |
| "loss": 0.6801, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.8655666756829863, | |
| "grad_norm": 0.12400404363870621, | |
| "learning_rate": 4.9920018525016587e-05, | |
| "loss": 0.685, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.8706582443634745, | |
| "grad_norm": 0.11837892979383469, | |
| "learning_rate": 4.991987646200596e-05, | |
| "loss": 0.6826, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.8757498130439627, | |
| "grad_norm": 0.16485707461833954, | |
| "learning_rate": 4.991973439899533e-05, | |
| "loss": 0.6798, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.8808413817244505, | |
| "grad_norm": 0.1649584323167801, | |
| "learning_rate": 4.9919592335984706e-05, | |
| "loss": 0.6846, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.8859329504049387, | |
| "grad_norm": 0.09823145717382431, | |
| "learning_rate": 4.991945027297408e-05, | |
| "loss": 0.6825, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.891024519085427, | |
| "grad_norm": 0.10554816573858261, | |
| "learning_rate": 4.991930820996345e-05, | |
| "loss": 0.6833, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.896116087765915, | |
| "grad_norm": 0.09985250979661942, | |
| "learning_rate": 4.9919166146952825e-05, | |
| "loss": 0.685, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.9012076564464033, | |
| "grad_norm": 0.1473183035850525, | |
| "learning_rate": 4.99190240839422e-05, | |
| "loss": 0.6773, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.9062992251268915, | |
| "grad_norm": 0.1321994662284851, | |
| "learning_rate": 4.9918882020931565e-05, | |
| "loss": 0.6851, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.9113907938073798, | |
| "grad_norm": 0.11778974533081055, | |
| "learning_rate": 4.991873995792094e-05, | |
| "loss": 0.6777, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.9164823624878675, | |
| "grad_norm": 0.12440946698188782, | |
| "learning_rate": 4.991859789491031e-05, | |
| "loss": 0.6866, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.921573931168356, | |
| "grad_norm": 0.14024010300636292, | |
| "learning_rate": 4.9918455831899684e-05, | |
| "loss": 0.6786, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.926665499848844, | |
| "grad_norm": 0.1383139193058014, | |
| "learning_rate": 4.991831376888906e-05, | |
| "loss": 0.6843, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.931757068529332, | |
| "grad_norm": 0.16354554891586304, | |
| "learning_rate": 4.991817170587843e-05, | |
| "loss": 0.6788, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.9368486372098204, | |
| "grad_norm": 0.12788814306259155, | |
| "learning_rate": 4.9918029642867796e-05, | |
| "loss": 0.6803, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.9419402058903086, | |
| "grad_norm": 0.1226319745182991, | |
| "learning_rate": 4.991788757985717e-05, | |
| "loss": 0.6833, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.947031774570797, | |
| "grad_norm": 0.12122051417827606, | |
| "learning_rate": 4.991774551684654e-05, | |
| "loss": 0.6827, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.9521233432512846, | |
| "grad_norm": 0.12066159397363663, | |
| "learning_rate": 4.9917603453835916e-05, | |
| "loss": 0.6812, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.957214911931773, | |
| "grad_norm": 0.12547747790813446, | |
| "learning_rate": 4.991746139082529e-05, | |
| "loss": 0.6801, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.962306480612261, | |
| "grad_norm": 0.1140349805355072, | |
| "learning_rate": 4.991731932781466e-05, | |
| "loss": 0.6833, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.967398049292749, | |
| "grad_norm": 0.14640016853809357, | |
| "learning_rate": 4.9917177264804035e-05, | |
| "loss": 0.6806, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.9724896179732374, | |
| "grad_norm": 0.1226801946759224, | |
| "learning_rate": 4.991703520179341e-05, | |
| "loss": 0.6816, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.9775811866537256, | |
| "grad_norm": 0.1280628740787506, | |
| "learning_rate": 4.9916893138782775e-05, | |
| "loss": 0.6814, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.982672755334214, | |
| "grad_norm": 0.13127422332763672, | |
| "learning_rate": 4.991675107577215e-05, | |
| "loss": 0.683, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.987764324014702, | |
| "grad_norm": 0.10727940499782562, | |
| "learning_rate": 4.991660901276152e-05, | |
| "loss": 0.6824, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.9928558926951903, | |
| "grad_norm": 0.13203033804893494, | |
| "learning_rate": 4.9916466949750894e-05, | |
| "loss": 0.6825, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.997947461375678, | |
| "grad_norm": 0.1205354556441307, | |
| "learning_rate": 4.991632488674027e-05, | |
| "loss": 0.6868, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 3.002545784340244, | |
| "grad_norm": 0.1364830732345581, | |
| "learning_rate": 4.991618282372964e-05, | |
| "loss": 0.6114, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.007637353020732, | |
| "grad_norm": 0.1269853115081787, | |
| "learning_rate": 4.991604076071901e-05, | |
| "loss": 0.6826, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 3.0127289217012203, | |
| "grad_norm": 0.1348942220211029, | |
| "learning_rate": 4.9915898697708386e-05, | |
| "loss": 0.6837, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 3.0178204903817085, | |
| "grad_norm": 0.13320055603981018, | |
| "learning_rate": 4.991575663469776e-05, | |
| "loss": 0.6775, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 3.0229120590621967, | |
| "grad_norm": 0.11422030627727509, | |
| "learning_rate": 4.991561457168713e-05, | |
| "loss": 0.6801, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 3.028003627742685, | |
| "grad_norm": 0.10496284067630768, | |
| "learning_rate": 4.9915472508676506e-05, | |
| "loss": 0.6809, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.033095196423173, | |
| "grad_norm": 0.10586734861135483, | |
| "learning_rate": 4.991533044566588e-05, | |
| "loss": 0.6775, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 3.0381867651036614, | |
| "grad_norm": 0.13202211260795593, | |
| "learning_rate": 4.9915188382655245e-05, | |
| "loss": 0.6777, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 3.043278333784149, | |
| "grad_norm": 0.13048899173736572, | |
| "learning_rate": 4.991504631964462e-05, | |
| "loss": 0.6779, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 3.0483699024646373, | |
| "grad_norm": 0.12446481734514236, | |
| "learning_rate": 4.9914904256633984e-05, | |
| "loss": 0.6763, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 3.0534614711451256, | |
| "grad_norm": 0.10315615683794022, | |
| "learning_rate": 4.991476219362336e-05, | |
| "loss": 0.6877, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.0585530398256138, | |
| "grad_norm": 0.11032961308956146, | |
| "learning_rate": 4.991462013061273e-05, | |
| "loss": 0.6812, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 3.063644608506102, | |
| "grad_norm": 0.0968027114868164, | |
| "learning_rate": 4.9914478067602104e-05, | |
| "loss": 0.6818, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 3.06873617718659, | |
| "grad_norm": 0.11660617589950562, | |
| "learning_rate": 4.991433600459148e-05, | |
| "loss": 0.6806, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 3.0738277458670784, | |
| "grad_norm": 0.1213793009519577, | |
| "learning_rate": 4.991419394158085e-05, | |
| "loss": 0.6818, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 3.078919314547566, | |
| "grad_norm": 0.1275392472743988, | |
| "learning_rate": 4.991405187857022e-05, | |
| "loss": 0.6802, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.0840108832280544, | |
| "grad_norm": 0.1026177927851677, | |
| "learning_rate": 4.9913909815559596e-05, | |
| "loss": 0.6835, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 3.0891024519085426, | |
| "grad_norm": 0.10983236879110336, | |
| "learning_rate": 4.991376775254897e-05, | |
| "loss": 0.6838, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 3.094194020589031, | |
| "grad_norm": 0.11360979080200195, | |
| "learning_rate": 4.991362568953834e-05, | |
| "loss": 0.6806, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 3.099285589269519, | |
| "grad_norm": 0.1488681137561798, | |
| "learning_rate": 4.9913483626527716e-05, | |
| "loss": 0.6797, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 3.1043771579500072, | |
| "grad_norm": 0.13620369136333466, | |
| "learning_rate": 4.991334156351709e-05, | |
| "loss": 0.6871, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.1094687266304954, | |
| "grad_norm": 0.12065689265727997, | |
| "learning_rate": 4.9913199500506455e-05, | |
| "loss": 0.6792, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 3.1145602953109837, | |
| "grad_norm": 0.13917431235313416, | |
| "learning_rate": 4.991305743749583e-05, | |
| "loss": 0.676, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 3.1196518639914714, | |
| "grad_norm": 0.1255902796983719, | |
| "learning_rate": 4.99129153744852e-05, | |
| "loss": 0.6842, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 3.1247434326719596, | |
| "grad_norm": 0.11472214758396149, | |
| "learning_rate": 4.9912773311474574e-05, | |
| "loss": 0.6788, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 3.129835001352448, | |
| "grad_norm": 0.12614910304546356, | |
| "learning_rate": 4.991263124846395e-05, | |
| "loss": 0.6802, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.134926570032936, | |
| "grad_norm": 0.13529641926288605, | |
| "learning_rate": 4.991248918545332e-05, | |
| "loss": 0.6805, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 3.1400181387134243, | |
| "grad_norm": 0.11604179441928864, | |
| "learning_rate": 4.9912347122442694e-05, | |
| "loss": 0.6813, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 3.1451097073939125, | |
| "grad_norm": 0.12789122760295868, | |
| "learning_rate": 4.991220505943207e-05, | |
| "loss": 0.6845, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 3.1502012760744007, | |
| "grad_norm": 0.12958049774169922, | |
| "learning_rate": 4.991206299642143e-05, | |
| "loss": 0.6804, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 3.1552928447548885, | |
| "grad_norm": 0.10314188152551651, | |
| "learning_rate": 4.9911920933410806e-05, | |
| "loss": 0.6847, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.1603844134353767, | |
| "grad_norm": 0.10737662017345428, | |
| "learning_rate": 4.991177887040018e-05, | |
| "loss": 0.6808, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 3.165475982115865, | |
| "grad_norm": 0.15128542482852936, | |
| "learning_rate": 4.991163680738955e-05, | |
| "loss": 0.6789, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 3.170567550796353, | |
| "grad_norm": 0.11941689997911453, | |
| "learning_rate": 4.9911494744378925e-05, | |
| "loss": 0.6778, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 3.1756591194768413, | |
| "grad_norm": 0.15348762273788452, | |
| "learning_rate": 4.99113526813683e-05, | |
| "loss": 0.6799, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 3.1807506881573295, | |
| "grad_norm": 0.11959049850702286, | |
| "learning_rate": 4.9911210618357665e-05, | |
| "loss": 0.6776, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.1858422568378177, | |
| "grad_norm": 0.11588987708091736, | |
| "learning_rate": 4.991106855534704e-05, | |
| "loss": 0.688, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 3.190933825518306, | |
| "grad_norm": 0.09905340522527695, | |
| "learning_rate": 4.991092649233641e-05, | |
| "loss": 0.6845, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 3.196025394198794, | |
| "grad_norm": 0.11044521629810333, | |
| "learning_rate": 4.9910784429325784e-05, | |
| "loss": 0.6823, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 3.201116962879282, | |
| "grad_norm": 0.10236191004514694, | |
| "learning_rate": 4.991064236631516e-05, | |
| "loss": 0.6824, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 3.20620853155977, | |
| "grad_norm": 0.12017529457807541, | |
| "learning_rate": 4.991050030330453e-05, | |
| "loss": 0.682, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.2113001002402584, | |
| "grad_norm": 0.14782628417015076, | |
| "learning_rate": 4.9910358240293904e-05, | |
| "loss": 0.6781, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 3.2163916689207466, | |
| "grad_norm": 0.14653240144252777, | |
| "learning_rate": 4.991021617728328e-05, | |
| "loss": 0.6809, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 3.221483237601235, | |
| "grad_norm": 0.12069736421108246, | |
| "learning_rate": 4.991007411427265e-05, | |
| "loss": 0.6811, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 3.226574806281723, | |
| "grad_norm": 0.13772337138652802, | |
| "learning_rate": 4.990993205126202e-05, | |
| "loss": 0.6838, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 3.231666374962211, | |
| "grad_norm": 0.10374171286821365, | |
| "learning_rate": 4.9909789988251396e-05, | |
| "loss": 0.6836, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 3.236757943642699, | |
| "grad_norm": 0.11860493570566177, | |
| "learning_rate": 4.990964792524076e-05, | |
| "loss": 0.6778, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 3.241849512323187, | |
| "grad_norm": 0.1429886519908905, | |
| "learning_rate": 4.9909505862230135e-05, | |
| "loss": 0.6775, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 3.2469410810036754, | |
| "grad_norm": 0.1501941680908203, | |
| "learning_rate": 4.990936379921951e-05, | |
| "loss": 0.6804, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 3.2520326496841636, | |
| "grad_norm": 0.12676025927066803, | |
| "learning_rate": 4.990922173620888e-05, | |
| "loss": 0.6802, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 3.257124218364652, | |
| "grad_norm": 0.14346669614315033, | |
| "learning_rate": 4.990907967319825e-05, | |
| "loss": 0.6815, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.26221578704514, | |
| "grad_norm": 0.11594365537166595, | |
| "learning_rate": 4.990893761018762e-05, | |
| "loss": 0.6783, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 3.2673073557256282, | |
| "grad_norm": 0.12863503396511078, | |
| "learning_rate": 4.9908795547176994e-05, | |
| "loss": 0.68, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 3.272398924406116, | |
| "grad_norm": 0.13634729385375977, | |
| "learning_rate": 4.990865348416637e-05, | |
| "loss": 0.6795, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 3.2774904930866042, | |
| "grad_norm": 0.10696328431367874, | |
| "learning_rate": 4.990851142115574e-05, | |
| "loss": 0.6827, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 3.2825820617670924, | |
| "grad_norm": 0.1048332154750824, | |
| "learning_rate": 4.9908369358145113e-05, | |
| "loss": 0.6812, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 3.2876736304475807, | |
| "grad_norm": 0.09791410714387894, | |
| "learning_rate": 4.9908227295134487e-05, | |
| "loss": 0.6848, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 3.292765199128069, | |
| "grad_norm": 0.13385730981826782, | |
| "learning_rate": 4.990808523212386e-05, | |
| "loss": 0.6826, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 3.297856767808557, | |
| "grad_norm": 0.13646642863750458, | |
| "learning_rate": 4.990794316911323e-05, | |
| "loss": 0.6767, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 3.3029483364890453, | |
| "grad_norm": 0.14173270761966705, | |
| "learning_rate": 4.9907801106102606e-05, | |
| "loss": 0.6838, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 3.3080399051695335, | |
| "grad_norm": 0.14603695273399353, | |
| "learning_rate": 4.990765904309197e-05, | |
| "loss": 0.6766, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.3131314738500217, | |
| "grad_norm": 0.138224795460701, | |
| "learning_rate": 4.9907516980081345e-05, | |
| "loss": 0.6797, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 3.3182230425305095, | |
| "grad_norm": 0.11541623622179031, | |
| "learning_rate": 4.990737491707072e-05, | |
| "loss": 0.6814, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 3.3233146112109977, | |
| "grad_norm": 0.1160949096083641, | |
| "learning_rate": 4.990723285406009e-05, | |
| "loss": 0.6842, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 3.328406179891486, | |
| "grad_norm": 0.1572464108467102, | |
| "learning_rate": 4.9907090791049465e-05, | |
| "loss": 0.6783, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 3.333497748571974, | |
| "grad_norm": 0.13026835024356842, | |
| "learning_rate": 4.990694872803884e-05, | |
| "loss": 0.681, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 3.3385893172524623, | |
| "grad_norm": 0.11961708962917328, | |
| "learning_rate": 4.990680666502821e-05, | |
| "loss": 0.6807, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 3.3436808859329505, | |
| "grad_norm": 0.11406982690095901, | |
| "learning_rate": 4.9906664602017584e-05, | |
| "loss": 0.6795, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 3.3487724546134388, | |
| "grad_norm": 0.20744380354881287, | |
| "learning_rate": 4.990652253900696e-05, | |
| "loss": 0.6771, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 3.3538640232939265, | |
| "grad_norm": 0.11253584921360016, | |
| "learning_rate": 4.990638047599633e-05, | |
| "loss": 0.6802, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 3.3589555919744147, | |
| "grad_norm": 0.08123784512281418, | |
| "learning_rate": 4.99062384129857e-05, | |
| "loss": 0.6871, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.364047160654903, | |
| "grad_norm": 0.10802698135375977, | |
| "learning_rate": 4.990609634997507e-05, | |
| "loss": 0.683, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 3.369138729335391, | |
| "grad_norm": 0.11430787295103073, | |
| "learning_rate": 4.990595428696444e-05, | |
| "loss": 0.6821, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 3.3742302980158794, | |
| "grad_norm": 0.09323684871196747, | |
| "learning_rate": 4.9905812223953816e-05, | |
| "loss": 0.6836, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 3.3793218666963676, | |
| "grad_norm": 0.10404845327138901, | |
| "learning_rate": 4.990567016094318e-05, | |
| "loss": 0.6849, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 3.384413435376856, | |
| "grad_norm": 0.1404566615819931, | |
| "learning_rate": 4.9905528097932555e-05, | |
| "loss": 0.6769, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 3.3895050040573436, | |
| "grad_norm": 0.17702195048332214, | |
| "learning_rate": 4.990538603492193e-05, | |
| "loss": 0.6808, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 3.3945965727378318, | |
| "grad_norm": 0.1227133646607399, | |
| "learning_rate": 4.99052439719113e-05, | |
| "loss": 0.6855, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 3.39968814141832, | |
| "grad_norm": 0.0946226418018341, | |
| "learning_rate": 4.9905101908900675e-05, | |
| "loss": 0.6804, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 3.404779710098808, | |
| "grad_norm": 0.11467920988798141, | |
| "learning_rate": 4.990495984589005e-05, | |
| "loss": 0.677, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 3.4098712787792964, | |
| "grad_norm": 0.1885383576154709, | |
| "learning_rate": 4.990481778287942e-05, | |
| "loss": 0.6786, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 3.4149628474597846, | |
| "grad_norm": 0.0994097888469696, | |
| "learning_rate": 4.9904675719868794e-05, | |
| "loss": 0.6908, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 3.420054416140273, | |
| "grad_norm": 0.09989442676305771, | |
| "learning_rate": 4.990453365685817e-05, | |
| "loss": 0.6804, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 3.425145984820761, | |
| "grad_norm": 0.12362310290336609, | |
| "learning_rate": 4.990439159384754e-05, | |
| "loss": 0.6817, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 3.4302375535012493, | |
| "grad_norm": 0.13228794932365417, | |
| "learning_rate": 4.990424953083691e-05, | |
| "loss": 0.681, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 3.435329122181737, | |
| "grad_norm": 0.11642909795045853, | |
| "learning_rate": 4.9904107467826286e-05, | |
| "loss": 0.6825, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 3.4404206908622252, | |
| "grad_norm": 0.12548530101776123, | |
| "learning_rate": 4.990396540481565e-05, | |
| "loss": 0.6814, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 3.4455122595427135, | |
| "grad_norm": 0.11513999849557877, | |
| "learning_rate": 4.9903823341805026e-05, | |
| "loss": 0.6792, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 3.4506038282232017, | |
| "grad_norm": 0.12245498597621918, | |
| "learning_rate": 4.99036812787944e-05, | |
| "loss": 0.6771, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 3.45569539690369, | |
| "grad_norm": 0.12722285091876984, | |
| "learning_rate": 4.990353921578377e-05, | |
| "loss": 0.679, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 3.460786965584178, | |
| "grad_norm": 0.13212384283542633, | |
| "learning_rate": 4.9903397152773145e-05, | |
| "loss": 0.6818, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 3.4658785342646663, | |
| "grad_norm": 0.11193917691707611, | |
| "learning_rate": 4.990325508976252e-05, | |
| "loss": 0.6822, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 3.470970102945154, | |
| "grad_norm": 0.14051009714603424, | |
| "learning_rate": 4.9903113026751884e-05, | |
| "loss": 0.673, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 3.4760616716256423, | |
| "grad_norm": 0.16787344217300415, | |
| "learning_rate": 4.990297096374126e-05, | |
| "loss": 0.6834, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 3.4811532403061305, | |
| "grad_norm": 0.1313748061656952, | |
| "learning_rate": 4.990282890073063e-05, | |
| "loss": 0.6785, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 3.4862448089866187, | |
| "grad_norm": 0.13282889127731323, | |
| "learning_rate": 4.9902686837720004e-05, | |
| "loss": 0.6791, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 3.491336377667107, | |
| "grad_norm": 0.15743672847747803, | |
| "learning_rate": 4.990254477470938e-05, | |
| "loss": 0.685, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 3.496427946347595, | |
| "grad_norm": 0.09886245429515839, | |
| "learning_rate": 4.990240271169875e-05, | |
| "loss": 0.6831, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 3.5015195150280833, | |
| "grad_norm": 0.14891770482063293, | |
| "learning_rate": 4.990226064868812e-05, | |
| "loss": 0.681, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 3.506611083708571, | |
| "grad_norm": 0.13956576585769653, | |
| "learning_rate": 4.9902118585677496e-05, | |
| "loss": 0.6806, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 3.5117026523890598, | |
| "grad_norm": 0.1325678676366806, | |
| "learning_rate": 4.990197652266686e-05, | |
| "loss": 0.6809, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 3.5167942210695475, | |
| "grad_norm": 0.13164210319519043, | |
| "learning_rate": 4.9901834459656236e-05, | |
| "loss": 0.6822, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 3.5218857897500357, | |
| "grad_norm": 0.13481168448925018, | |
| "learning_rate": 4.990169239664561e-05, | |
| "loss": 0.673, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 3.526977358430524, | |
| "grad_norm": 0.16314196586608887, | |
| "learning_rate": 4.990155033363498e-05, | |
| "loss": 0.6768, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 3.532068927111012, | |
| "grad_norm": 0.1418369710445404, | |
| "learning_rate": 4.9901408270624355e-05, | |
| "loss": 0.6777, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 3.5371604957915004, | |
| "grad_norm": 0.12762701511383057, | |
| "learning_rate": 4.990126620761373e-05, | |
| "loss": 0.6788, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 3.542252064471988, | |
| "grad_norm": 0.10353351384401321, | |
| "learning_rate": 4.99011241446031e-05, | |
| "loss": 0.6858, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 3.547343633152477, | |
| "grad_norm": 0.0953698605298996, | |
| "learning_rate": 4.9900982081592474e-05, | |
| "loss": 0.6783, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 3.5524352018329646, | |
| "grad_norm": 0.10428538918495178, | |
| "learning_rate": 4.990084001858185e-05, | |
| "loss": 0.6844, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 3.557526770513453, | |
| "grad_norm": 0.11740399152040482, | |
| "learning_rate": 4.990069795557122e-05, | |
| "loss": 0.6838, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 3.562618339193941, | |
| "grad_norm": 0.12733303010463715, | |
| "learning_rate": 4.9900555892560594e-05, | |
| "loss": 0.6768, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.567709907874429, | |
| "grad_norm": 0.16426721215248108, | |
| "learning_rate": 4.990041382954997e-05, | |
| "loss": 0.683, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 3.5728014765549174, | |
| "grad_norm": 0.12947894632816315, | |
| "learning_rate": 4.990027176653933e-05, | |
| "loss": 0.6729, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 3.5778930452354056, | |
| "grad_norm": 0.15960286557674408, | |
| "learning_rate": 4.9900129703528706e-05, | |
| "loss": 0.679, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 3.582984613915894, | |
| "grad_norm": 0.12176317721605301, | |
| "learning_rate": 4.989998764051807e-05, | |
| "loss": 0.6832, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 3.5880761825963816, | |
| "grad_norm": 0.12822549045085907, | |
| "learning_rate": 4.9899845577507446e-05, | |
| "loss": 0.6807, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 3.59316775127687, | |
| "grad_norm": 0.09114730358123779, | |
| "learning_rate": 4.989970351449682e-05, | |
| "loss": 0.6837, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 3.598259319957358, | |
| "grad_norm": 0.11248596012592316, | |
| "learning_rate": 4.989956145148619e-05, | |
| "loss": 0.6773, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 3.6033508886378462, | |
| "grad_norm": 0.14381690323352814, | |
| "learning_rate": 4.9899419388475565e-05, | |
| "loss": 0.6763, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 3.6084424573183345, | |
| "grad_norm": 0.1576450616121292, | |
| "learning_rate": 4.989927732546494e-05, | |
| "loss": 0.6796, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 3.6135340259988227, | |
| "grad_norm": 0.12672173976898193, | |
| "learning_rate": 4.989913526245431e-05, | |
| "loss": 0.6773, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 3.618625594679311, | |
| "grad_norm": 0.10089720040559769, | |
| "learning_rate": 4.9898993199443684e-05, | |
| "loss": 0.6835, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 3.6237171633597987, | |
| "grad_norm": 0.10352669656276703, | |
| "learning_rate": 4.989885113643306e-05, | |
| "loss": 0.6804, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 3.6288087320402873, | |
| "grad_norm": 0.12168221920728683, | |
| "learning_rate": 4.989870907342243e-05, | |
| "loss": 0.6775, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 3.633900300720775, | |
| "grad_norm": 0.152724489569664, | |
| "learning_rate": 4.9898567010411804e-05, | |
| "loss": 0.6832, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 3.6389918694012633, | |
| "grad_norm": 0.10124222189188004, | |
| "learning_rate": 4.989842494740118e-05, | |
| "loss": 0.6824, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 3.6440834380817515, | |
| "grad_norm": 0.10840737819671631, | |
| "learning_rate": 4.989828288439054e-05, | |
| "loss": 0.6781, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 3.6491750067622397, | |
| "grad_norm": 0.10668514668941498, | |
| "learning_rate": 4.9898140821379916e-05, | |
| "loss": 0.6857, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 3.654266575442728, | |
| "grad_norm": 0.11429141461849213, | |
| "learning_rate": 4.989799875836929e-05, | |
| "loss": 0.6823, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 3.6593581441232157, | |
| "grad_norm": 0.1012284979224205, | |
| "learning_rate": 4.989785669535866e-05, | |
| "loss": 0.678, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 3.6644497128037044, | |
| "grad_norm": 0.15000002086162567, | |
| "learning_rate": 4.9897714632348035e-05, | |
| "loss": 0.6763, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.669541281484192, | |
| "grad_norm": 0.15613609552383423, | |
| "learning_rate": 4.989757256933741e-05, | |
| "loss": 0.6837, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 3.6746328501646803, | |
| "grad_norm": 0.13344906270503998, | |
| "learning_rate": 4.989743050632678e-05, | |
| "loss": 0.6841, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 3.6797244188451685, | |
| "grad_norm": 0.12140567600727081, | |
| "learning_rate": 4.9897288443316155e-05, | |
| "loss": 0.6792, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 3.6848159875256568, | |
| "grad_norm": 0.11317454278469086, | |
| "learning_rate": 4.989714638030552e-05, | |
| "loss": 0.6821, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 3.689907556206145, | |
| "grad_norm": 0.1328129768371582, | |
| "learning_rate": 4.9897004317294894e-05, | |
| "loss": 0.6842, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 3.694999124886633, | |
| "grad_norm": 0.1081654503941536, | |
| "learning_rate": 4.989686225428427e-05, | |
| "loss": 0.6796, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 3.7000906935671214, | |
| "grad_norm": 0.09531684964895248, | |
| "learning_rate": 4.989672019127364e-05, | |
| "loss": 0.6833, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 3.705182262247609, | |
| "grad_norm": 0.10997920483350754, | |
| "learning_rate": 4.9896578128263013e-05, | |
| "loss": 0.6795, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 3.7102738309280974, | |
| "grad_norm": 0.15568581223487854, | |
| "learning_rate": 4.9896436065252387e-05, | |
| "loss": 0.6804, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 3.7153653996085856, | |
| "grad_norm": 0.130909726023674, | |
| "learning_rate": 4.989629400224175e-05, | |
| "loss": 0.6814, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.720456968289074, | |
| "grad_norm": 0.13917888700962067, | |
| "learning_rate": 4.9896151939231126e-05, | |
| "loss": 0.6793, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 3.725548536969562, | |
| "grad_norm": 0.12968967854976654, | |
| "learning_rate": 4.98960098762205e-05, | |
| "loss": 0.6819, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 3.73064010565005, | |
| "grad_norm": 0.12175523489713669, | |
| "learning_rate": 4.989586781320987e-05, | |
| "loss": 0.6785, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 3.7357316743305384, | |
| "grad_norm": 0.12431439012289047, | |
| "learning_rate": 4.9895725750199245e-05, | |
| "loss": 0.6785, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 3.740823243011026, | |
| "grad_norm": 0.1398157924413681, | |
| "learning_rate": 4.989558368718862e-05, | |
| "loss": 0.6779, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 3.745914811691515, | |
| "grad_norm": 0.11357001215219498, | |
| "learning_rate": 4.989544162417799e-05, | |
| "loss": 0.685, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 3.7510063803720026, | |
| "grad_norm": 0.16288457810878754, | |
| "learning_rate": 4.9895299561167365e-05, | |
| "loss": 0.6811, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 3.756097949052491, | |
| "grad_norm": 0.11568481475114822, | |
| "learning_rate": 4.989515749815674e-05, | |
| "loss": 0.6796, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 3.761189517732979, | |
| "grad_norm": 0.15195196866989136, | |
| "learning_rate": 4.989501543514611e-05, | |
| "loss": 0.6777, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 3.7662810864134673, | |
| "grad_norm": 0.12881244719028473, | |
| "learning_rate": 4.9894873372135484e-05, | |
| "loss": 0.6775, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.7713726550939555, | |
| "grad_norm": 0.1401291787624359, | |
| "learning_rate": 4.989473130912485e-05, | |
| "loss": 0.6834, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 3.7764642237744432, | |
| "grad_norm": 0.12248072773218155, | |
| "learning_rate": 4.9894589246114223e-05, | |
| "loss": 0.6792, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 3.781555792454932, | |
| "grad_norm": 0.11089824140071869, | |
| "learning_rate": 4.9894447183103596e-05, | |
| "loss": 0.6819, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 3.7866473611354197, | |
| "grad_norm": 0.09657898545265198, | |
| "learning_rate": 4.989430512009297e-05, | |
| "loss": 0.6841, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 3.791738929815908, | |
| "grad_norm": 0.12385948747396469, | |
| "learning_rate": 4.9894163057082336e-05, | |
| "loss": 0.6795, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 3.796830498496396, | |
| "grad_norm": 0.10562111437320709, | |
| "learning_rate": 4.989402099407171e-05, | |
| "loss": 0.6783, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 3.8019220671768843, | |
| "grad_norm": 0.11349403858184814, | |
| "learning_rate": 4.989387893106108e-05, | |
| "loss": 0.6807, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 3.8070136358573725, | |
| "grad_norm": 0.11444567143917084, | |
| "learning_rate": 4.9893736868050455e-05, | |
| "loss": 0.6791, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 3.8121052045378607, | |
| "grad_norm": 0.1610439121723175, | |
| "learning_rate": 4.989359480503983e-05, | |
| "loss": 0.6812, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 3.817196773218349, | |
| "grad_norm": 0.1214766800403595, | |
| "learning_rate": 4.98934527420292e-05, | |
| "loss": 0.6817, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.8222883418988367, | |
| "grad_norm": 0.12765400111675262, | |
| "learning_rate": 4.9893310679018575e-05, | |
| "loss": 0.6787, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 3.827379910579325, | |
| "grad_norm": 0.10731592029333115, | |
| "learning_rate": 4.989316861600795e-05, | |
| "loss": 0.683, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 3.832471479259813, | |
| "grad_norm": 0.12986642122268677, | |
| "learning_rate": 4.989302655299732e-05, | |
| "loss": 0.6766, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 3.8375630479403013, | |
| "grad_norm": 0.12156540900468826, | |
| "learning_rate": 4.9892884489986694e-05, | |
| "loss": 0.6834, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 3.8426546166207896, | |
| "grad_norm": 0.10650958865880966, | |
| "learning_rate": 4.989274242697606e-05, | |
| "loss": 0.6821, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 3.8477461853012778, | |
| "grad_norm": 0.09265447407960892, | |
| "learning_rate": 4.989260036396543e-05, | |
| "loss": 0.6807, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 3.852837753981766, | |
| "grad_norm": 0.13007622957229614, | |
| "learning_rate": 4.9892458300954806e-05, | |
| "loss": 0.681, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 3.8579293226622537, | |
| "grad_norm": 0.1033967137336731, | |
| "learning_rate": 4.989231623794418e-05, | |
| "loss": 0.686, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 3.8630208913427424, | |
| "grad_norm": 0.10867638140916824, | |
| "learning_rate": 4.989217417493355e-05, | |
| "loss": 0.6796, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 3.86811246002323, | |
| "grad_norm": 0.105263352394104, | |
| "learning_rate": 4.9892032111922926e-05, | |
| "loss": 0.6824, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.8732040287037184, | |
| "grad_norm": 0.12403067946434021, | |
| "learning_rate": 4.98918900489123e-05, | |
| "loss": 0.6793, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 3.8782955973842066, | |
| "grad_norm": 0.09988098591566086, | |
| "learning_rate": 4.989174798590167e-05, | |
| "loss": 0.6842, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 3.883387166064695, | |
| "grad_norm": 0.13452745974063873, | |
| "learning_rate": 4.9891605922891045e-05, | |
| "loss": 0.6811, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 3.888478734745183, | |
| "grad_norm": 0.10854171961545944, | |
| "learning_rate": 4.989146385988042e-05, | |
| "loss": 0.6827, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 3.893570303425671, | |
| "grad_norm": 0.10819829255342484, | |
| "learning_rate": 4.9891321796869784e-05, | |
| "loss": 0.6796, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 3.8986618721061594, | |
| "grad_norm": 0.17421726882457733, | |
| "learning_rate": 4.989117973385916e-05, | |
| "loss": 0.6808, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 3.903753440786647, | |
| "grad_norm": 0.13020376861095428, | |
| "learning_rate": 4.989103767084853e-05, | |
| "loss": 0.6796, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 3.9088450094671354, | |
| "grad_norm": 0.10870732367038727, | |
| "learning_rate": 4.9890895607837904e-05, | |
| "loss": 0.6867, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 3.9139365781476236, | |
| "grad_norm": 0.10249564051628113, | |
| "learning_rate": 4.989075354482727e-05, | |
| "loss": 0.6847, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 3.919028146828112, | |
| "grad_norm": 0.09583424031734467, | |
| "learning_rate": 4.989061148181664e-05, | |
| "loss": 0.6837, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.9241197155086, | |
| "grad_norm": 0.10090246796607971, | |
| "learning_rate": 4.9890469418806016e-05, | |
| "loss": 0.6814, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 3.9292112841890883, | |
| "grad_norm": 0.1201721727848053, | |
| "learning_rate": 4.989032735579539e-05, | |
| "loss": 0.6843, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 3.9343028528695765, | |
| "grad_norm": 0.11703382432460785, | |
| "learning_rate": 4.989018529278476e-05, | |
| "loss": 0.6784, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 3.9393944215500643, | |
| "grad_norm": 0.1226707398891449, | |
| "learning_rate": 4.9890043229774136e-05, | |
| "loss": 0.6847, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 3.9444859902305525, | |
| "grad_norm": 0.09304598718881607, | |
| "learning_rate": 4.988990116676351e-05, | |
| "loss": 0.6812, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 3.9495775589110407, | |
| "grad_norm": 0.10586468130350113, | |
| "learning_rate": 4.988975910375288e-05, | |
| "loss": 0.679, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 3.954669127591529, | |
| "grad_norm": 0.10969860106706619, | |
| "learning_rate": 4.9889617040742255e-05, | |
| "loss": 0.6826, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 3.959760696272017, | |
| "grad_norm": 0.1249874085187912, | |
| "learning_rate": 4.988947497773163e-05, | |
| "loss": 0.6767, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 3.9648522649525053, | |
| "grad_norm": 0.16480816900730133, | |
| "learning_rate": 4.9889332914721e-05, | |
| "loss": 0.6796, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 3.9699438336329935, | |
| "grad_norm": 0.2025347650051117, | |
| "learning_rate": 4.9889190851710374e-05, | |
| "loss": 0.678, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.9750354023134813, | |
| "grad_norm": 0.1153530701994896, | |
| "learning_rate": 4.988904878869974e-05, | |
| "loss": 0.6812, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 3.98012697099397, | |
| "grad_norm": 0.12336631864309311, | |
| "learning_rate": 4.9888906725689114e-05, | |
| "loss": 0.6781, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 3.9852185396744577, | |
| "grad_norm": 0.1417071670293808, | |
| "learning_rate": 4.988876466267849e-05, | |
| "loss": 0.6796, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 3.990310108354946, | |
| "grad_norm": 0.12677961587905884, | |
| "learning_rate": 4.988862259966786e-05, | |
| "loss": 0.6819, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 3.995401677035434, | |
| "grad_norm": 0.1134430319070816, | |
| "learning_rate": 4.988848053665723e-05, | |
| "loss": 0.6802, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.021812934428453445, | |
| "learning_rate": 4.98883384736466e-05, | |
| "loss": 0.6148, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 4.005091568680488, | |
| "grad_norm": 0.11325574666261673, | |
| "learning_rate": 4.988819641063597e-05, | |
| "loss": 0.6823, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 4.010183137360976, | |
| "grad_norm": 0.12439537793397903, | |
| "learning_rate": 4.9888054347625346e-05, | |
| "loss": 0.6808, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 4.015274706041464, | |
| "grad_norm": 0.11274933069944382, | |
| "learning_rate": 4.988791228461472e-05, | |
| "loss": 0.6828, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 4.020366274721953, | |
| "grad_norm": 0.10643935203552246, | |
| "learning_rate": 4.988777022160409e-05, | |
| "loss": 0.6833, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 4.025457843402441, | |
| "grad_norm": 0.0944155901670456, | |
| "learning_rate": 4.9887628158593465e-05, | |
| "loss": 0.6842, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 4.030549412082929, | |
| "grad_norm": 0.12772725522518158, | |
| "learning_rate": 4.988748609558284e-05, | |
| "loss": 0.6764, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 4.035640980763417, | |
| "grad_norm": 0.19370485842227936, | |
| "learning_rate": 4.988734403257221e-05, | |
| "loss": 0.6845, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 4.040732549443905, | |
| "grad_norm": 0.13512100279331207, | |
| "learning_rate": 4.9887201969561584e-05, | |
| "loss": 0.6745, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 4.0458241181243935, | |
| "grad_norm": 0.13933135569095612, | |
| "learning_rate": 4.988705990655095e-05, | |
| "loss": 0.6842, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 4.050915686804881, | |
| "grad_norm": 0.13375182449817657, | |
| "learning_rate": 4.9886917843540324e-05, | |
| "loss": 0.6815, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 4.05600725548537, | |
| "grad_norm": 0.11060313135385513, | |
| "learning_rate": 4.98867757805297e-05, | |
| "loss": 0.6798, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 4.061098824165858, | |
| "grad_norm": 0.14003530144691467, | |
| "learning_rate": 4.988663371751907e-05, | |
| "loss": 0.6787, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 4.066190392846346, | |
| "grad_norm": 0.10484720021486282, | |
| "learning_rate": 4.988649165450844e-05, | |
| "loss": 0.6818, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 4.071281961526834, | |
| "grad_norm": 0.11415210366249084, | |
| "learning_rate": 4.9886349591497816e-05, | |
| "loss": 0.6804, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.076373530207323, | |
| "grad_norm": 0.1279604583978653, | |
| "learning_rate": 4.988620752848719e-05, | |
| "loss": 0.6793, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 4.0814650988878105, | |
| "grad_norm": 0.12138471007347107, | |
| "learning_rate": 4.988606546547656e-05, | |
| "loss": 0.6814, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 4.086556667568298, | |
| "grad_norm": 0.13427557051181793, | |
| "learning_rate": 4.9885923402465935e-05, | |
| "loss": 0.6752, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 4.091648236248787, | |
| "grad_norm": 0.14821045100688934, | |
| "learning_rate": 4.988578133945531e-05, | |
| "loss": 0.6775, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 4.096739804929275, | |
| "grad_norm": 0.13484236598014832, | |
| "learning_rate": 4.988563927644468e-05, | |
| "loss": 0.6846, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 4.101831373609763, | |
| "grad_norm": 0.07954470813274384, | |
| "learning_rate": 4.9885497213434055e-05, | |
| "loss": 0.684, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 4.106922942290251, | |
| "grad_norm": 0.10616060346364975, | |
| "learning_rate": 4.988535515042342e-05, | |
| "loss": 0.6822, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 4.11201451097074, | |
| "grad_norm": 0.10499216616153717, | |
| "learning_rate": 4.9885213087412794e-05, | |
| "loss": 0.6798, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 4.1171060796512275, | |
| "grad_norm": 0.12274570018053055, | |
| "learning_rate": 4.988507102440216e-05, | |
| "loss": 0.6799, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 4.122197648331715, | |
| "grad_norm": 0.11465749889612198, | |
| "learning_rate": 4.9884928961391534e-05, | |
| "loss": 0.6817, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.127289217012204, | |
| "grad_norm": 0.09962257742881775, | |
| "learning_rate": 4.988478689838091e-05, | |
| "loss": 0.6844, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 4.132380785692692, | |
| "grad_norm": 0.1151047945022583, | |
| "learning_rate": 4.988464483537028e-05, | |
| "loss": 0.6787, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 4.13747235437318, | |
| "grad_norm": 0.1360507756471634, | |
| "learning_rate": 4.988450277235965e-05, | |
| "loss": 0.6791, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 4.142563923053668, | |
| "grad_norm": 0.16751664876937866, | |
| "learning_rate": 4.9884360709349026e-05, | |
| "loss": 0.6738, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 4.147655491734157, | |
| "grad_norm": 0.18576379120349884, | |
| "learning_rate": 4.98842186463384e-05, | |
| "loss": 0.678, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 4.152747060414645, | |
| "grad_norm": 0.12279310077428818, | |
| "learning_rate": 4.988407658332777e-05, | |
| "loss": 0.6786, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 4.157838629095132, | |
| "grad_norm": 0.14428728818893433, | |
| "learning_rate": 4.9883934520317145e-05, | |
| "loss": 0.6756, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 4.162930197775621, | |
| "grad_norm": 0.1211373507976532, | |
| "learning_rate": 4.988379245730652e-05, | |
| "loss": 0.6775, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 4.168021766456109, | |
| "grad_norm": 0.13393299281597137, | |
| "learning_rate": 4.988365039429589e-05, | |
| "loss": 0.6769, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 4.173113335136597, | |
| "grad_norm": 0.12077504396438599, | |
| "learning_rate": 4.9883508331285265e-05, | |
| "loss": 0.6829, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.178204903817085, | |
| "grad_norm": 0.10940321534872055, | |
| "learning_rate": 4.988336626827463e-05, | |
| "loss": 0.6809, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 4.183296472497574, | |
| "grad_norm": 0.09884709119796753, | |
| "learning_rate": 4.9883224205264004e-05, | |
| "loss": 0.6813, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 4.188388041178062, | |
| "grad_norm": 0.10086120665073395, | |
| "learning_rate": 4.988308214225338e-05, | |
| "loss": 0.6809, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 4.19347960985855, | |
| "grad_norm": 0.11668648570775986, | |
| "learning_rate": 4.988294007924275e-05, | |
| "loss": 0.6798, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 4.198571178539038, | |
| "grad_norm": 0.12528111040592194, | |
| "learning_rate": 4.9882798016232123e-05, | |
| "loss": 0.6765, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 4.203662747219526, | |
| "grad_norm": 0.11714299023151398, | |
| "learning_rate": 4.9882655953221497e-05, | |
| "loss": 0.6744, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 4.2087543159000145, | |
| "grad_norm": 0.11050295829772949, | |
| "learning_rate": 4.988251389021087e-05, | |
| "loss": 0.6874, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 4.213845884580502, | |
| "grad_norm": 0.09499291330575943, | |
| "learning_rate": 4.9882371827200236e-05, | |
| "loss": 0.6817, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 4.218937453260991, | |
| "grad_norm": 0.09335146099328995, | |
| "learning_rate": 4.988222976418961e-05, | |
| "loss": 0.6818, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 4.224029021941479, | |
| "grad_norm": 0.1219559907913208, | |
| "learning_rate": 4.988208770117898e-05, | |
| "loss": 0.681, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 4.229120590621967, | |
| "grad_norm": 0.14629492163658142, | |
| "learning_rate": 4.9881945638168355e-05, | |
| "loss": 0.6822, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 4.234212159302455, | |
| "grad_norm": 0.13365550339221954, | |
| "learning_rate": 4.988180357515773e-05, | |
| "loss": 0.6805, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 4.239303727982943, | |
| "grad_norm": 0.141509547829628, | |
| "learning_rate": 4.98816615121471e-05, | |
| "loss": 0.6783, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 4.2443952966634315, | |
| "grad_norm": 0.13036063313484192, | |
| "learning_rate": 4.9881519449136475e-05, | |
| "loss": 0.6756, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 4.249486865343919, | |
| "grad_norm": 0.11939451843500137, | |
| "learning_rate": 4.988137738612584e-05, | |
| "loss": 0.6826, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 4.254578434024408, | |
| "grad_norm": 0.12008455395698547, | |
| "learning_rate": 4.9881235323115214e-05, | |
| "loss": 0.681, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 4.259670002704896, | |
| "grad_norm": 0.11019112914800644, | |
| "learning_rate": 4.988109326010459e-05, | |
| "loss": 0.6856, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 4.264761571385384, | |
| "grad_norm": 0.10078281164169312, | |
| "learning_rate": 4.988095119709396e-05, | |
| "loss": 0.6792, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 4.269853140065872, | |
| "grad_norm": 0.1294504553079605, | |
| "learning_rate": 4.988080913408333e-05, | |
| "loss": 0.6825, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 4.274944708746361, | |
| "grad_norm": 0.1074661836028099, | |
| "learning_rate": 4.9880667071072706e-05, | |
| "loss": 0.6799, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 4.280036277426849, | |
| "grad_norm": 0.11285123229026794, | |
| "learning_rate": 4.988052500806208e-05, | |
| "loss": 0.6831, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 4.285127846107336, | |
| "grad_norm": 0.12429996579885483, | |
| "learning_rate": 4.988038294505145e-05, | |
| "loss": 0.6793, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 4.290219414787825, | |
| "grad_norm": 0.10803820192813873, | |
| "learning_rate": 4.9880240882040826e-05, | |
| "loss": 0.6787, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 4.295310983468313, | |
| "grad_norm": 0.12693211436271667, | |
| "learning_rate": 4.98800988190302e-05, | |
| "loss": 0.6729, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 4.300402552148801, | |
| "grad_norm": 0.11828629672527313, | |
| "learning_rate": 4.987995675601957e-05, | |
| "loss": 0.6829, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 4.305494120829289, | |
| "grad_norm": 0.11893879622220993, | |
| "learning_rate": 4.987981469300894e-05, | |
| "loss": 0.681, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 4.310585689509777, | |
| "grad_norm": 0.12228237092494965, | |
| "learning_rate": 4.987967262999831e-05, | |
| "loss": 0.6793, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 4.315677258190266, | |
| "grad_norm": 0.11881165206432343, | |
| "learning_rate": 4.9879530566987684e-05, | |
| "loss": 0.681, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 4.320768826870753, | |
| "grad_norm": 0.09753947705030441, | |
| "learning_rate": 4.987938850397705e-05, | |
| "loss": 0.6812, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 4.325860395551242, | |
| "grad_norm": 0.10875561088323593, | |
| "learning_rate": 4.9879246440966424e-05, | |
| "loss": 0.6764, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.33095196423173, | |
| "grad_norm": 0.1029878631234169, | |
| "learning_rate": 4.98791043779558e-05, | |
| "loss": 0.6793, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 4.3360435329122184, | |
| "grad_norm": 0.11321298032999039, | |
| "learning_rate": 4.987896231494517e-05, | |
| "loss": 0.6805, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 4.341135101592706, | |
| "grad_norm": 0.12302636355161667, | |
| "learning_rate": 4.987882025193454e-05, | |
| "loss": 0.6841, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 4.346226670273195, | |
| "grad_norm": 0.0927717313170433, | |
| "learning_rate": 4.9878678188923916e-05, | |
| "loss": 0.6848, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 4.351318238953683, | |
| "grad_norm": 0.1418168693780899, | |
| "learning_rate": 4.987853612591329e-05, | |
| "loss": 0.6764, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 4.35640980763417, | |
| "grad_norm": 0.12036493420600891, | |
| "learning_rate": 4.987839406290266e-05, | |
| "loss": 0.6783, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 4.361501376314659, | |
| "grad_norm": 0.14609991014003754, | |
| "learning_rate": 4.9878251999892036e-05, | |
| "loss": 0.676, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 4.366592944995147, | |
| "grad_norm": 0.1448822170495987, | |
| "learning_rate": 4.987810993688141e-05, | |
| "loss": 0.6803, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 4.3716845136756355, | |
| "grad_norm": 0.14650079607963562, | |
| "learning_rate": 4.987796787387078e-05, | |
| "loss": 0.6795, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 4.376776082356123, | |
| "grad_norm": 0.10146970301866531, | |
| "learning_rate": 4.987782581086015e-05, | |
| "loss": 0.6826, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 4.381867651036612, | |
| "grad_norm": 0.10098574310541153, | |
| "learning_rate": 4.987768374784952e-05, | |
| "loss": 0.6814, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 4.3869592197171, | |
| "grad_norm": 0.12981392443180084, | |
| "learning_rate": 4.9877541684838894e-05, | |
| "loss": 0.6774, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 4.392050788397588, | |
| "grad_norm": 0.1231103241443634, | |
| "learning_rate": 4.987739962182827e-05, | |
| "loss": 0.6751, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 4.397142357078076, | |
| "grad_norm": 0.17549310624599457, | |
| "learning_rate": 4.987725755881764e-05, | |
| "loss": 0.6773, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 4.402233925758564, | |
| "grad_norm": 0.1261102259159088, | |
| "learning_rate": 4.9877115495807014e-05, | |
| "loss": 0.6778, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 4.4073254944390525, | |
| "grad_norm": 0.12228421121835709, | |
| "learning_rate": 4.987697343279639e-05, | |
| "loss": 0.6815, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 4.41241706311954, | |
| "grad_norm": 0.08992882072925568, | |
| "learning_rate": 4.987683136978576e-05, | |
| "loss": 0.6829, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 4.417508631800029, | |
| "grad_norm": 0.10478372871875763, | |
| "learning_rate": 4.987668930677513e-05, | |
| "loss": 0.6758, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 4.422600200480517, | |
| "grad_norm": 0.1255083978176117, | |
| "learning_rate": 4.9876547243764506e-05, | |
| "loss": 0.6741, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 4.4276917691610045, | |
| "grad_norm": 0.13139568269252777, | |
| "learning_rate": 4.987640518075387e-05, | |
| "loss": 0.6803, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 4.432783337841493, | |
| "grad_norm": 0.1472860723733902, | |
| "learning_rate": 4.9876263117743246e-05, | |
| "loss": 0.6759, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 4.437874906521981, | |
| "grad_norm": 0.16318807005882263, | |
| "learning_rate": 4.987612105473262e-05, | |
| "loss": 0.6868, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 4.44296647520247, | |
| "grad_norm": 0.1145109310746193, | |
| "learning_rate": 4.987597899172199e-05, | |
| "loss": 0.6788, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 4.448058043882957, | |
| "grad_norm": 0.09544923901557922, | |
| "learning_rate": 4.987583692871136e-05, | |
| "loss": 0.682, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 4.453149612563446, | |
| "grad_norm": 0.10780615359544754, | |
| "learning_rate": 4.987569486570073e-05, | |
| "loss": 0.6781, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 4.458241181243934, | |
| "grad_norm": 0.14260242879390717, | |
| "learning_rate": 4.9875552802690104e-05, | |
| "loss": 0.6782, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 4.463332749924422, | |
| "grad_norm": 0.13693778216838837, | |
| "learning_rate": 4.987541073967948e-05, | |
| "loss": 0.6771, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 4.46842431860491, | |
| "grad_norm": 0.10794325917959213, | |
| "learning_rate": 4.987526867666885e-05, | |
| "loss": 0.6789, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 4.473515887285398, | |
| "grad_norm": 0.11324315518140793, | |
| "learning_rate": 4.9875126613658224e-05, | |
| "loss": 0.684, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 4.478607455965887, | |
| "grad_norm": 0.10087355971336365, | |
| "learning_rate": 4.98749845506476e-05, | |
| "loss": 0.6819, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 4.483699024646374, | |
| "grad_norm": 0.09752973914146423, | |
| "learning_rate": 4.987484248763697e-05, | |
| "loss": 0.6819, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 4.488790593326863, | |
| "grad_norm": 0.12462896853685379, | |
| "learning_rate": 4.987470042462634e-05, | |
| "loss": 0.6844, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 4.493882162007351, | |
| "grad_norm": 0.12875770032405853, | |
| "learning_rate": 4.9874558361615716e-05, | |
| "loss": 0.6784, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 4.4989737306878395, | |
| "grad_norm": 0.11722705513238907, | |
| "learning_rate": 4.987441629860509e-05, | |
| "loss": 0.6797, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 4.504065299368327, | |
| "grad_norm": 0.16931360960006714, | |
| "learning_rate": 4.987427423559446e-05, | |
| "loss": 0.6766, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 4.509156868048816, | |
| "grad_norm": 0.13619418442249298, | |
| "learning_rate": 4.987413217258383e-05, | |
| "loss": 0.6774, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 4.514248436729304, | |
| "grad_norm": 0.19465768337249756, | |
| "learning_rate": 4.98739901095732e-05, | |
| "loss": 0.6832, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 4.519340005409791, | |
| "grad_norm": 0.11889132857322693, | |
| "learning_rate": 4.9873848046562575e-05, | |
| "loss": 0.6848, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 4.52443157409028, | |
| "grad_norm": 0.10783824324607849, | |
| "learning_rate": 4.987370598355195e-05, | |
| "loss": 0.6793, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 4.529523142770768, | |
| "grad_norm": 0.11385292559862137, | |
| "learning_rate": 4.987356392054132e-05, | |
| "loss": 0.6754, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 4.5346147114512565, | |
| "grad_norm": 0.13017722964286804, | |
| "learning_rate": 4.987342185753069e-05, | |
| "loss": 0.6778, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 4.539706280131744, | |
| "grad_norm": 0.13603904843330383, | |
| "learning_rate": 4.987327979452006e-05, | |
| "loss": 0.6758, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 4.544797848812232, | |
| "grad_norm": 0.15172545611858368, | |
| "learning_rate": 4.9873137731509434e-05, | |
| "loss": 0.677, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 4.549889417492721, | |
| "grad_norm": 0.13269858062267303, | |
| "learning_rate": 4.987299566849881e-05, | |
| "loss": 0.6823, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 4.5549809861732085, | |
| "grad_norm": 0.14247867465019226, | |
| "learning_rate": 4.987285360548818e-05, | |
| "loss": 0.6803, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 4.560072554853697, | |
| "grad_norm": 0.1458357870578766, | |
| "learning_rate": 4.987271154247755e-05, | |
| "loss": 0.6755, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 4.565164123534185, | |
| "grad_norm": 0.1240466758608818, | |
| "learning_rate": 4.9872569479466926e-05, | |
| "loss": 0.6793, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 4.5702556922146735, | |
| "grad_norm": 0.14014077186584473, | |
| "learning_rate": 4.98724274164563e-05, | |
| "loss": 0.6812, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 4.575347260895161, | |
| "grad_norm": 0.1574947088956833, | |
| "learning_rate": 4.987228535344567e-05, | |
| "loss": 0.6752, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 4.58043882957565, | |
| "grad_norm": 0.12997229397296906, | |
| "learning_rate": 4.987214329043504e-05, | |
| "loss": 0.6853, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.585530398256138, | |
| "grad_norm": 0.11148348450660706, | |
| "learning_rate": 4.987200122742441e-05, | |
| "loss": 0.6782, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 4.5906219669366255, | |
| "grad_norm": 0.13387084007263184, | |
| "learning_rate": 4.9871859164413785e-05, | |
| "loss": 0.6798, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 4.595713535617114, | |
| "grad_norm": 0.16059359908103943, | |
| "learning_rate": 4.987171710140316e-05, | |
| "loss": 0.6815, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 4.600805104297602, | |
| "grad_norm": 0.15377014875411987, | |
| "learning_rate": 4.987157503839253e-05, | |
| "loss": 0.6813, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 4.605896672978091, | |
| "grad_norm": 0.13581454753875732, | |
| "learning_rate": 4.9871432975381904e-05, | |
| "loss": 0.6776, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 4.610988241658578, | |
| "grad_norm": 0.11781629174947739, | |
| "learning_rate": 4.987129091237128e-05, | |
| "loss": 0.6778, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 4.616079810339067, | |
| "grad_norm": 0.15693874657154083, | |
| "learning_rate": 4.987114884936065e-05, | |
| "loss": 0.6785, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 4.621171379019555, | |
| "grad_norm": 0.1455591917037964, | |
| "learning_rate": 4.9871006786350023e-05, | |
| "loss": 0.683, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 4.626262947700043, | |
| "grad_norm": 0.10115326195955276, | |
| "learning_rate": 4.9870864723339397e-05, | |
| "loss": 0.6816, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 4.631354516380531, | |
| "grad_norm": 0.10945667326450348, | |
| "learning_rate": 4.987072266032877e-05, | |
| "loss": 0.6853, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 4.636446085061019, | |
| "grad_norm": 0.11783566325902939, | |
| "learning_rate": 4.987058059731814e-05, | |
| "loss": 0.6825, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 4.641537653741508, | |
| "grad_norm": 0.1183709055185318, | |
| "learning_rate": 4.987043853430751e-05, | |
| "loss": 0.6794, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 4.646629222421995, | |
| "grad_norm": 0.17861825227737427, | |
| "learning_rate": 4.987029647129688e-05, | |
| "loss": 0.6812, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 4.651720791102484, | |
| "grad_norm": 0.1105700135231018, | |
| "learning_rate": 4.987015440828625e-05, | |
| "loss": 0.6853, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 4.656812359782972, | |
| "grad_norm": 0.13059043884277344, | |
| "learning_rate": 4.987001234527562e-05, | |
| "loss": 0.6825, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 4.66190392846346, | |
| "grad_norm": 0.10306143015623093, | |
| "learning_rate": 4.9869870282264995e-05, | |
| "loss": 0.6825, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 4.666995497143948, | |
| "grad_norm": 0.1366746723651886, | |
| "learning_rate": 4.986972821925437e-05, | |
| "loss": 0.6769, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 4.672087065824436, | |
| "grad_norm": 0.15557105839252472, | |
| "learning_rate": 4.986958615624374e-05, | |
| "loss": 0.6811, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 4.677178634504925, | |
| "grad_norm": 0.1473141759634018, | |
| "learning_rate": 4.9869444093233114e-05, | |
| "loss": 0.6843, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 4.682270203185412, | |
| "grad_norm": 0.16388468444347382, | |
| "learning_rate": 4.986930203022249e-05, | |
| "loss": 0.6751, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 4.687361771865901, | |
| "grad_norm": 0.15377168357372284, | |
| "learning_rate": 4.986915996721186e-05, | |
| "loss": 0.68, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 4.692453340546389, | |
| "grad_norm": 0.14194439351558685, | |
| "learning_rate": 4.986901790420123e-05, | |
| "loss": 0.6762, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 4.6975449092268775, | |
| "grad_norm": 0.1327824741601944, | |
| "learning_rate": 4.9868875841190606e-05, | |
| "loss": 0.6837, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 4.702636477907365, | |
| "grad_norm": 0.13738127052783966, | |
| "learning_rate": 4.986873377817998e-05, | |
| "loss": 0.6785, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 4.707728046587853, | |
| "grad_norm": 0.17268739640712738, | |
| "learning_rate": 4.986859171516935e-05, | |
| "loss": 0.6769, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 4.712819615268342, | |
| "grad_norm": 0.14373987913131714, | |
| "learning_rate": 4.986844965215872e-05, | |
| "loss": 0.6806, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 4.7179111839488295, | |
| "grad_norm": 0.11402563005685806, | |
| "learning_rate": 4.986830758914809e-05, | |
| "loss": 0.681, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 4.723002752629318, | |
| "grad_norm": 0.12297854572534561, | |
| "learning_rate": 4.9868165526137465e-05, | |
| "loss": 0.6814, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 4.728094321309806, | |
| "grad_norm": 0.10925690084695816, | |
| "learning_rate": 4.986802346312684e-05, | |
| "loss": 0.6835, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 4.7331858899902945, | |
| "grad_norm": 0.1584441214799881, | |
| "learning_rate": 4.986788140011621e-05, | |
| "loss": 0.6795, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 4.738277458670782, | |
| "grad_norm": 0.1546424776315689, | |
| "learning_rate": 4.9867739337105585e-05, | |
| "loss": 0.6804, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 4.743369027351271, | |
| "grad_norm": 0.10821778327226639, | |
| "learning_rate": 4.986759727409496e-05, | |
| "loss": 0.6837, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 4.748460596031759, | |
| "grad_norm": 0.13283872604370117, | |
| "learning_rate": 4.9867455211084324e-05, | |
| "loss": 0.6806, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 4.7535521647122465, | |
| "grad_norm": 0.14704841375350952, | |
| "learning_rate": 4.98673131480737e-05, | |
| "loss": 0.68, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 4.758643733392735, | |
| "grad_norm": 0.13948886096477509, | |
| "learning_rate": 4.986717108506307e-05, | |
| "loss": 0.6737, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 4.763735302073223, | |
| "grad_norm": 0.1441805213689804, | |
| "learning_rate": 4.986702902205244e-05, | |
| "loss": 0.6791, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 4.768826870753712, | |
| "grad_norm": 0.15041285753250122, | |
| "learning_rate": 4.9866886959041816e-05, | |
| "loss": 0.6772, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 4.773918439434199, | |
| "grad_norm": 0.1656763106584549, | |
| "learning_rate": 4.986674489603119e-05, | |
| "loss": 0.6844, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 4.779010008114687, | |
| "grad_norm": 0.1404283046722412, | |
| "learning_rate": 4.986660283302056e-05, | |
| "loss": 0.6837, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 4.784101576795176, | |
| "grad_norm": 0.1178780272603035, | |
| "learning_rate": 4.986646077000993e-05, | |
| "loss": 0.6822, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 4.7891931454756635, | |
| "grad_norm": 0.11357172578573227, | |
| "learning_rate": 4.98663187069993e-05, | |
| "loss": 0.6811, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 4.794284714156152, | |
| "grad_norm": 0.12318674474954605, | |
| "learning_rate": 4.9866176643988675e-05, | |
| "loss": 0.6798, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 4.79937628283664, | |
| "grad_norm": 0.09487531334161758, | |
| "learning_rate": 4.986603458097805e-05, | |
| "loss": 0.6825, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 4.804467851517129, | |
| "grad_norm": 0.09417689591646194, | |
| "learning_rate": 4.986589251796742e-05, | |
| "loss": 0.6828, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 4.809559420197616, | |
| "grad_norm": 0.10734029114246368, | |
| "learning_rate": 4.9865750454956794e-05, | |
| "loss": 0.6821, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 4.814650988878105, | |
| "grad_norm": 0.10005868971347809, | |
| "learning_rate": 4.986560839194617e-05, | |
| "loss": 0.687, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 4.819742557558593, | |
| "grad_norm": 0.11884880065917969, | |
| "learning_rate": 4.986546632893554e-05, | |
| "loss": 0.679, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 4.824834126239081, | |
| "grad_norm": 0.10700765252113342, | |
| "learning_rate": 4.9865324265924914e-05, | |
| "loss": 0.679, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 4.829925694919569, | |
| "grad_norm": 0.1253756880760193, | |
| "learning_rate": 4.986518220291429e-05, | |
| "loss": 0.6824, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 4.835017263600057, | |
| "grad_norm": 0.13005779683589935, | |
| "learning_rate": 4.986504013990366e-05, | |
| "loss": 0.6773, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.840108832280546, | |
| "grad_norm": 0.1245838925242424, | |
| "learning_rate": 4.9864898076893026e-05, | |
| "loss": 0.6778, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 4.845200400961033, | |
| "grad_norm": 0.13099046051502228, | |
| "learning_rate": 4.98647560138824e-05, | |
| "loss": 0.6819, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 4.850291969641522, | |
| "grad_norm": 0.10995706915855408, | |
| "learning_rate": 4.986461395087177e-05, | |
| "loss": 0.6806, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 4.85538353832201, | |
| "grad_norm": 0.10981863737106323, | |
| "learning_rate": 4.986447188786114e-05, | |
| "loss": 0.6768, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 4.8604751070024985, | |
| "grad_norm": 0.10785161703824997, | |
| "learning_rate": 4.986432982485051e-05, | |
| "loss": 0.6815, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 4.865566675682986, | |
| "grad_norm": 0.11493176966905594, | |
| "learning_rate": 4.9864187761839885e-05, | |
| "loss": 0.6803, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 4.870658244363474, | |
| "grad_norm": 0.13624422252178192, | |
| "learning_rate": 4.986404569882926e-05, | |
| "loss": 0.679, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 4.875749813043963, | |
| "grad_norm": 0.12251431494951248, | |
| "learning_rate": 4.986390363581863e-05, | |
| "loss": 0.68, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 4.8808413817244505, | |
| "grad_norm": 0.15482662618160248, | |
| "learning_rate": 4.9863761572808004e-05, | |
| "loss": 0.6827, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 4.885932950404939, | |
| "grad_norm": 0.08389197289943695, | |
| "learning_rate": 4.986361950979738e-05, | |
| "loss": 0.6831, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 4.891024519085427, | |
| "grad_norm": 0.1233370378613472, | |
| "learning_rate": 4.986347744678675e-05, | |
| "loss": 0.6811, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 4.896116087765915, | |
| "grad_norm": 0.11783581227064133, | |
| "learning_rate": 4.9863335383776124e-05, | |
| "loss": 0.6854, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 4.901207656446403, | |
| "grad_norm": 0.10777773708105087, | |
| "learning_rate": 4.98631933207655e-05, | |
| "loss": 0.6787, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 4.906299225126891, | |
| "grad_norm": 0.14652119576931, | |
| "learning_rate": 4.986305125775487e-05, | |
| "loss": 0.6797, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 4.91139079380738, | |
| "grad_norm": 0.11962393671274185, | |
| "learning_rate": 4.9862909194744236e-05, | |
| "loss": 0.6832, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 4.9164823624878675, | |
| "grad_norm": 0.11764557659626007, | |
| "learning_rate": 4.986276713173361e-05, | |
| "loss": 0.677, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 4.921573931168356, | |
| "grad_norm": 0.13469521701335907, | |
| "learning_rate": 4.986262506872298e-05, | |
| "loss": 0.6759, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 4.926665499848844, | |
| "grad_norm": 0.11636529117822647, | |
| "learning_rate": 4.9862483005712356e-05, | |
| "loss": 0.6789, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 4.931757068529333, | |
| "grad_norm": 0.15902294218540192, | |
| "learning_rate": 4.986234094270173e-05, | |
| "loss": 0.6758, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 4.93684863720982, | |
| "grad_norm": 0.13991579413414001, | |
| "learning_rate": 4.98621988796911e-05, | |
| "loss": 0.6839, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 4.941940205890308, | |
| "grad_norm": 0.12394755333662033, | |
| "learning_rate": 4.9862056816680475e-05, | |
| "loss": 0.6823, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 4.947031774570797, | |
| "grad_norm": 0.11160258948802948, | |
| "learning_rate": 4.986191475366985e-05, | |
| "loss": 0.6772, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 4.952123343251285, | |
| "grad_norm": 0.11390865594148636, | |
| "learning_rate": 4.986177269065922e-05, | |
| "loss": 0.6777, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 4.957214911931773, | |
| "grad_norm": 0.14337550103664398, | |
| "learning_rate": 4.9861630627648594e-05, | |
| "loss": 0.676, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 4.962306480612261, | |
| "grad_norm": 0.1478574424982071, | |
| "learning_rate": 4.986148856463796e-05, | |
| "loss": 0.6804, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 4.96739804929275, | |
| "grad_norm": 0.09173934161663055, | |
| "learning_rate": 4.9861346501627334e-05, | |
| "loss": 0.6834, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 4.972489617973237, | |
| "grad_norm": 0.10893456637859344, | |
| "learning_rate": 4.986120443861671e-05, | |
| "loss": 0.6796, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 4.977581186653726, | |
| "grad_norm": 0.10967724025249481, | |
| "learning_rate": 4.986106237560608e-05, | |
| "loss": 0.6804, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 4.982672755334214, | |
| "grad_norm": 0.11746654659509659, | |
| "learning_rate": 4.9860920312595446e-05, | |
| "loss": 0.6807, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 4.987764324014702, | |
| "grad_norm": 0.10084499418735504, | |
| "learning_rate": 4.986077824958482e-05, | |
| "loss": 0.6779, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 4.99285589269519, | |
| "grad_norm": 0.16148197650909424, | |
| "learning_rate": 4.986063618657419e-05, | |
| "loss": 0.6766, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 4.997947461375678, | |
| "grad_norm": 0.12952958047389984, | |
| "learning_rate": 4.9860494123563565e-05, | |
| "loss": 0.676, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 5.002545784340244, | |
| "grad_norm": 0.16547605395317078, | |
| "learning_rate": 4.986035206055294e-05, | |
| "loss": 0.6137, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 5.0076373530207325, | |
| "grad_norm": 0.1671449840068817, | |
| "learning_rate": 4.986020999754231e-05, | |
| "loss": 0.6774, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 5.01272892170122, | |
| "grad_norm": 0.13992175459861755, | |
| "learning_rate": 4.9860067934531685e-05, | |
| "loss": 0.6819, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 5.017820490381709, | |
| "grad_norm": 0.08816186338663101, | |
| "learning_rate": 4.985992587152106e-05, | |
| "loss": 0.6819, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 5.022912059062197, | |
| "grad_norm": 0.08476711064577103, | |
| "learning_rate": 4.985978380851043e-05, | |
| "loss": 0.6817, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 5.0280036277426845, | |
| "grad_norm": 0.09989239275455475, | |
| "learning_rate": 4.9859641745499804e-05, | |
| "loss": 0.683, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 5.033095196423173, | |
| "grad_norm": 0.09048530459403992, | |
| "learning_rate": 4.985949968248918e-05, | |
| "loss": 0.681, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 5.038186765103661, | |
| "grad_norm": 0.11307314783334732, | |
| "learning_rate": 4.985935761947855e-05, | |
| "loss": 0.6785, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 5.04327833378415, | |
| "grad_norm": 0.12317655235528946, | |
| "learning_rate": 4.985921555646792e-05, | |
| "loss": 0.681, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 5.048369902464637, | |
| "grad_norm": 0.11963162571191788, | |
| "learning_rate": 4.985907349345729e-05, | |
| "loss": 0.6774, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 5.053461471145126, | |
| "grad_norm": 0.11438319087028503, | |
| "learning_rate": 4.985893143044666e-05, | |
| "loss": 0.68, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 5.058553039825614, | |
| "grad_norm": 0.13765713572502136, | |
| "learning_rate": 4.9858789367436036e-05, | |
| "loss": 0.6766, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 5.0636446085061015, | |
| "grad_norm": 0.12760768830776215, | |
| "learning_rate": 4.985864730442541e-05, | |
| "loss": 0.6763, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 5.06873617718659, | |
| "grad_norm": 0.14188893139362335, | |
| "learning_rate": 4.9858505241414775e-05, | |
| "loss": 0.6815, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 5.073827745867078, | |
| "grad_norm": 0.177343487739563, | |
| "learning_rate": 4.985836317840415e-05, | |
| "loss": 0.6765, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 5.078919314547567, | |
| "grad_norm": 0.15826770663261414, | |
| "learning_rate": 4.985822111539352e-05, | |
| "loss": 0.6819, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 5.084010883228054, | |
| "grad_norm": 0.1431620568037033, | |
| "learning_rate": 4.9858079052382895e-05, | |
| "loss": 0.6809, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 5.089102451908543, | |
| "grad_norm": 0.13952907919883728, | |
| "learning_rate": 4.985793698937227e-05, | |
| "loss": 0.6803, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.094194020589031, | |
| "grad_norm": 0.11862120032310486, | |
| "learning_rate": 4.985779492636164e-05, | |
| "loss": 0.6774, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 5.099285589269519, | |
| "grad_norm": 0.15467384457588196, | |
| "learning_rate": 4.9857652863351014e-05, | |
| "loss": 0.6777, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 5.104377157950007, | |
| "grad_norm": 0.12163079530000687, | |
| "learning_rate": 4.985751080034039e-05, | |
| "loss": 0.6801, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 5.109468726630495, | |
| "grad_norm": 0.1349727064371109, | |
| "learning_rate": 4.985736873732976e-05, | |
| "loss": 0.679, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 5.114560295310984, | |
| "grad_norm": 0.12950022518634796, | |
| "learning_rate": 4.9857226674319127e-05, | |
| "loss": 0.6799, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 5.119651863991471, | |
| "grad_norm": 0.12536922097206116, | |
| "learning_rate": 4.98570846113085e-05, | |
| "loss": 0.6805, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 5.12474343267196, | |
| "grad_norm": 0.08876863867044449, | |
| "learning_rate": 4.985694254829787e-05, | |
| "loss": 0.6838, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 5.129835001352448, | |
| "grad_norm": 0.13812567293643951, | |
| "learning_rate": 4.9856800485287246e-05, | |
| "loss": 0.6795, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 5.1349265700329365, | |
| "grad_norm": 0.11330072581768036, | |
| "learning_rate": 4.985665842227662e-05, | |
| "loss": 0.6775, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 5.140018138713424, | |
| "grad_norm": 0.12768009305000305, | |
| "learning_rate": 4.985651635926599e-05, | |
| "loss": 0.6758, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 5.145109707393912, | |
| "grad_norm": 0.15295925736427307, | |
| "learning_rate": 4.9856374296255365e-05, | |
| "loss": 0.6885, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 5.150201276074401, | |
| "grad_norm": 0.08242222666740417, | |
| "learning_rate": 4.985623223324474e-05, | |
| "loss": 0.6826, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 5.1552928447548885, | |
| "grad_norm": 0.0866493284702301, | |
| "learning_rate": 4.985609017023411e-05, | |
| "loss": 0.6823, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 5.160384413435377, | |
| "grad_norm": 0.1157221645116806, | |
| "learning_rate": 4.9855948107223485e-05, | |
| "loss": 0.6764, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 5.165475982115865, | |
| "grad_norm": 0.1414877027273178, | |
| "learning_rate": 4.985580604421286e-05, | |
| "loss": 0.6749, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 5.1705675507963536, | |
| "grad_norm": 0.13449379801750183, | |
| "learning_rate": 4.985566398120223e-05, | |
| "loss": 0.6806, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 5.175659119476841, | |
| "grad_norm": 0.13108868896961212, | |
| "learning_rate": 4.98555219181916e-05, | |
| "loss": 0.6806, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 5.180750688157329, | |
| "grad_norm": 0.12748171389102936, | |
| "learning_rate": 4.985537985518097e-05, | |
| "loss": 0.6763, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 5.185842256837818, | |
| "grad_norm": 0.10387007147073746, | |
| "learning_rate": 4.9855237792170336e-05, | |
| "loss": 0.6872, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 5.1909338255183055, | |
| "grad_norm": 0.09480390697717667, | |
| "learning_rate": 4.985509572915971e-05, | |
| "loss": 0.6822, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 5.196025394198794, | |
| "grad_norm": 0.11437319219112396, | |
| "learning_rate": 4.985495366614908e-05, | |
| "loss": 0.6792, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 5.201116962879282, | |
| "grad_norm": 0.12557561695575714, | |
| "learning_rate": 4.9854811603138456e-05, | |
| "loss": 0.682, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 5.206208531559771, | |
| "grad_norm": 0.1291828453540802, | |
| "learning_rate": 4.985466954012783e-05, | |
| "loss": 0.6848, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 5.211300100240258, | |
| "grad_norm": 0.12377645820379257, | |
| "learning_rate": 4.98545274771172e-05, | |
| "loss": 0.6789, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 5.216391668920746, | |
| "grad_norm": 0.12247670441865921, | |
| "learning_rate": 4.9854385414106575e-05, | |
| "loss": 0.681, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 5.221483237601235, | |
| "grad_norm": 0.10693535208702087, | |
| "learning_rate": 4.985424335109595e-05, | |
| "loss": 0.687, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 5.2265748062817226, | |
| "grad_norm": 0.11651374399662018, | |
| "learning_rate": 4.985410128808532e-05, | |
| "loss": 0.6775, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 5.231666374962211, | |
| "grad_norm": 0.1369701623916626, | |
| "learning_rate": 4.9853959225074694e-05, | |
| "loss": 0.6767, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 5.236757943642699, | |
| "grad_norm": 0.13671474158763885, | |
| "learning_rate": 4.985381716206407e-05, | |
| "loss": 0.6821, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 5.241849512323188, | |
| "grad_norm": 0.11949580907821655, | |
| "learning_rate": 4.985367509905344e-05, | |
| "loss": 0.6807, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 5.246941081003675, | |
| "grad_norm": 0.11703040450811386, | |
| "learning_rate": 4.985353303604281e-05, | |
| "loss": 0.678, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 5.252032649684164, | |
| "grad_norm": 0.11209936439990997, | |
| "learning_rate": 4.985339097303218e-05, | |
| "loss": 0.6773, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 5.257124218364652, | |
| "grad_norm": 0.13346509635448456, | |
| "learning_rate": 4.985324891002155e-05, | |
| "loss": 0.6857, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 5.26221578704514, | |
| "grad_norm": 0.12218772619962692, | |
| "learning_rate": 4.9853106847010926e-05, | |
| "loss": 0.681, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 5.267307355725628, | |
| "grad_norm": 0.1169796735048294, | |
| "learning_rate": 4.98529647840003e-05, | |
| "loss": 0.6767, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 5.272398924406116, | |
| "grad_norm": 0.14005398750305176, | |
| "learning_rate": 4.985282272098967e-05, | |
| "loss": 0.674, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 5.277490493086605, | |
| "grad_norm": 0.1299133449792862, | |
| "learning_rate": 4.9852680657979046e-05, | |
| "loss": 0.6779, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 5.282582061767092, | |
| "grad_norm": 0.13446015119552612, | |
| "learning_rate": 4.985253859496841e-05, | |
| "loss": 0.6781, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 5.287673630447581, | |
| "grad_norm": 0.14030112326145172, | |
| "learning_rate": 4.9852396531957785e-05, | |
| "loss": 0.6782, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 5.292765199128069, | |
| "grad_norm": 0.12442600727081299, | |
| "learning_rate": 4.985225446894716e-05, | |
| "loss": 0.6841, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 5.297856767808557, | |
| "grad_norm": 0.11391379684209824, | |
| "learning_rate": 4.985211240593653e-05, | |
| "loss": 0.6834, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 5.302948336489045, | |
| "grad_norm": 0.11152996867895126, | |
| "learning_rate": 4.9851970342925904e-05, | |
| "loss": 0.6816, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 5.308039905169533, | |
| "grad_norm": 0.13936050236225128, | |
| "learning_rate": 4.985182827991528e-05, | |
| "loss": 0.6831, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 5.313131473850022, | |
| "grad_norm": 0.11654047667980194, | |
| "learning_rate": 4.985168621690465e-05, | |
| "loss": 0.6803, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 5.3182230425305095, | |
| "grad_norm": 0.11251688003540039, | |
| "learning_rate": 4.985154415389402e-05, | |
| "loss": 0.6815, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 5.323314611210998, | |
| "grad_norm": 0.09920088946819305, | |
| "learning_rate": 4.985140209088339e-05, | |
| "loss": 0.6789, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 5.328406179891486, | |
| "grad_norm": 0.18474489450454712, | |
| "learning_rate": 4.985126002787276e-05, | |
| "loss": 0.6777, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 5.333497748571974, | |
| "grad_norm": 0.12075336277484894, | |
| "learning_rate": 4.9851117964862136e-05, | |
| "loss": 0.6828, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 5.338589317252462, | |
| "grad_norm": 0.1428055316209793, | |
| "learning_rate": 4.985097590185151e-05, | |
| "loss": 0.6765, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 5.34368088593295, | |
| "grad_norm": 0.1289169192314148, | |
| "learning_rate": 4.985083383884088e-05, | |
| "loss": 0.6825, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 5.348772454613439, | |
| "grad_norm": 0.10693208128213882, | |
| "learning_rate": 4.9850691775830256e-05, | |
| "loss": 0.6814, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 5.3538640232939265, | |
| "grad_norm": 0.11116955429315567, | |
| "learning_rate": 4.985054971281963e-05, | |
| "loss": 0.6805, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 5.358955591974415, | |
| "grad_norm": 0.11630560457706451, | |
| "learning_rate": 4.9850407649809e-05, | |
| "loss": 0.6779, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 5.364047160654903, | |
| "grad_norm": 0.13117016851902008, | |
| "learning_rate": 4.9850265586798375e-05, | |
| "loss": 0.6749, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 5.369138729335392, | |
| "grad_norm": 0.14777855575084686, | |
| "learning_rate": 4.985012352378775e-05, | |
| "loss": 0.6788, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 5.374230298015879, | |
| "grad_norm": 0.1084110215306282, | |
| "learning_rate": 4.9849981460777114e-05, | |
| "loss": 0.6843, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 5.379321866696367, | |
| "grad_norm": 0.10926970094442368, | |
| "learning_rate": 4.984983939776649e-05, | |
| "loss": 0.6807, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 5.384413435376856, | |
| "grad_norm": 0.10273724794387817, | |
| "learning_rate": 4.984969733475586e-05, | |
| "loss": 0.6819, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 5.389505004057344, | |
| "grad_norm": 0.12061687558889389, | |
| "learning_rate": 4.984955527174523e-05, | |
| "loss": 0.6791, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 5.394596572737832, | |
| "grad_norm": 0.11515804380178452, | |
| "learning_rate": 4.98494132087346e-05, | |
| "loss": 0.6798, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 5.39968814141832, | |
| "grad_norm": 0.11288391053676605, | |
| "learning_rate": 4.984927114572397e-05, | |
| "loss": 0.681, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 5.404779710098809, | |
| "grad_norm": 0.12682178616523743, | |
| "learning_rate": 4.9849129082713346e-05, | |
| "loss": 0.6778, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 5.409871278779296, | |
| "grad_norm": 0.12649093568325043, | |
| "learning_rate": 4.984898701970272e-05, | |
| "loss": 0.6767, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 5.414962847459784, | |
| "grad_norm": 0.1650230884552002, | |
| "learning_rate": 4.984884495669209e-05, | |
| "loss": 0.6772, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 5.420054416140273, | |
| "grad_norm": 0.11968445032835007, | |
| "learning_rate": 4.9848702893681465e-05, | |
| "loss": 0.6791, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 5.425145984820761, | |
| "grad_norm": 0.10566221922636032, | |
| "learning_rate": 4.984856083067084e-05, | |
| "loss": 0.6769, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 5.430237553501249, | |
| "grad_norm": 0.09944125264883041, | |
| "learning_rate": 4.984841876766021e-05, | |
| "loss": 0.6789, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 5.435329122181737, | |
| "grad_norm": 0.12134432047605515, | |
| "learning_rate": 4.9848276704649585e-05, | |
| "loss": 0.6741, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 5.440420690862226, | |
| "grad_norm": 0.1576509177684784, | |
| "learning_rate": 4.984813464163896e-05, | |
| "loss": 0.6818, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 5.4455122595427135, | |
| "grad_norm": 0.13000087440013885, | |
| "learning_rate": 4.9847992578628324e-05, | |
| "loss": 0.6719, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 5.450603828223201, | |
| "grad_norm": 0.12142984569072723, | |
| "learning_rate": 4.98478505156177e-05, | |
| "loss": 0.6825, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 5.45569539690369, | |
| "grad_norm": 0.1100669875741005, | |
| "learning_rate": 4.984770845260707e-05, | |
| "loss": 0.6759, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 5.460786965584178, | |
| "grad_norm": 0.1101478561758995, | |
| "learning_rate": 4.9847566389596444e-05, | |
| "loss": 0.685, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 5.465878534264666, | |
| "grad_norm": 0.1224004253745079, | |
| "learning_rate": 4.984742432658582e-05, | |
| "loss": 0.6763, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 5.470970102945154, | |
| "grad_norm": 0.14111606776714325, | |
| "learning_rate": 4.984728226357519e-05, | |
| "loss": 0.6777, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 5.476061671625643, | |
| "grad_norm": 0.10880038887262344, | |
| "learning_rate": 4.984714020056456e-05, | |
| "loss": 0.6834, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 5.4811532403061305, | |
| "grad_norm": 0.1258549839258194, | |
| "learning_rate": 4.9846998137553936e-05, | |
| "loss": 0.6828, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 5.486244808986619, | |
| "grad_norm": 0.10077346116304398, | |
| "learning_rate": 4.984685607454331e-05, | |
| "loss": 0.6797, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 5.491336377667107, | |
| "grad_norm": 0.14082978665828705, | |
| "learning_rate": 4.984671401153268e-05, | |
| "loss": 0.6773, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 5.496427946347595, | |
| "grad_norm": 0.12051651626825333, | |
| "learning_rate": 4.984657194852205e-05, | |
| "loss": 0.6774, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 5.501519515028083, | |
| "grad_norm": 0.15081602334976196, | |
| "learning_rate": 4.984642988551142e-05, | |
| "loss": 0.6866, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 5.506611083708571, | |
| "grad_norm": 0.09743819385766983, | |
| "learning_rate": 4.9846287822500795e-05, | |
| "loss": 0.6804, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 5.51170265238906, | |
| "grad_norm": 0.09400393813848495, | |
| "learning_rate": 4.984614575949017e-05, | |
| "loss": 0.6815, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 5.5167942210695475, | |
| "grad_norm": 0.13835515081882477, | |
| "learning_rate": 4.9846003696479534e-05, | |
| "loss": 0.6866, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 5.521885789750036, | |
| "grad_norm": 0.11208510398864746, | |
| "learning_rate": 4.984586163346891e-05, | |
| "loss": 0.6805, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 5.526977358430524, | |
| "grad_norm": 0.11167927086353302, | |
| "learning_rate": 4.984571957045828e-05, | |
| "loss": 0.6799, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 5.532068927111012, | |
| "grad_norm": 0.12590061128139496, | |
| "learning_rate": 4.9845577507447653e-05, | |
| "loss": 0.676, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 5.5371604957915, | |
| "grad_norm": 0.15050916373729706, | |
| "learning_rate": 4.9845435444437027e-05, | |
| "loss": 0.6712, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 5.542252064471988, | |
| "grad_norm": 0.14142751693725586, | |
| "learning_rate": 4.98452933814264e-05, | |
| "loss": 0.676, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 5.547343633152477, | |
| "grad_norm": 0.24029377102851868, | |
| "learning_rate": 4.984515131841577e-05, | |
| "loss": 0.683, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 5.552435201832965, | |
| "grad_norm": 0.11458209902048111, | |
| "learning_rate": 4.9845009255405146e-05, | |
| "loss": 0.6795, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 5.557526770513453, | |
| "grad_norm": 0.10509049147367477, | |
| "learning_rate": 4.984486719239452e-05, | |
| "loss": 0.6832, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 5.562618339193941, | |
| "grad_norm": 0.1304958164691925, | |
| "learning_rate": 4.984472512938389e-05, | |
| "loss": 0.6814, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 5.567709907874429, | |
| "grad_norm": 0.11066732555627823, | |
| "learning_rate": 4.9844583066373265e-05, | |
| "loss": 0.6734, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 5.572801476554917, | |
| "grad_norm": 0.14044025540351868, | |
| "learning_rate": 4.984444100336264e-05, | |
| "loss": 0.6851, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 5.577893045235405, | |
| "grad_norm": 0.09776227921247482, | |
| "learning_rate": 4.9844298940352005e-05, | |
| "loss": 0.6797, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 5.582984613915894, | |
| "grad_norm": 0.08972660452127457, | |
| "learning_rate": 4.984415687734138e-05, | |
| "loss": 0.6803, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 5.588076182596382, | |
| "grad_norm": 0.11810458451509476, | |
| "learning_rate": 4.984401481433075e-05, | |
| "loss": 0.6802, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 5.59316775127687, | |
| "grad_norm": 0.11004742234945297, | |
| "learning_rate": 4.9843872751320124e-05, | |
| "loss": 0.6795, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 5.598259319957358, | |
| "grad_norm": 0.10075508058071136, | |
| "learning_rate": 4.98437306883095e-05, | |
| "loss": 0.682, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.603350888637847, | |
| "grad_norm": 0.10835061222314835, | |
| "learning_rate": 4.9843588625298863e-05, | |
| "loss": 0.6829, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 5.6084424573183345, | |
| "grad_norm": 0.1209336370229721, | |
| "learning_rate": 4.9843446562288236e-05, | |
| "loss": 0.6808, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 5.613534025998822, | |
| "grad_norm": 0.12438962608575821, | |
| "learning_rate": 4.984330449927761e-05, | |
| "loss": 0.6768, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 5.618625594679311, | |
| "grad_norm": 0.1364268809556961, | |
| "learning_rate": 4.984316243626698e-05, | |
| "loss": 0.6781, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 5.623717163359799, | |
| "grad_norm": 0.11569849401712418, | |
| "learning_rate": 4.9843020373256356e-05, | |
| "loss": 0.6825, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 5.628808732040287, | |
| "grad_norm": 0.10072596371173859, | |
| "learning_rate": 4.984287831024573e-05, | |
| "loss": 0.6764, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 5.633900300720775, | |
| "grad_norm": 0.15180449187755585, | |
| "learning_rate": 4.98427362472351e-05, | |
| "loss": 0.6782, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 5.638991869401263, | |
| "grad_norm": 0.14204277098178864, | |
| "learning_rate": 4.9842594184224475e-05, | |
| "loss": 0.6806, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 5.6440834380817515, | |
| "grad_norm": 0.12409929186105728, | |
| "learning_rate": 4.984245212121385e-05, | |
| "loss": 0.6806, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 5.649175006762239, | |
| "grad_norm": 0.1692194640636444, | |
| "learning_rate": 4.9842310058203215e-05, | |
| "loss": 0.6723, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 5.654266575442728, | |
| "grad_norm": 0.2566402852535248, | |
| "learning_rate": 4.984216799519259e-05, | |
| "loss": 0.6845, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 5.659358144123216, | |
| "grad_norm": 0.13745322823524475, | |
| "learning_rate": 4.984202593218196e-05, | |
| "loss": 0.6748, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 5.664449712803704, | |
| "grad_norm": 0.16598811745643616, | |
| "learning_rate": 4.9841883869171334e-05, | |
| "loss": 0.6798, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 5.669541281484192, | |
| "grad_norm": 0.13570183515548706, | |
| "learning_rate": 4.984174180616071e-05, | |
| "loss": 0.6797, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 5.674632850164681, | |
| "grad_norm": 0.17549622058868408, | |
| "learning_rate": 4.984159974315008e-05, | |
| "loss": 0.6773, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 5.6797244188451685, | |
| "grad_norm": 0.15479332208633423, | |
| "learning_rate": 4.984145768013945e-05, | |
| "loss": 0.6795, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 5.684815987525656, | |
| "grad_norm": 0.1562296450138092, | |
| "learning_rate": 4.9841315617128826e-05, | |
| "loss": 0.6803, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 5.689907556206145, | |
| "grad_norm": 0.13014480471611023, | |
| "learning_rate": 4.98411735541182e-05, | |
| "loss": 0.6793, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 5.694999124886633, | |
| "grad_norm": 0.1577223241329193, | |
| "learning_rate": 4.984103149110757e-05, | |
| "loss": 0.6845, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 5.700090693567121, | |
| "grad_norm": 0.14906632900238037, | |
| "learning_rate": 4.9840889428096946e-05, | |
| "loss": 0.6771, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 5.705182262247609, | |
| "grad_norm": 0.15042632818222046, | |
| "learning_rate": 4.984074736508632e-05, | |
| "loss": 0.6737, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 5.710273830928098, | |
| "grad_norm": 0.1530093252658844, | |
| "learning_rate": 4.9840605302075685e-05, | |
| "loss": 0.6804, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 5.715365399608586, | |
| "grad_norm": 0.18300846219062805, | |
| "learning_rate": 4.984046323906506e-05, | |
| "loss": 0.6752, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 5.720456968289074, | |
| "grad_norm": 0.14398545026779175, | |
| "learning_rate": 4.9840321176054424e-05, | |
| "loss": 0.6793, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 5.725548536969562, | |
| "grad_norm": 0.12745435535907745, | |
| "learning_rate": 4.98401791130438e-05, | |
| "loss": 0.6765, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 5.73064010565005, | |
| "grad_norm": 0.15162277221679688, | |
| "learning_rate": 4.984003705003317e-05, | |
| "loss": 0.6744, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 5.735731674330538, | |
| "grad_norm": 0.12970998883247375, | |
| "learning_rate": 4.9839894987022544e-05, | |
| "loss": 0.6818, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 5.740823243011026, | |
| "grad_norm": 0.1195228323340416, | |
| "learning_rate": 4.983975292401192e-05, | |
| "loss": 0.6749, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 5.745914811691515, | |
| "grad_norm": 0.14821238815784454, | |
| "learning_rate": 4.983961086100129e-05, | |
| "loss": 0.6759, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 5.751006380372003, | |
| "grad_norm": 0.18345175683498383, | |
| "learning_rate": 4.983946879799066e-05, | |
| "loss": 0.6736, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 5.75609794905249, | |
| "grad_norm": 0.14165613055229187, | |
| "learning_rate": 4.9839326734980036e-05, | |
| "loss": 0.6777, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 5.761189517732979, | |
| "grad_norm": 0.16045770049095154, | |
| "learning_rate": 4.983918467196941e-05, | |
| "loss": 0.678, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 5.766281086413467, | |
| "grad_norm": 0.1490974873304367, | |
| "learning_rate": 4.983904260895878e-05, | |
| "loss": 0.68, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 5.7713726550939555, | |
| "grad_norm": 0.11064887046813965, | |
| "learning_rate": 4.9838900545948156e-05, | |
| "loss": 0.6832, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 5.776464223774443, | |
| "grad_norm": 0.11848734319210052, | |
| "learning_rate": 4.983875848293753e-05, | |
| "loss": 0.6792, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 5.781555792454932, | |
| "grad_norm": 0.1246313750743866, | |
| "learning_rate": 4.9838616419926895e-05, | |
| "loss": 0.6794, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 5.78664736113542, | |
| "grad_norm": 0.17359575629234314, | |
| "learning_rate": 4.983847435691627e-05, | |
| "loss": 0.6762, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 5.791738929815908, | |
| "grad_norm": 0.16471154987812042, | |
| "learning_rate": 4.983833229390564e-05, | |
| "loss": 0.6742, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 5.796830498496396, | |
| "grad_norm": 0.1479930430650711, | |
| "learning_rate": 4.9838190230895014e-05, | |
| "loss": 0.678, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 5.801922067176884, | |
| "grad_norm": 0.11385341733694077, | |
| "learning_rate": 4.983804816788439e-05, | |
| "loss": 0.6791, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 5.8070136358573725, | |
| "grad_norm": 0.13574256002902985, | |
| "learning_rate": 4.983790610487376e-05, | |
| "loss": 0.6795, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 5.81210520453786, | |
| "grad_norm": 0.1701575517654419, | |
| "learning_rate": 4.9837764041863134e-05, | |
| "loss": 0.6791, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 5.817196773218349, | |
| "grad_norm": 0.11972179263830185, | |
| "learning_rate": 4.98376219788525e-05, | |
| "loss": 0.6802, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 5.822288341898837, | |
| "grad_norm": 0.15830230712890625, | |
| "learning_rate": 4.983747991584187e-05, | |
| "loss": 0.6761, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 5.827379910579325, | |
| "grad_norm": 0.16592001914978027, | |
| "learning_rate": 4.9837337852831246e-05, | |
| "loss": 0.6768, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 5.832471479259813, | |
| "grad_norm": 0.21496979892253876, | |
| "learning_rate": 4.983719578982062e-05, | |
| "loss": 0.6783, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 5.837563047940302, | |
| "grad_norm": 0.14850680530071259, | |
| "learning_rate": 4.983705372680999e-05, | |
| "loss": 0.6781, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 5.8426546166207896, | |
| "grad_norm": 0.12256158143281937, | |
| "learning_rate": 4.9836911663799365e-05, | |
| "loss": 0.6776, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 5.847746185301277, | |
| "grad_norm": 0.14311592280864716, | |
| "learning_rate": 4.983676960078874e-05, | |
| "loss": 0.6717, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 5.852837753981766, | |
| "grad_norm": 0.1648699939250946, | |
| "learning_rate": 4.9836627537778105e-05, | |
| "loss": 0.6779, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 5.857929322662254, | |
| "grad_norm": 0.13590501248836517, | |
| "learning_rate": 4.983648547476748e-05, | |
| "loss": 0.6824, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 5.863020891342742, | |
| "grad_norm": 0.13972793519496918, | |
| "learning_rate": 4.983634341175685e-05, | |
| "loss": 0.679, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 5.86811246002323, | |
| "grad_norm": 0.11360618472099304, | |
| "learning_rate": 4.9836201348746224e-05, | |
| "loss": 0.6746, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 5.873204028703718, | |
| "grad_norm": 0.14063167572021484, | |
| "learning_rate": 4.98360592857356e-05, | |
| "loss": 0.6818, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 5.878295597384207, | |
| "grad_norm": 0.12393573671579361, | |
| "learning_rate": 4.983591722272497e-05, | |
| "loss": 0.6771, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 5.883387166064694, | |
| "grad_norm": 0.12383928149938583, | |
| "learning_rate": 4.9835775159714344e-05, | |
| "loss": 0.6807, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 5.888478734745183, | |
| "grad_norm": 0.11464569717645645, | |
| "learning_rate": 4.983563309670372e-05, | |
| "loss": 0.6823, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 5.893570303425671, | |
| "grad_norm": 0.15896569192409515, | |
| "learning_rate": 4.983549103369309e-05, | |
| "loss": 0.678, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 5.898661872106159, | |
| "grad_norm": 0.11153749376535416, | |
| "learning_rate": 4.983534897068246e-05, | |
| "loss": 0.6799, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 5.903753440786647, | |
| "grad_norm": 0.13557817041873932, | |
| "learning_rate": 4.9835206907671836e-05, | |
| "loss": 0.678, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 5.908845009467136, | |
| "grad_norm": 0.12681804597377777, | |
| "learning_rate": 4.98350648446612e-05, | |
| "loss": 0.6853, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 5.913936578147624, | |
| "grad_norm": 0.11007581651210785, | |
| "learning_rate": 4.9834922781650575e-05, | |
| "loss": 0.6799, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 5.919028146828111, | |
| "grad_norm": 0.14073921740055084, | |
| "learning_rate": 4.983478071863995e-05, | |
| "loss": 0.6809, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 5.9241197155086, | |
| "grad_norm": 0.17294389009475708, | |
| "learning_rate": 4.9834638655629315e-05, | |
| "loss": 0.677, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 5.929211284189088, | |
| "grad_norm": 0.11901852488517761, | |
| "learning_rate": 4.983449659261869e-05, | |
| "loss": 0.6814, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 5.9343028528695765, | |
| "grad_norm": 0.1563209444284439, | |
| "learning_rate": 4.983435452960806e-05, | |
| "loss": 0.6803, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 5.939394421550064, | |
| "grad_norm": 0.1763051152229309, | |
| "learning_rate": 4.9834212466597434e-05, | |
| "loss": 0.6713, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 5.944485990230553, | |
| "grad_norm": 0.1412787139415741, | |
| "learning_rate": 4.983407040358681e-05, | |
| "loss": 0.6791, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 5.949577558911041, | |
| "grad_norm": 0.13946793973445892, | |
| "learning_rate": 4.983392834057618e-05, | |
| "loss": 0.674, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 5.954669127591529, | |
| "grad_norm": 0.1848699301481247, | |
| "learning_rate": 4.9833786277565553e-05, | |
| "loss": 0.6785, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 5.959760696272017, | |
| "grad_norm": 0.14714594185352325, | |
| "learning_rate": 4.9833644214554927e-05, | |
| "loss": 0.6764, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 5.964852264952505, | |
| "grad_norm": 0.14410807192325592, | |
| "learning_rate": 4.98335021515443e-05, | |
| "loss": 0.6755, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 5.9699438336329935, | |
| "grad_norm": 0.11196265369653702, | |
| "learning_rate": 4.983336008853367e-05, | |
| "loss": 0.6801, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 5.975035402313481, | |
| "grad_norm": 0.14931631088256836, | |
| "learning_rate": 4.9833218025523046e-05, | |
| "loss": 0.6761, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 5.98012697099397, | |
| "grad_norm": 0.1235998123884201, | |
| "learning_rate": 4.983307596251241e-05, | |
| "loss": 0.6816, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 5.985218539674458, | |
| "grad_norm": 0.14235694706439972, | |
| "learning_rate": 4.9832933899501785e-05, | |
| "loss": 0.6784, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 5.9903101083549455, | |
| "grad_norm": 0.11291839182376862, | |
| "learning_rate": 4.983279183649116e-05, | |
| "loss": 0.6857, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 5.995401677035434, | |
| "grad_norm": 0.12273520231246948, | |
| "learning_rate": 4.983264977348053e-05, | |
| "loss": 0.6801, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.025783156976103783, | |
| "learning_rate": 4.9832507710469905e-05, | |
| "loss": 0.6142, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 6.005091568680488, | |
| "grad_norm": 0.1227310448884964, | |
| "learning_rate": 4.983236564745928e-05, | |
| "loss": 0.679, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 6.010183137360976, | |
| "grad_norm": 0.14122678339481354, | |
| "learning_rate": 4.983222358444865e-05, | |
| "loss": 0.677, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 6.015274706041464, | |
| "grad_norm": 0.14405541121959686, | |
| "learning_rate": 4.9832081521438024e-05, | |
| "loss": 0.6799, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 6.020366274721953, | |
| "grad_norm": 0.18694424629211426, | |
| "learning_rate": 4.98319394584274e-05, | |
| "loss": 0.675, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 6.025457843402441, | |
| "grad_norm": 0.1961718052625656, | |
| "learning_rate": 4.983179739541677e-05, | |
| "loss": 0.6819, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 6.030549412082929, | |
| "grad_norm": 0.1102224811911583, | |
| "learning_rate": 4.9831655332406137e-05, | |
| "loss": 0.682, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 6.035640980763417, | |
| "grad_norm": 0.1295260190963745, | |
| "learning_rate": 4.983151326939551e-05, | |
| "loss": 0.6794, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 6.040732549443905, | |
| "grad_norm": 0.12580661475658417, | |
| "learning_rate": 4.983137120638488e-05, | |
| "loss": 0.6791, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 6.0458241181243935, | |
| "grad_norm": 0.1288338154554367, | |
| "learning_rate": 4.9831229143374256e-05, | |
| "loss": 0.6805, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 6.050915686804881, | |
| "grad_norm": 0.1211671456694603, | |
| "learning_rate": 4.983108708036362e-05, | |
| "loss": 0.6764, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 6.05600725548537, | |
| "grad_norm": 0.15219536423683167, | |
| "learning_rate": 4.9830945017352995e-05, | |
| "loss": 0.6806, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 6.061098824165858, | |
| "grad_norm": 0.12759484350681305, | |
| "learning_rate": 4.983080295434237e-05, | |
| "loss": 0.676, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 6.066190392846346, | |
| "grad_norm": 0.1949695497751236, | |
| "learning_rate": 4.983066089133174e-05, | |
| "loss": 0.6832, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 6.071281961526834, | |
| "grad_norm": 0.11879277229309082, | |
| "learning_rate": 4.9830518828321115e-05, | |
| "loss": 0.6781, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 6.076373530207323, | |
| "grad_norm": 0.12636293470859528, | |
| "learning_rate": 4.983037676531049e-05, | |
| "loss": 0.6774, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 6.0814650988878105, | |
| "grad_norm": 0.13675157725811005, | |
| "learning_rate": 4.983023470229986e-05, | |
| "loss": 0.6789, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 6.086556667568298, | |
| "grad_norm": 0.13322140276432037, | |
| "learning_rate": 4.9830092639289234e-05, | |
| "loss": 0.6805, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 6.091648236248787, | |
| "grad_norm": 0.1352871060371399, | |
| "learning_rate": 4.982995057627861e-05, | |
| "loss": 0.6808, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 6.096739804929275, | |
| "grad_norm": 0.14976170659065247, | |
| "learning_rate": 4.982980851326798e-05, | |
| "loss": 0.6775, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 6.101831373609763, | |
| "grad_norm": 0.1250462532043457, | |
| "learning_rate": 4.982966645025735e-05, | |
| "loss": 0.6782, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 6.106922942290251, | |
| "grad_norm": 0.16815803945064545, | |
| "learning_rate": 4.9829524387246726e-05, | |
| "loss": 0.6721, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.11201451097074, | |
| "grad_norm": 0.18195395171642303, | |
| "learning_rate": 4.982938232423609e-05, | |
| "loss": 0.6806, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 6.1171060796512275, | |
| "grad_norm": 0.15061675012111664, | |
| "learning_rate": 4.9829240261225466e-05, | |
| "loss": 0.6732, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 6.122197648331715, | |
| "grad_norm": 0.14526985585689545, | |
| "learning_rate": 4.982909819821484e-05, | |
| "loss": 0.6788, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 6.127289217012204, | |
| "grad_norm": 0.1469496637582779, | |
| "learning_rate": 4.982895613520421e-05, | |
| "loss": 0.6779, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 6.132380785692692, | |
| "grad_norm": 0.18443866074085236, | |
| "learning_rate": 4.9828814072193585e-05, | |
| "loss": 0.6767, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 6.13747235437318, | |
| "grad_norm": 0.11885727196931839, | |
| "learning_rate": 4.982867200918295e-05, | |
| "loss": 0.6764, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 6.142563923053668, | |
| "grad_norm": 0.1266055554151535, | |
| "learning_rate": 4.9828529946172325e-05, | |
| "loss": 0.6837, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 6.147655491734157, | |
| "grad_norm": 0.11415141075849533, | |
| "learning_rate": 4.98283878831617e-05, | |
| "loss": 0.6837, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 6.152747060414645, | |
| "grad_norm": 0.09705322235822678, | |
| "learning_rate": 4.982824582015107e-05, | |
| "loss": 0.6815, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 6.157838629095132, | |
| "grad_norm": 0.12555427849292755, | |
| "learning_rate": 4.9828103757140444e-05, | |
| "loss": 0.6804, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 6.162930197775621, | |
| "grad_norm": 0.11063813418149948, | |
| "learning_rate": 4.982796169412982e-05, | |
| "loss": 0.6815, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 6.168021766456109, | |
| "grad_norm": 0.1428930014371872, | |
| "learning_rate": 4.982781963111919e-05, | |
| "loss": 0.6781, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 6.173113335136597, | |
| "grad_norm": 0.13896307349205017, | |
| "learning_rate": 4.982767756810856e-05, | |
| "loss": 0.6763, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 6.178204903817085, | |
| "grad_norm": 0.12032928317785263, | |
| "learning_rate": 4.9827535505097936e-05, | |
| "loss": 0.6803, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 6.183296472497574, | |
| "grad_norm": 0.11562150716781616, | |
| "learning_rate": 4.98273934420873e-05, | |
| "loss": 0.6766, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 6.188388041178062, | |
| "grad_norm": 0.1040254682302475, | |
| "learning_rate": 4.9827251379076676e-05, | |
| "loss": 0.6823, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 6.19347960985855, | |
| "grad_norm": 0.1031600683927536, | |
| "learning_rate": 4.982710931606605e-05, | |
| "loss": 0.6757, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 6.198571178539038, | |
| "grad_norm": 0.11150684952735901, | |
| "learning_rate": 4.982696725305542e-05, | |
| "loss": 0.6781, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 6.203662747219526, | |
| "grad_norm": 0.15506963431835175, | |
| "learning_rate": 4.9826825190044795e-05, | |
| "loss": 0.672, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 6.2087543159000145, | |
| "grad_norm": 0.13985055685043335, | |
| "learning_rate": 4.982668312703417e-05, | |
| "loss": 0.6793, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 6.213845884580502, | |
| "grad_norm": 0.11352770030498505, | |
| "learning_rate": 4.982654106402354e-05, | |
| "loss": 0.6824, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 6.218937453260991, | |
| "grad_norm": 0.11052574217319489, | |
| "learning_rate": 4.9826399001012914e-05, | |
| "loss": 0.6791, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 6.224029021941479, | |
| "grad_norm": 0.12992137670516968, | |
| "learning_rate": 4.982625693800229e-05, | |
| "loss": 0.6793, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 6.229120590621967, | |
| "grad_norm": 0.1408848613500595, | |
| "learning_rate": 4.982611487499166e-05, | |
| "loss": 0.6791, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 6.234212159302455, | |
| "grad_norm": 0.18795296549797058, | |
| "learning_rate": 4.9825972811981034e-05, | |
| "loss": 0.6802, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 6.239303727982943, | |
| "grad_norm": 0.12889884412288666, | |
| "learning_rate": 4.982583074897041e-05, | |
| "loss": 0.6878, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 6.2443952966634315, | |
| "grad_norm": 0.1431640386581421, | |
| "learning_rate": 4.982568868595977e-05, | |
| "loss": 0.6775, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 6.249486865343919, | |
| "grad_norm": 0.11410534381866455, | |
| "learning_rate": 4.9825546622949146e-05, | |
| "loss": 0.6798, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 6.254578434024408, | |
| "grad_norm": 0.14347901940345764, | |
| "learning_rate": 4.982540455993851e-05, | |
| "loss": 0.6764, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 6.259670002704896, | |
| "grad_norm": 0.14148719608783722, | |
| "learning_rate": 4.9825262496927886e-05, | |
| "loss": 0.6778, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 6.264761571385384, | |
| "grad_norm": 0.13571056723594666, | |
| "learning_rate": 4.982512043391726e-05, | |
| "loss": 0.6822, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 6.269853140065872, | |
| "grad_norm": 0.13416819274425507, | |
| "learning_rate": 4.982497837090663e-05, | |
| "loss": 0.6764, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 6.274944708746361, | |
| "grad_norm": 0.12467856705188751, | |
| "learning_rate": 4.9824836307896005e-05, | |
| "loss": 0.681, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 6.280036277426849, | |
| "grad_norm": 0.11934306472539902, | |
| "learning_rate": 4.982469424488538e-05, | |
| "loss": 0.6808, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 6.285127846107336, | |
| "grad_norm": 0.12335172295570374, | |
| "learning_rate": 4.982455218187475e-05, | |
| "loss": 0.6795, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 6.290219414787825, | |
| "grad_norm": 0.12900583446025848, | |
| "learning_rate": 4.9824410118864124e-05, | |
| "loss": 0.6736, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 6.295310983468313, | |
| "grad_norm": 0.11381091177463531, | |
| "learning_rate": 4.98242680558535e-05, | |
| "loss": 0.6838, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 6.300402552148801, | |
| "grad_norm": 0.11505099385976791, | |
| "learning_rate": 4.982412599284287e-05, | |
| "loss": 0.6772, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 6.305494120829289, | |
| "grad_norm": 0.11616339534521103, | |
| "learning_rate": 4.9823983929832244e-05, | |
| "loss": 0.6788, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 6.310585689509777, | |
| "grad_norm": 0.1088867336511612, | |
| "learning_rate": 4.982384186682162e-05, | |
| "loss": 0.6777, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 6.315677258190266, | |
| "grad_norm": 0.11975440382957458, | |
| "learning_rate": 4.982369980381098e-05, | |
| "loss": 0.6854, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 6.320768826870753, | |
| "grad_norm": 0.11531190574169159, | |
| "learning_rate": 4.9823557740800356e-05, | |
| "loss": 0.6786, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 6.325860395551242, | |
| "grad_norm": 0.117821604013443, | |
| "learning_rate": 4.982341567778973e-05, | |
| "loss": 0.6814, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 6.33095196423173, | |
| "grad_norm": 0.13663433492183685, | |
| "learning_rate": 4.98232736147791e-05, | |
| "loss": 0.6754, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 6.3360435329122184, | |
| "grad_norm": 0.14458602666854858, | |
| "learning_rate": 4.9823131551768475e-05, | |
| "loss": 0.6829, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 6.341135101592706, | |
| "grad_norm": 0.12459100037813187, | |
| "learning_rate": 4.982298948875785e-05, | |
| "loss": 0.6803, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 6.346226670273195, | |
| "grad_norm": 0.11213183403015137, | |
| "learning_rate": 4.9822847425747215e-05, | |
| "loss": 0.6776, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 6.351318238953683, | |
| "grad_norm": 0.12166488170623779, | |
| "learning_rate": 4.982270536273659e-05, | |
| "loss": 0.6817, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 6.35640980763417, | |
| "grad_norm": 0.11691765487194061, | |
| "learning_rate": 4.982256329972596e-05, | |
| "loss": 0.6829, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 6.361501376314659, | |
| "grad_norm": 0.1120506301522255, | |
| "learning_rate": 4.9822421236715334e-05, | |
| "loss": 0.6791, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 6.366592944995147, | |
| "grad_norm": 0.12437008321285248, | |
| "learning_rate": 4.982227917370471e-05, | |
| "loss": 0.6751, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 6.3716845136756355, | |
| "grad_norm": 0.15133772790431976, | |
| "learning_rate": 4.982213711069408e-05, | |
| "loss": 0.6785, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 6.376776082356123, | |
| "grad_norm": 0.14470815658569336, | |
| "learning_rate": 4.9821995047683453e-05, | |
| "loss": 0.6805, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 6.381867651036612, | |
| "grad_norm": 0.1352653056383133, | |
| "learning_rate": 4.9821852984672827e-05, | |
| "loss": 0.6799, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 6.3869592197171, | |
| "grad_norm": 0.12650400400161743, | |
| "learning_rate": 4.982171092166219e-05, | |
| "loss": 0.6788, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 6.392050788397588, | |
| "grad_norm": 0.12057118117809296, | |
| "learning_rate": 4.9821568858651566e-05, | |
| "loss": 0.6811, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 6.397142357078076, | |
| "grad_norm": 0.16348209977149963, | |
| "learning_rate": 4.982142679564094e-05, | |
| "loss": 0.6799, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 6.402233925758564, | |
| "grad_norm": 0.18208801746368408, | |
| "learning_rate": 4.982128473263031e-05, | |
| "loss": 0.6738, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 6.4073254944390525, | |
| "grad_norm": 0.1399811953306198, | |
| "learning_rate": 4.9821142669619685e-05, | |
| "loss": 0.6762, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 6.41241706311954, | |
| "grad_norm": 0.11085145175457001, | |
| "learning_rate": 4.982100060660906e-05, | |
| "loss": 0.6914, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 6.417508631800029, | |
| "grad_norm": 0.10344066470861435, | |
| "learning_rate": 4.982085854359843e-05, | |
| "loss": 0.6809, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 6.422600200480517, | |
| "grad_norm": 0.13643105328083038, | |
| "learning_rate": 4.9820716480587805e-05, | |
| "loss": 0.6752, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 6.4276917691610045, | |
| "grad_norm": 0.12111321091651917, | |
| "learning_rate": 4.982057441757718e-05, | |
| "loss": 0.6786, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 6.432783337841493, | |
| "grad_norm": 0.1612890660762787, | |
| "learning_rate": 4.982043235456655e-05, | |
| "loss": 0.6789, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 6.437874906521981, | |
| "grad_norm": 0.15844057500362396, | |
| "learning_rate": 4.9820290291555924e-05, | |
| "loss": 0.6826, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 6.44296647520247, | |
| "grad_norm": 0.128059983253479, | |
| "learning_rate": 4.982014822854529e-05, | |
| "loss": 0.6776, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 6.448058043882957, | |
| "grad_norm": 0.13311228156089783, | |
| "learning_rate": 4.9820006165534663e-05, | |
| "loss": 0.6793, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 6.453149612563446, | |
| "grad_norm": 0.15546241402626038, | |
| "learning_rate": 4.9819864102524037e-05, | |
| "loss": 0.6753, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 6.458241181243934, | |
| "grad_norm": 0.1458451747894287, | |
| "learning_rate": 4.98197220395134e-05, | |
| "loss": 0.6817, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 6.463332749924422, | |
| "grad_norm": 0.12202929705381393, | |
| "learning_rate": 4.9819579976502776e-05, | |
| "loss": 0.6801, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 6.46842431860491, | |
| "grad_norm": 0.137448251247406, | |
| "learning_rate": 4.981943791349215e-05, | |
| "loss": 0.6779, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 6.473515887285398, | |
| "grad_norm": 0.12428711354732513, | |
| "learning_rate": 4.981929585048152e-05, | |
| "loss": 0.6814, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 6.478607455965887, | |
| "grad_norm": 0.15364359319210052, | |
| "learning_rate": 4.9819153787470895e-05, | |
| "loss": 0.6719, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 6.483699024646374, | |
| "grad_norm": 0.1646897941827774, | |
| "learning_rate": 4.981901172446027e-05, | |
| "loss": 0.6787, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 6.488790593326863, | |
| "grad_norm": 0.18058307468891144, | |
| "learning_rate": 4.981886966144964e-05, | |
| "loss": 0.6797, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 6.493882162007351, | |
| "grad_norm": 0.13395850360393524, | |
| "learning_rate": 4.9818727598439015e-05, | |
| "loss": 0.6776, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 6.4989737306878395, | |
| "grad_norm": 0.15397368371486664, | |
| "learning_rate": 4.981858553542839e-05, | |
| "loss": 0.6698, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 6.504065299368327, | |
| "grad_norm": 0.16110943257808685, | |
| "learning_rate": 4.981844347241776e-05, | |
| "loss": 0.6849, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 6.509156868048816, | |
| "grad_norm": 0.18386079370975494, | |
| "learning_rate": 4.9818301409407134e-05, | |
| "loss": 0.6813, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 6.514248436729304, | |
| "grad_norm": 0.11144635081291199, | |
| "learning_rate": 4.98181593463965e-05, | |
| "loss": 0.6746, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 6.519340005409791, | |
| "grad_norm": 0.1547509729862213, | |
| "learning_rate": 4.981801728338587e-05, | |
| "loss": 0.6775, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 6.52443157409028, | |
| "grad_norm": 0.12533412873744965, | |
| "learning_rate": 4.9817875220375246e-05, | |
| "loss": 0.6723, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 6.529523142770768, | |
| "grad_norm": 0.13594309985637665, | |
| "learning_rate": 4.981773315736462e-05, | |
| "loss": 0.6815, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 6.5346147114512565, | |
| "grad_norm": 0.16000863909721375, | |
| "learning_rate": 4.981759109435399e-05, | |
| "loss": 0.6845, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 6.539706280131744, | |
| "grad_norm": 0.12660828232765198, | |
| "learning_rate": 4.9817449031343366e-05, | |
| "loss": 0.6776, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 6.544797848812232, | |
| "grad_norm": 0.13099251687526703, | |
| "learning_rate": 4.981730696833274e-05, | |
| "loss": 0.6761, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 6.549889417492721, | |
| "grad_norm": 0.13618282973766327, | |
| "learning_rate": 4.981716490532211e-05, | |
| "loss": 0.6777, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 6.5549809861732085, | |
| "grad_norm": 0.128812775015831, | |
| "learning_rate": 4.9817022842311485e-05, | |
| "loss": 0.687, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 6.560072554853697, | |
| "grad_norm": 0.10990247130393982, | |
| "learning_rate": 4.981688077930085e-05, | |
| "loss": 0.6792, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 6.565164123534185, | |
| "grad_norm": 0.13022927939891815, | |
| "learning_rate": 4.9816738716290225e-05, | |
| "loss": 0.6785, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 6.5702556922146735, | |
| "grad_norm": 0.14299486577510834, | |
| "learning_rate": 4.98165966532796e-05, | |
| "loss": 0.6819, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 6.575347260895161, | |
| "grad_norm": 0.13400639593601227, | |
| "learning_rate": 4.981645459026897e-05, | |
| "loss": 0.6815, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 6.58043882957565, | |
| "grad_norm": 0.0999205932021141, | |
| "learning_rate": 4.9816312527258344e-05, | |
| "loss": 0.6788, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 6.585530398256138, | |
| "grad_norm": 0.11330140382051468, | |
| "learning_rate": 4.981617046424771e-05, | |
| "loss": 0.6805, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 6.5906219669366255, | |
| "grad_norm": 0.18674777448177338, | |
| "learning_rate": 4.981602840123708e-05, | |
| "loss": 0.6778, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 6.595713535617114, | |
| "grad_norm": 0.15032435953617096, | |
| "learning_rate": 4.9815886338226456e-05, | |
| "loss": 0.6825, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 6.600805104297602, | |
| "grad_norm": 0.1333203762769699, | |
| "learning_rate": 4.981574427521583e-05, | |
| "loss": 0.6795, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 6.605896672978091, | |
| "grad_norm": 0.16465353965759277, | |
| "learning_rate": 4.98156022122052e-05, | |
| "loss": 0.6706, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 6.610988241658578, | |
| "grad_norm": 0.15451110899448395, | |
| "learning_rate": 4.9815460149194576e-05, | |
| "loss": 0.6757, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 6.616079810339067, | |
| "grad_norm": 0.15208947658538818, | |
| "learning_rate": 4.981531808618395e-05, | |
| "loss": 0.6818, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 6.621171379019555, | |
| "grad_norm": 0.13289377093315125, | |
| "learning_rate": 4.981517602317332e-05, | |
| "loss": 0.6811, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 6.626262947700043, | |
| "grad_norm": 0.18308168649673462, | |
| "learning_rate": 4.9815033960162695e-05, | |
| "loss": 0.678, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 6.631354516380531, | |
| "grad_norm": 0.12425180524587631, | |
| "learning_rate": 4.981489189715207e-05, | |
| "loss": 0.6816, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 6.636446085061019, | |
| "grad_norm": 0.13754673302173615, | |
| "learning_rate": 4.981474983414144e-05, | |
| "loss": 0.6773, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 6.641537653741508, | |
| "grad_norm": 0.15316608548164368, | |
| "learning_rate": 4.9814607771130814e-05, | |
| "loss": 0.6765, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 6.646629222421995, | |
| "grad_norm": 0.136078342795372, | |
| "learning_rate": 4.981446570812018e-05, | |
| "loss": 0.6767, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 6.651720791102484, | |
| "grad_norm": 0.12898576259613037, | |
| "learning_rate": 4.9814323645109554e-05, | |
| "loss": 0.6786, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 6.656812359782972, | |
| "grad_norm": 0.11854422837495804, | |
| "learning_rate": 4.981418158209893e-05, | |
| "loss": 0.6806, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 6.66190392846346, | |
| "grad_norm": 0.1517888456583023, | |
| "learning_rate": 4.98140395190883e-05, | |
| "loss": 0.6829, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 6.666995497143948, | |
| "grad_norm": 0.1091533899307251, | |
| "learning_rate": 4.9813897456077666e-05, | |
| "loss": 0.6774, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 6.672087065824436, | |
| "grad_norm": 0.13526228070259094, | |
| "learning_rate": 4.981375539306704e-05, | |
| "loss": 0.6747, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 6.677178634504925, | |
| "grad_norm": 0.144491046667099, | |
| "learning_rate": 4.981361333005641e-05, | |
| "loss": 0.6787, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 6.682270203185412, | |
| "grad_norm": 0.16958777606487274, | |
| "learning_rate": 4.9813471267045786e-05, | |
| "loss": 0.6744, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 6.687361771865901, | |
| "grad_norm": 0.14115367829799652, | |
| "learning_rate": 4.981332920403516e-05, | |
| "loss": 0.6791, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 6.692453340546389, | |
| "grad_norm": 0.11081673204898834, | |
| "learning_rate": 4.981318714102453e-05, | |
| "loss": 0.6795, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 6.6975449092268775, | |
| "grad_norm": 0.14843027293682098, | |
| "learning_rate": 4.9813045078013905e-05, | |
| "loss": 0.6807, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 6.702636477907365, | |
| "grad_norm": 0.12543180584907532, | |
| "learning_rate": 4.981290301500328e-05, | |
| "loss": 0.6778, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 6.707728046587853, | |
| "grad_norm": 0.13169404864311218, | |
| "learning_rate": 4.981276095199265e-05, | |
| "loss": 0.675, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 6.712819615268342, | |
| "grad_norm": 0.15343239903450012, | |
| "learning_rate": 4.9812618888982024e-05, | |
| "loss": 0.6819, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 6.7179111839488295, | |
| "grad_norm": 0.13029424846172333, | |
| "learning_rate": 4.981247682597139e-05, | |
| "loss": 0.6778, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 6.723002752629318, | |
| "grad_norm": 0.11084284633398056, | |
| "learning_rate": 4.9812334762960764e-05, | |
| "loss": 0.6824, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 6.728094321309806, | |
| "grad_norm": 0.11253423988819122, | |
| "learning_rate": 4.981219269995014e-05, | |
| "loss": 0.6798, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 6.7331858899902945, | |
| "grad_norm": 0.1311793029308319, | |
| "learning_rate": 4.981205063693951e-05, | |
| "loss": 0.6814, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 6.738277458670782, | |
| "grad_norm": 0.12919209897518158, | |
| "learning_rate": 4.981190857392888e-05, | |
| "loss": 0.6768, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 6.743369027351271, | |
| "grad_norm": 0.12355062365531921, | |
| "learning_rate": 4.9811766510918256e-05, | |
| "loss": 0.6799, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 6.748460596031759, | |
| "grad_norm": 0.1338970810174942, | |
| "learning_rate": 4.981162444790763e-05, | |
| "loss": 0.6771, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 6.7535521647122465, | |
| "grad_norm": 0.14117179811000824, | |
| "learning_rate": 4.9811482384897e-05, | |
| "loss": 0.6799, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 6.758643733392735, | |
| "grad_norm": 0.1848529875278473, | |
| "learning_rate": 4.9811340321886375e-05, | |
| "loss": 0.6755, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 6.763735302073223, | |
| "grad_norm": 0.1720336526632309, | |
| "learning_rate": 4.981119825887575e-05, | |
| "loss": 0.67, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 6.768826870753712, | |
| "grad_norm": 0.1607787162065506, | |
| "learning_rate": 4.981105619586512e-05, | |
| "loss": 0.6827, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 6.773918439434199, | |
| "grad_norm": 0.14998158812522888, | |
| "learning_rate": 4.981091413285449e-05, | |
| "loss": 0.6759, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 6.779010008114687, | |
| "grad_norm": 0.11763730645179749, | |
| "learning_rate": 4.981077206984386e-05, | |
| "loss": 0.6747, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 6.784101576795176, | |
| "grad_norm": 0.12859204411506653, | |
| "learning_rate": 4.9810630006833234e-05, | |
| "loss": 0.6785, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 6.7891931454756635, | |
| "grad_norm": 0.12227821350097656, | |
| "learning_rate": 4.98104879438226e-05, | |
| "loss": 0.6794, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 6.794284714156152, | |
| "grad_norm": 0.11308576911687851, | |
| "learning_rate": 4.9810345880811974e-05, | |
| "loss": 0.6777, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 6.79937628283664, | |
| "grad_norm": 0.12252433598041534, | |
| "learning_rate": 4.981020381780135e-05, | |
| "loss": 0.6778, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 6.804467851517129, | |
| "grad_norm": 0.11951456218957901, | |
| "learning_rate": 4.981006175479072e-05, | |
| "loss": 0.6778, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 6.809559420197616, | |
| "grad_norm": 0.13758736848831177, | |
| "learning_rate": 4.980991969178009e-05, | |
| "loss": 0.6757, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 6.814650988878105, | |
| "grad_norm": 0.15930655598640442, | |
| "learning_rate": 4.9809777628769466e-05, | |
| "loss": 0.675, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 6.819742557558593, | |
| "grad_norm": 0.16790159046649933, | |
| "learning_rate": 4.980963556575884e-05, | |
| "loss": 0.6685, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 6.824834126239081, | |
| "grad_norm": 0.1681044101715088, | |
| "learning_rate": 4.980949350274821e-05, | |
| "loss": 0.683, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 6.829925694919569, | |
| "grad_norm": 0.1336173415184021, | |
| "learning_rate": 4.9809351439737585e-05, | |
| "loss": 0.6746, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 6.835017263600057, | |
| "grad_norm": 0.11793011426925659, | |
| "learning_rate": 4.980920937672696e-05, | |
| "loss": 0.6789, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 6.840108832280546, | |
| "grad_norm": 0.14056985080242157, | |
| "learning_rate": 4.980906731371633e-05, | |
| "loss": 0.6797, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 6.845200400961033, | |
| "grad_norm": 0.11312086880207062, | |
| "learning_rate": 4.9808925250705705e-05, | |
| "loss": 0.6777, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 6.850291969641522, | |
| "grad_norm": 0.14550986886024475, | |
| "learning_rate": 4.980878318769507e-05, | |
| "loss": 0.6792, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 6.85538353832201, | |
| "grad_norm": 0.13276565074920654, | |
| "learning_rate": 4.9808641124684444e-05, | |
| "loss": 0.6797, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 6.8604751070024985, | |
| "grad_norm": 0.1404767632484436, | |
| "learning_rate": 4.980849906167382e-05, | |
| "loss": 0.6767, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 6.865566675682986, | |
| "grad_norm": 0.11344119906425476, | |
| "learning_rate": 4.980835699866319e-05, | |
| "loss": 0.6779, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 6.870658244363474, | |
| "grad_norm": 0.18248707056045532, | |
| "learning_rate": 4.9808214935652563e-05, | |
| "loss": 0.6819, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 6.875749813043963, | |
| "grad_norm": 0.13696008920669556, | |
| "learning_rate": 4.9808072872641937e-05, | |
| "loss": 0.6789, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 6.8808413817244505, | |
| "grad_norm": 0.1089053824543953, | |
| "learning_rate": 4.98079308096313e-05, | |
| "loss": 0.6833, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 6.885932950404939, | |
| "grad_norm": 0.13730046153068542, | |
| "learning_rate": 4.9807788746620676e-05, | |
| "loss": 0.685, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 6.891024519085427, | |
| "grad_norm": 0.11708593368530273, | |
| "learning_rate": 4.980764668361005e-05, | |
| "loss": 0.6797, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 6.896116087765915, | |
| "grad_norm": 0.14479976892471313, | |
| "learning_rate": 4.980750462059942e-05, | |
| "loss": 0.6779, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 6.901207656446403, | |
| "grad_norm": 0.13402192294597626, | |
| "learning_rate": 4.9807362557588795e-05, | |
| "loss": 0.6775, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 6.906299225126891, | |
| "grad_norm": 0.1378648430109024, | |
| "learning_rate": 4.980722049457817e-05, | |
| "loss": 0.6799, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 6.91139079380738, | |
| "grad_norm": 0.1424325555562973, | |
| "learning_rate": 4.980707843156754e-05, | |
| "loss": 0.6777, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 6.9164823624878675, | |
| "grad_norm": 0.12795968353748322, | |
| "learning_rate": 4.9806936368556915e-05, | |
| "loss": 0.6756, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 6.921573931168356, | |
| "grad_norm": 0.16961532831192017, | |
| "learning_rate": 4.980679430554628e-05, | |
| "loss": 0.6762, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 6.926665499848844, | |
| "grad_norm": 0.16084560751914978, | |
| "learning_rate": 4.9806652242535654e-05, | |
| "loss": 0.6783, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 6.931757068529333, | |
| "grad_norm": 0.1510113775730133, | |
| "learning_rate": 4.980651017952503e-05, | |
| "loss": 0.676, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 6.93684863720982, | |
| "grad_norm": 0.1436864286661148, | |
| "learning_rate": 4.98063681165144e-05, | |
| "loss": 0.6769, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 6.941940205890308, | |
| "grad_norm": 0.14651361107826233, | |
| "learning_rate": 4.980622605350377e-05, | |
| "loss": 0.6786, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 6.947031774570797, | |
| "grad_norm": 0.12080514430999756, | |
| "learning_rate": 4.9806083990493146e-05, | |
| "loss": 0.6719, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 6.952123343251285, | |
| "grad_norm": 0.18036852777004242, | |
| "learning_rate": 4.980594192748252e-05, | |
| "loss": 0.6776, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 6.957214911931773, | |
| "grad_norm": 0.15538708865642548, | |
| "learning_rate": 4.980579986447189e-05, | |
| "loss": 0.677, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 6.962306480612261, | |
| "grad_norm": 0.14524763822555542, | |
| "learning_rate": 4.9805657801461266e-05, | |
| "loss": 0.6725, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 6.96739804929275, | |
| "grad_norm": 0.13171471655368805, | |
| "learning_rate": 4.980551573845064e-05, | |
| "loss": 0.6814, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 6.972489617973237, | |
| "grad_norm": 0.14730645716190338, | |
| "learning_rate": 4.980537367544001e-05, | |
| "loss": 0.6828, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 6.977581186653726, | |
| "grad_norm": 0.1142466589808464, | |
| "learning_rate": 4.980523161242938e-05, | |
| "loss": 0.677, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 6.982672755334214, | |
| "grad_norm": 0.11980883777141571, | |
| "learning_rate": 4.980508954941875e-05, | |
| "loss": 0.6847, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 6.987764324014702, | |
| "grad_norm": 0.10882198065519333, | |
| "learning_rate": 4.9804947486408125e-05, | |
| "loss": 0.6749, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 6.99285589269519, | |
| "grad_norm": 0.1418180912733078, | |
| "learning_rate": 4.980480542339749e-05, | |
| "loss": 0.675, | |
| "step": 13740 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 13748, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |