| { | |
| "best_global_step": 5852, | |
| "best_metric": 0.848216712474823, | |
| "best_model_checkpoint": "./arthur-ft/checkpoint-5852", | |
| "epoch": 1.9999006600148612, | |
| "eval_steps": 500, | |
| "global_step": 11704, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0017086477443862974, | |
| "grad_norm": 81.49402618408203, | |
| "learning_rate": 1.278409090909091e-07, | |
| "loss": 1.5968, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0034172954887725948, | |
| "grad_norm": 27.926727294921875, | |
| "learning_rate": 2.6988636363636366e-07, | |
| "loss": 1.2647, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.005125943233158892, | |
| "grad_norm": 26.150636672973633, | |
| "learning_rate": 4.119318181818182e-07, | |
| "loss": 1.1425, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0068345909775451895, | |
| "grad_norm": 32.89878463745117, | |
| "learning_rate": 5.539772727272728e-07, | |
| "loss": 1.1125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.008543238721931487, | |
| "grad_norm": 31.276884078979492, | |
| "learning_rate": 6.960227272727273e-07, | |
| "loss": 1.1157, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.010251886466317785, | |
| "grad_norm": 22.583457946777344, | |
| "learning_rate": 8.380681818181818e-07, | |
| "loss": 1.1315, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.011960534210704083, | |
| "grad_norm": 26.30866813659668, | |
| "learning_rate": 9.801136363636364e-07, | |
| "loss": 1.107, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.013669181955090379, | |
| "grad_norm": 26.83280372619629, | |
| "learning_rate": 1.1221590909090909e-06, | |
| "loss": 1.1115, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.015377829699476677, | |
| "grad_norm": 25.484037399291992, | |
| "learning_rate": 1.2642045454545456e-06, | |
| "loss": 1.1214, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.017086477443862973, | |
| "grad_norm": 21.573806762695312, | |
| "learning_rate": 1.40625e-06, | |
| "loss": 1.0899, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.018795125188249273, | |
| "grad_norm": 31.41425323486328, | |
| "learning_rate": 1.5482954545454546e-06, | |
| "loss": 1.1174, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02050377293263557, | |
| "grad_norm": 31.211849212646484, | |
| "learning_rate": 1.6903409090909093e-06, | |
| "loss": 1.1159, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.022212420677021866, | |
| "grad_norm": 21.36687660217285, | |
| "learning_rate": 1.8323863636363638e-06, | |
| "loss": 1.0619, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.023921068421408165, | |
| "grad_norm": 21.135522842407227, | |
| "learning_rate": 1.9744318181818183e-06, | |
| "loss": 1.0649, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.025629716165794462, | |
| "grad_norm": 15.732582092285156, | |
| "learning_rate": 2.1164772727272728e-06, | |
| "loss": 1.0778, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.027338363910180758, | |
| "grad_norm": 29.327178955078125, | |
| "learning_rate": 2.2585227272727277e-06, | |
| "loss": 1.0911, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.029047011654567058, | |
| "grad_norm": 18.275588989257812, | |
| "learning_rate": 2.4005681818181818e-06, | |
| "loss": 1.0608, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.030755659398953354, | |
| "grad_norm": 23.922975540161133, | |
| "learning_rate": 2.5426136363636367e-06, | |
| "loss": 1.0478, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.03246430714333965, | |
| "grad_norm": 20.124839782714844, | |
| "learning_rate": 2.684659090909091e-06, | |
| "loss": 1.0195, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.03417295488772595, | |
| "grad_norm": 17.850400924682617, | |
| "learning_rate": 2.8267045454545457e-06, | |
| "loss": 1.0701, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03588160263211224, | |
| "grad_norm": 21.76569175720215, | |
| "learning_rate": 2.96875e-06, | |
| "loss": 1.0621, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.037590250376498546, | |
| "grad_norm": 18.49285125732422, | |
| "learning_rate": 3.110795454545455e-06, | |
| "loss": 1.0733, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.03929889812088484, | |
| "grad_norm": 20.746557235717773, | |
| "learning_rate": 3.252840909090909e-06, | |
| "loss": 1.0711, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.04100754586527114, | |
| "grad_norm": 22.905445098876953, | |
| "learning_rate": 3.3948863636363636e-06, | |
| "loss": 1.0938, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.042716193609657435, | |
| "grad_norm": 21.94207763671875, | |
| "learning_rate": 3.5369318181818186e-06, | |
| "loss": 1.0307, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.04442484135404373, | |
| "grad_norm": 19.50674819946289, | |
| "learning_rate": 3.678977272727273e-06, | |
| "loss": 1.0386, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.04613348909843003, | |
| "grad_norm": 33.76179504394531, | |
| "learning_rate": 3.821022727272727e-06, | |
| "loss": 1.024, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04784213684281633, | |
| "grad_norm": 23.91062355041504, | |
| "learning_rate": 3.963068181818182e-06, | |
| "loss": 1.0586, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04955078458720263, | |
| "grad_norm": 20.59576988220215, | |
| "learning_rate": 4.105113636363637e-06, | |
| "loss": 1.0404, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.051259432331588924, | |
| "grad_norm": 20.871490478515625, | |
| "learning_rate": 4.247159090909092e-06, | |
| "loss": 1.0237, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05296808007597522, | |
| "grad_norm": 20.965486526489258, | |
| "learning_rate": 4.389204545454546e-06, | |
| "loss": 0.9917, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.054676727820361516, | |
| "grad_norm": 25.50008201599121, | |
| "learning_rate": 4.53125e-06, | |
| "loss": 1.0394, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.05638537556474781, | |
| "grad_norm": 19.419902801513672, | |
| "learning_rate": 4.673295454545455e-06, | |
| "loss": 0.9999, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.058094023309134116, | |
| "grad_norm": 22.62456512451172, | |
| "learning_rate": 4.815340909090909e-06, | |
| "loss": 1.0649, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.05980267105352041, | |
| "grad_norm": 29.71078109741211, | |
| "learning_rate": 4.957386363636364e-06, | |
| "loss": 1.0064, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.06151131879790671, | |
| "grad_norm": 22.22879409790039, | |
| "learning_rate": 4.9969168428470764e-06, | |
| "loss": 1.06, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.063219966542293, | |
| "grad_norm": 21.7359619140625, | |
| "learning_rate": 4.992512332628612e-06, | |
| "loss": 1.043, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.0649286142866793, | |
| "grad_norm": 18.66118812561035, | |
| "learning_rate": 4.988107822410148e-06, | |
| "loss": 0.9956, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.0666372620310656, | |
| "grad_norm": 18.667190551757812, | |
| "learning_rate": 4.983703312191685e-06, | |
| "loss": 1.0341, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0683459097754519, | |
| "grad_norm": 21.578454971313477, | |
| "learning_rate": 4.979298801973221e-06, | |
| "loss": 1.0265, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.07005455751983819, | |
| "grad_norm": 22.787809371948242, | |
| "learning_rate": 4.974894291754757e-06, | |
| "loss": 1.0062, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.07176320526422449, | |
| "grad_norm": 15.971848487854004, | |
| "learning_rate": 4.970489781536293e-06, | |
| "loss": 0.9964, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.0734718530086108, | |
| "grad_norm": 19.03502082824707, | |
| "learning_rate": 4.96608527131783e-06, | |
| "loss": 1.0245, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.07518050075299709, | |
| "grad_norm": 19.77265167236328, | |
| "learning_rate": 4.9616807610993666e-06, | |
| "loss": 1.0321, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.07688914849738339, | |
| "grad_norm": 26.002628326416016, | |
| "learning_rate": 4.957276250880902e-06, | |
| "loss": 1.024, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07859779624176969, | |
| "grad_norm": 20.502666473388672, | |
| "learning_rate": 4.952871740662439e-06, | |
| "loss": 1.02, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.08030644398615598, | |
| "grad_norm": 25.582834243774414, | |
| "learning_rate": 4.948467230443975e-06, | |
| "loss": 0.9959, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.08201509173054228, | |
| "grad_norm": 23.496889114379883, | |
| "learning_rate": 4.944062720225512e-06, | |
| "loss": 1.0149, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.08372373947492857, | |
| "grad_norm": 19.036056518554688, | |
| "learning_rate": 4.9396582100070475e-06, | |
| "loss": 1.0096, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.08543238721931487, | |
| "grad_norm": 17.741846084594727, | |
| "learning_rate": 4.935253699788584e-06, | |
| "loss": 1.0365, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08714103496370117, | |
| "grad_norm": 17.736528396606445, | |
| "learning_rate": 4.93084918957012e-06, | |
| "loss": 0.9791, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08884968270808746, | |
| "grad_norm": 15.833001136779785, | |
| "learning_rate": 4.926444679351657e-06, | |
| "loss": 1.0195, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.09055833045247376, | |
| "grad_norm": 21.06598663330078, | |
| "learning_rate": 4.9220401691331925e-06, | |
| "loss": 1.0199, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.09226697819686006, | |
| "grad_norm": 20.22286605834961, | |
| "learning_rate": 4.917635658914729e-06, | |
| "loss": 1.0549, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.09397562594124637, | |
| "grad_norm": 18.53827476501465, | |
| "learning_rate": 4.913231148696265e-06, | |
| "loss": 1.0115, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.09568427368563266, | |
| "grad_norm": 17.737276077270508, | |
| "learning_rate": 4.908826638477802e-06, | |
| "loss": 0.9855, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.09739292143001896, | |
| "grad_norm": 16.152812957763672, | |
| "learning_rate": 4.9044221282593376e-06, | |
| "loss": 1.0029, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.09910156917440525, | |
| "grad_norm": 22.577655792236328, | |
| "learning_rate": 4.900017618040874e-06, | |
| "loss": 0.9897, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.10081021691879155, | |
| "grad_norm": 19.24541664123535, | |
| "learning_rate": 4.895613107822411e-06, | |
| "loss": 0.9428, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.10251886466317785, | |
| "grad_norm": 14.999211311340332, | |
| "learning_rate": 4.891208597603947e-06, | |
| "loss": 1.0375, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.10422751240756414, | |
| "grad_norm": 17.597455978393555, | |
| "learning_rate": 4.8868040873854835e-06, | |
| "loss": 0.9238, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.10593616015195044, | |
| "grad_norm": 14.606751441955566, | |
| "learning_rate": 4.882399577167019e-06, | |
| "loss": 1.0094, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.10764480789633674, | |
| "grad_norm": 19.107078552246094, | |
| "learning_rate": 4.877995066948556e-06, | |
| "loss": 1.0012, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.10935345564072303, | |
| "grad_norm": 22.561248779296875, | |
| "learning_rate": 4.873590556730092e-06, | |
| "loss": 0.9683, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.11106210338510933, | |
| "grad_norm": 20.76687240600586, | |
| "learning_rate": 4.869186046511628e-06, | |
| "loss": 0.9516, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.11277075112949562, | |
| "grad_norm": 18.26988410949707, | |
| "learning_rate": 4.864781536293164e-06, | |
| "loss": 1.003, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.11447939887388194, | |
| "grad_norm": 24.70865821838379, | |
| "learning_rate": 4.860377026074701e-06, | |
| "loss": 0.9629, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.11618804661826823, | |
| "grad_norm": 15.833657264709473, | |
| "learning_rate": 4.855972515856237e-06, | |
| "loss": 0.9842, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.11789669436265453, | |
| "grad_norm": 23.024721145629883, | |
| "learning_rate": 4.851568005637774e-06, | |
| "loss": 0.9945, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.11960534210704082, | |
| "grad_norm": 21.521650314331055, | |
| "learning_rate": 4.8471634954193094e-06, | |
| "loss": 0.9154, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.12131398985142712, | |
| "grad_norm": 16.77184295654297, | |
| "learning_rate": 4.842758985200846e-06, | |
| "loss": 0.9333, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.12302263759581342, | |
| "grad_norm": 18.226619720458984, | |
| "learning_rate": 4.838354474982383e-06, | |
| "loss": 1.003, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.12473128534019971, | |
| "grad_norm": 18.95140266418457, | |
| "learning_rate": 4.833949964763919e-06, | |
| "loss": 0.9384, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.126439933084586, | |
| "grad_norm": 21.0819149017334, | |
| "learning_rate": 4.829545454545455e-06, | |
| "loss": 1.0182, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.12814858082897232, | |
| "grad_norm": 20.32185935974121, | |
| "learning_rate": 4.825140944326991e-06, | |
| "loss": 0.9399, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.1298572285733586, | |
| "grad_norm": 21.246639251708984, | |
| "learning_rate": 4.820736434108528e-06, | |
| "loss": 0.9671, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.1315658763177449, | |
| "grad_norm": 17.076871871948242, | |
| "learning_rate": 4.816331923890064e-06, | |
| "loss": 0.9338, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.1332745240621312, | |
| "grad_norm": 17.45732879638672, | |
| "learning_rate": 4.8119274136715996e-06, | |
| "loss": 0.9071, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.1349831718065175, | |
| "grad_norm": 15.503561973571777, | |
| "learning_rate": 4.807522903453136e-06, | |
| "loss": 1.0028, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.1366918195509038, | |
| "grad_norm": 20.221580505371094, | |
| "learning_rate": 4.803118393234673e-06, | |
| "loss": 0.9703, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.1384004672952901, | |
| "grad_norm": 16.32524299621582, | |
| "learning_rate": 4.798713883016209e-06, | |
| "loss": 0.9856, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.14010911503967638, | |
| "grad_norm": 19.584348678588867, | |
| "learning_rate": 4.7943093727977455e-06, | |
| "loss": 0.9418, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.1418177627840627, | |
| "grad_norm": 23.859182357788086, | |
| "learning_rate": 4.789904862579281e-06, | |
| "loss": 0.9651, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.14352641052844897, | |
| "grad_norm": 20.757596969604492, | |
| "learning_rate": 4.785500352360818e-06, | |
| "loss": 0.9272, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.14523505827283528, | |
| "grad_norm": 19.72559928894043, | |
| "learning_rate": 4.781095842142354e-06, | |
| "loss": 0.9644, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1469437060172216, | |
| "grad_norm": 20.055456161499023, | |
| "learning_rate": 4.7766913319238905e-06, | |
| "loss": 0.9819, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.14865235376160787, | |
| "grad_norm": 18.991012573242188, | |
| "learning_rate": 4.772286821705427e-06, | |
| "loss": 0.9234, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.15036100150599419, | |
| "grad_norm": 17.789796829223633, | |
| "learning_rate": 4.767882311486963e-06, | |
| "loss": 1.0245, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.15206964925038047, | |
| "grad_norm": 19.364513397216797, | |
| "learning_rate": 4.7634778012685e-06, | |
| "loss": 0.9382, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.15377829699476678, | |
| "grad_norm": 18.194772720336914, | |
| "learning_rate": 4.759073291050036e-06, | |
| "loss": 0.9533, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.15548694473915306, | |
| "grad_norm": 15.367209434509277, | |
| "learning_rate": 4.7546687808315714e-06, | |
| "loss": 0.9901, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.15719559248353937, | |
| "grad_norm": 26.23330307006836, | |
| "learning_rate": 4.750264270613108e-06, | |
| "loss": 0.8894, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.15890424022792565, | |
| "grad_norm": 20.43960189819336, | |
| "learning_rate": 4.745859760394644e-06, | |
| "loss": 0.9469, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.16061288797231196, | |
| "grad_norm": 17.476476669311523, | |
| "learning_rate": 4.741455250176181e-06, | |
| "loss": 0.9681, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.16232153571669825, | |
| "grad_norm": 18.390302658081055, | |
| "learning_rate": 4.737050739957717e-06, | |
| "loss": 0.9627, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.16403018346108456, | |
| "grad_norm": 26.21846580505371, | |
| "learning_rate": 4.732646229739253e-06, | |
| "loss": 0.9453, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.16573883120547084, | |
| "grad_norm": 17.23887062072754, | |
| "learning_rate": 4.72824171952079e-06, | |
| "loss": 0.9315, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.16744747894985715, | |
| "grad_norm": 15.847450256347656, | |
| "learning_rate": 4.723837209302326e-06, | |
| "loss": 0.9448, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.16915612669424346, | |
| "grad_norm": 20.83458709716797, | |
| "learning_rate": 4.719432699083862e-06, | |
| "loss": 0.9788, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.17086477443862974, | |
| "grad_norm": 28.041086196899414, | |
| "learning_rate": 4.715028188865399e-06, | |
| "loss": 0.9217, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.17257342218301605, | |
| "grad_norm": 21.50284767150879, | |
| "learning_rate": 4.710623678646935e-06, | |
| "loss": 0.8928, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.17428206992740233, | |
| "grad_norm": 21.39044761657715, | |
| "learning_rate": 4.706219168428472e-06, | |
| "loss": 0.9415, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.17599071767178864, | |
| "grad_norm": 21.087949752807617, | |
| "learning_rate": 4.7018146582100075e-06, | |
| "loss": 0.9399, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.17769936541617493, | |
| "grad_norm": 16.453859329223633, | |
| "learning_rate": 4.697410147991543e-06, | |
| "loss": 0.948, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.17940801316056124, | |
| "grad_norm": 18.201675415039062, | |
| "learning_rate": 4.69300563777308e-06, | |
| "loss": 0.9403, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.18111666090494752, | |
| "grad_norm": 18.971012115478516, | |
| "learning_rate": 4.688601127554616e-06, | |
| "loss": 0.9294, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.18282530864933383, | |
| "grad_norm": 18.481828689575195, | |
| "learning_rate": 4.6841966173361525e-06, | |
| "loss": 0.8809, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.1845339563937201, | |
| "grad_norm": 17.92839813232422, | |
| "learning_rate": 4.679792107117689e-06, | |
| "loss": 0.9608, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.18624260413810642, | |
| "grad_norm": 21.6907958984375, | |
| "learning_rate": 4.675387596899225e-06, | |
| "loss": 0.9896, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.18795125188249273, | |
| "grad_norm": 19.17830467224121, | |
| "learning_rate": 4.670983086680762e-06, | |
| "loss": 0.9319, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.189659899626879, | |
| "grad_norm": 19.919885635375977, | |
| "learning_rate": 4.666578576462298e-06, | |
| "loss": 0.9509, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.19136854737126532, | |
| "grad_norm": 13.461675643920898, | |
| "learning_rate": 4.662174066243834e-06, | |
| "loss": 0.9315, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.1930771951156516, | |
| "grad_norm": 25.66329574584961, | |
| "learning_rate": 4.65776955602537e-06, | |
| "loss": 0.9203, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.19478584286003792, | |
| "grad_norm": 20.48524284362793, | |
| "learning_rate": 4.653365045806907e-06, | |
| "loss": 0.8759, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.1964944906044242, | |
| "grad_norm": 18.57932472229004, | |
| "learning_rate": 4.6489605355884435e-06, | |
| "loss": 0.9367, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.1982031383488105, | |
| "grad_norm": 24.531593322753906, | |
| "learning_rate": 4.644556025369979e-06, | |
| "loss": 0.924, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.1999117860931968, | |
| "grad_norm": 19.594648361206055, | |
| "learning_rate": 4.640151515151515e-06, | |
| "loss": 0.9189, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.2016204338375831, | |
| "grad_norm": 18.946157455444336, | |
| "learning_rate": 4.635747004933052e-06, | |
| "loss": 0.9476, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.20332908158196938, | |
| "grad_norm": 18.381322860717773, | |
| "learning_rate": 4.631342494714588e-06, | |
| "loss": 0.961, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.2050377293263557, | |
| "grad_norm": 18.244287490844727, | |
| "learning_rate": 4.626937984496124e-06, | |
| "loss": 0.9345, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.20674637707074198, | |
| "grad_norm": 21.273303985595703, | |
| "learning_rate": 4.62253347427766e-06, | |
| "loss": 0.8893, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.2084550248151283, | |
| "grad_norm": 21.534873962402344, | |
| "learning_rate": 4.618128964059197e-06, | |
| "loss": 0.8948, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.2101636725595146, | |
| "grad_norm": 20.035734176635742, | |
| "learning_rate": 4.613724453840734e-06, | |
| "loss": 0.9211, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.21187232030390088, | |
| "grad_norm": 19.587982177734375, | |
| "learning_rate": 4.6093199436222695e-06, | |
| "loss": 0.9074, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.2135809680482872, | |
| "grad_norm": 20.059412002563477, | |
| "learning_rate": 4.604915433403806e-06, | |
| "loss": 0.9389, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.21528961579267347, | |
| "grad_norm": 23.202457427978516, | |
| "learning_rate": 4.600510923185342e-06, | |
| "loss": 0.9303, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.21699826353705978, | |
| "grad_norm": 22.944717407226562, | |
| "learning_rate": 4.596106412966879e-06, | |
| "loss": 0.9118, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.21870691128144606, | |
| "grad_norm": 19.934560775756836, | |
| "learning_rate": 4.591701902748415e-06, | |
| "loss": 0.9638, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.22041555902583237, | |
| "grad_norm": 19.087709426879883, | |
| "learning_rate": 4.587297392529951e-06, | |
| "loss": 0.9519, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.22212420677021866, | |
| "grad_norm": 17.25513458251953, | |
| "learning_rate": 4.582892882311487e-06, | |
| "loss": 0.8735, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.22383285451460497, | |
| "grad_norm": 23.020050048828125, | |
| "learning_rate": 4.578488372093024e-06, | |
| "loss": 0.9319, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.22554150225899125, | |
| "grad_norm": 18.893648147583008, | |
| "learning_rate": 4.57408386187456e-06, | |
| "loss": 0.9329, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.22725015000337756, | |
| "grad_norm": 20.73868179321289, | |
| "learning_rate": 4.569679351656096e-06, | |
| "loss": 0.8715, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.22895879774776387, | |
| "grad_norm": 25.549577713012695, | |
| "learning_rate": 4.565274841437632e-06, | |
| "loss": 0.9145, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.23066744549215015, | |
| "grad_norm": 18.99001693725586, | |
| "learning_rate": 4.560870331219169e-06, | |
| "loss": 0.9251, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.23237609323653646, | |
| "grad_norm": 19.704002380371094, | |
| "learning_rate": 4.5564658210007055e-06, | |
| "loss": 0.9342, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.23408474098092275, | |
| "grad_norm": 20.581199645996094, | |
| "learning_rate": 4.552061310782241e-06, | |
| "loss": 0.9107, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.23579338872530906, | |
| "grad_norm": 18.79061508178711, | |
| "learning_rate": 4.547656800563778e-06, | |
| "loss": 0.962, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.23750203646969534, | |
| "grad_norm": 17.29990577697754, | |
| "learning_rate": 4.543252290345314e-06, | |
| "loss": 0.8641, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.23921068421408165, | |
| "grad_norm": 16.5628719329834, | |
| "learning_rate": 4.5388477801268506e-06, | |
| "loss": 0.9237, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.24091933195846793, | |
| "grad_norm": 17.68106460571289, | |
| "learning_rate": 4.534443269908386e-06, | |
| "loss": 0.9167, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.24262797970285424, | |
| "grad_norm": 20.802289962768555, | |
| "learning_rate": 4.530038759689923e-06, | |
| "loss": 0.9236, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.24433662744724052, | |
| "grad_norm": 15.484850883483887, | |
| "learning_rate": 4.525634249471459e-06, | |
| "loss": 0.9322, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.24604527519162683, | |
| "grad_norm": 21.147815704345703, | |
| "learning_rate": 4.521229739252996e-06, | |
| "loss": 0.9034, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.24775392293601312, | |
| "grad_norm": 20.891565322875977, | |
| "learning_rate": 4.5168252290345315e-06, | |
| "loss": 0.9118, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.24946257068039943, | |
| "grad_norm": 20.994525909423828, | |
| "learning_rate": 4.512420718816068e-06, | |
| "loss": 0.8692, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.25117121842478574, | |
| "grad_norm": 15.313887596130371, | |
| "learning_rate": 4.508016208597604e-06, | |
| "loss": 0.9845, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.252879866169172, | |
| "grad_norm": 20.045129776000977, | |
| "learning_rate": 4.503611698379141e-06, | |
| "loss": 0.9341, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.2545885139135583, | |
| "grad_norm": 20.646169662475586, | |
| "learning_rate": 4.4992071881606765e-06, | |
| "loss": 0.9434, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.25629716165794464, | |
| "grad_norm": 21.185823440551758, | |
| "learning_rate": 4.494802677942213e-06, | |
| "loss": 0.9123, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2580058094023309, | |
| "grad_norm": 15.910945892333984, | |
| "learning_rate": 4.49039816772375e-06, | |
| "loss": 0.9393, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.2597144571467172, | |
| "grad_norm": 19.833402633666992, | |
| "learning_rate": 4.485993657505286e-06, | |
| "loss": 0.9744, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.2614231048911035, | |
| "grad_norm": 18.990707397460938, | |
| "learning_rate": 4.481589147286822e-06, | |
| "loss": 0.9286, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.2631317526354898, | |
| "grad_norm": 21.067312240600586, | |
| "learning_rate": 4.477184637068358e-06, | |
| "loss": 0.9652, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.2648404003798761, | |
| "grad_norm": 20.689836502075195, | |
| "learning_rate": 4.472780126849895e-06, | |
| "loss": 0.9158, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.2665490481242624, | |
| "grad_norm": 17.695697784423828, | |
| "learning_rate": 4.468375616631431e-06, | |
| "loss": 0.8859, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.26825769586864867, | |
| "grad_norm": 20.22654914855957, | |
| "learning_rate": 4.463971106412967e-06, | |
| "loss": 0.8948, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.269966343613035, | |
| "grad_norm": 15.549092292785645, | |
| "learning_rate": 4.459566596194503e-06, | |
| "loss": 0.8615, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.2716749913574213, | |
| "grad_norm": 18.86482810974121, | |
| "learning_rate": 4.45516208597604e-06, | |
| "loss": 0.9054, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.2733836391018076, | |
| "grad_norm": 18.071102142333984, | |
| "learning_rate": 4.450757575757576e-06, | |
| "loss": 0.939, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.2750922868461939, | |
| "grad_norm": 22.560697555541992, | |
| "learning_rate": 4.4463530655391125e-06, | |
| "loss": 0.8798, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.2768009345905802, | |
| "grad_norm": 19.61587905883789, | |
| "learning_rate": 4.441948555320648e-06, | |
| "loss": 0.8923, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.2785095823349665, | |
| "grad_norm": 13.9995698928833, | |
| "learning_rate": 4.437544045102185e-06, | |
| "loss": 0.9556, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.28021823007935276, | |
| "grad_norm": 23.62803077697754, | |
| "learning_rate": 4.433139534883722e-06, | |
| "loss": 0.8756, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.2819268778237391, | |
| "grad_norm": 19.477319717407227, | |
| "learning_rate": 4.428735024665258e-06, | |
| "loss": 0.8588, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.2836355255681254, | |
| "grad_norm": 17.02006721496582, | |
| "learning_rate": 4.424330514446794e-06, | |
| "loss": 0.934, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.28534417331251166, | |
| "grad_norm": 18.509023666381836, | |
| "learning_rate": 4.41992600422833e-06, | |
| "loss": 0.9693, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.28705282105689794, | |
| "grad_norm": 16.825519561767578, | |
| "learning_rate": 4.415521494009867e-06, | |
| "loss": 0.8973, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.2887614688012843, | |
| "grad_norm": 18.926586151123047, | |
| "learning_rate": 4.411116983791403e-06, | |
| "loss": 0.8644, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.29047011654567056, | |
| "grad_norm": 20.28687286376953, | |
| "learning_rate": 4.4067124735729385e-06, | |
| "loss": 0.9245, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.29217876429005685, | |
| "grad_norm": 23.774314880371094, | |
| "learning_rate": 4.402307963354475e-06, | |
| "loss": 0.8688, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.2938874120344432, | |
| "grad_norm": 18.38115692138672, | |
| "learning_rate": 4.397903453136012e-06, | |
| "loss": 0.8836, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.29559605977882947, | |
| "grad_norm": 17.962003707885742, | |
| "learning_rate": 4.393498942917548e-06, | |
| "loss": 0.8547, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.29730470752321575, | |
| "grad_norm": 17.536418914794922, | |
| "learning_rate": 4.389094432699084e-06, | |
| "loss": 0.8899, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.29901335526760203, | |
| "grad_norm": 24.884021759033203, | |
| "learning_rate": 4.38468992248062e-06, | |
| "loss": 0.8861, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.30072200301198837, | |
| "grad_norm": 21.32032012939453, | |
| "learning_rate": 4.380285412262157e-06, | |
| "loss": 0.8905, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.30243065075637465, | |
| "grad_norm": 17.606523513793945, | |
| "learning_rate": 4.375880902043693e-06, | |
| "loss": 0.8898, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.30413929850076094, | |
| "grad_norm": 18.825279235839844, | |
| "learning_rate": 4.3714763918252295e-06, | |
| "loss": 0.8806, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.3058479462451472, | |
| "grad_norm": 18.960371017456055, | |
| "learning_rate": 4.367071881606766e-06, | |
| "loss": 0.897, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.30755659398953356, | |
| "grad_norm": 22.261259078979492, | |
| "learning_rate": 4.362667371388302e-06, | |
| "loss": 0.8931, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.30926524173391984, | |
| "grad_norm": 23.404190063476562, | |
| "learning_rate": 4.358262861169839e-06, | |
| "loss": 0.8802, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.3109738894783061, | |
| "grad_norm": 14.648833274841309, | |
| "learning_rate": 4.3538583509513745e-06, | |
| "loss": 0.9234, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.31268253722269246, | |
| "grad_norm": 18.37412452697754, | |
| "learning_rate": 4.34945384073291e-06, | |
| "loss": 0.8852, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.31439118496707874, | |
| "grad_norm": 24.400611877441406, | |
| "learning_rate": 4.345049330514447e-06, | |
| "loss": 0.8791, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.316099832711465, | |
| "grad_norm": 17.905906677246094, | |
| "learning_rate": 4.340644820295983e-06, | |
| "loss": 0.874, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.3178084804558513, | |
| "grad_norm": 16.834829330444336, | |
| "learning_rate": 4.33624031007752e-06, | |
| "loss": 0.907, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.31951712820023764, | |
| "grad_norm": 18.529735565185547, | |
| "learning_rate": 4.331835799859056e-06, | |
| "loss": 0.8914, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.3212257759446239, | |
| "grad_norm": 18.155649185180664, | |
| "learning_rate": 4.327431289640592e-06, | |
| "loss": 0.8678, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.3229344236890102, | |
| "grad_norm": 15.488029479980469, | |
| "learning_rate": 4.323026779422129e-06, | |
| "loss": 0.8673, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.3246430714333965, | |
| "grad_norm": 22.161739349365234, | |
| "learning_rate": 4.318622269203665e-06, | |
| "loss": 0.8082, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.32635171917778283, | |
| "grad_norm": 21.01485252380371, | |
| "learning_rate": 4.314217758985201e-06, | |
| "loss": 0.8848, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.3280603669221691, | |
| "grad_norm": 17.303821563720703, | |
| "learning_rate": 4.309813248766738e-06, | |
| "loss": 0.8917, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.3297690146665554, | |
| "grad_norm": 27.273990631103516, | |
| "learning_rate": 4.305408738548274e-06, | |
| "loss": 0.8299, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.3314776624109417, | |
| "grad_norm": 20.609886169433594, | |
| "learning_rate": 4.3010042283298106e-06, | |
| "loss": 0.8975, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.333186310155328, | |
| "grad_norm": 21.860870361328125, | |
| "learning_rate": 4.296599718111346e-06, | |
| "loss": 0.8811, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3348949578997143, | |
| "grad_norm": 21.051359176635742, | |
| "learning_rate": 4.292195207892882e-06, | |
| "loss": 0.9229, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.3366036056441006, | |
| "grad_norm": 22.4477596282959, | |
| "learning_rate": 4.287790697674419e-06, | |
| "loss": 0.865, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.3383122533884869, | |
| "grad_norm": 20.99222755432129, | |
| "learning_rate": 4.283386187455955e-06, | |
| "loss": 0.8377, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.3400209011328732, | |
| "grad_norm": 23.59244155883789, | |
| "learning_rate": 4.2789816772374915e-06, | |
| "loss": 0.8419, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.3417295488772595, | |
| "grad_norm": 17.199111938476562, | |
| "learning_rate": 4.274577167019028e-06, | |
| "loss": 0.9104, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.34343819662164576, | |
| "grad_norm": 23.190162658691406, | |
| "learning_rate": 4.270172656800564e-06, | |
| "loss": 0.859, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.3451468443660321, | |
| "grad_norm": 22.3214168548584, | |
| "learning_rate": 4.265768146582101e-06, | |
| "loss": 0.898, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.3468554921104184, | |
| "grad_norm": 17.06951141357422, | |
| "learning_rate": 4.2613636363636365e-06, | |
| "loss": 0.9043, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.34856413985480467, | |
| "grad_norm": 22.465560913085938, | |
| "learning_rate": 4.256959126145173e-06, | |
| "loss": 0.8559, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.35027278759919095, | |
| "grad_norm": 20.7056884765625, | |
| "learning_rate": 4.25255461592671e-06, | |
| "loss": 0.8545, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.3519814353435773, | |
| "grad_norm": 18.856229782104492, | |
| "learning_rate": 4.248150105708246e-06, | |
| "loss": 0.8404, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.35369008308796357, | |
| "grad_norm": 19.156654357910156, | |
| "learning_rate": 4.2437455954897824e-06, | |
| "loss": 0.9017, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.35539873083234985, | |
| "grad_norm": 19.859079360961914, | |
| "learning_rate": 4.239341085271318e-06, | |
| "loss": 0.9067, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.3571073785767362, | |
| "grad_norm": 20.216876983642578, | |
| "learning_rate": 4.234936575052854e-06, | |
| "loss": 0.8961, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.35881602632112247, | |
| "grad_norm": 21.373823165893555, | |
| "learning_rate": 4.230532064834391e-06, | |
| "loss": 0.8803, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.36052467406550875, | |
| "grad_norm": 21.679407119750977, | |
| "learning_rate": 4.226127554615927e-06, | |
| "loss": 0.857, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.36223332180989504, | |
| "grad_norm": 21.290212631225586, | |
| "learning_rate": 4.221723044397463e-06, | |
| "loss": 0.9099, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.3639419695542814, | |
| "grad_norm": 23.025487899780273, | |
| "learning_rate": 4.217318534179e-06, | |
| "loss": 0.8501, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.36565061729866766, | |
| "grad_norm": 24.255035400390625, | |
| "learning_rate": 4.212914023960536e-06, | |
| "loss": 0.8415, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.36735926504305394, | |
| "grad_norm": 19.89132308959961, | |
| "learning_rate": 4.2085095137420726e-06, | |
| "loss": 0.8498, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.3690679127874402, | |
| "grad_norm": 19.75184440612793, | |
| "learning_rate": 4.204105003523608e-06, | |
| "loss": 0.8162, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.37077656053182656, | |
| "grad_norm": 19.339553833007812, | |
| "learning_rate": 4.199700493305145e-06, | |
| "loss": 0.8784, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.37248520827621284, | |
| "grad_norm": 15.743782997131348, | |
| "learning_rate": 4.195295983086681e-06, | |
| "loss": 0.8739, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.3741938560205991, | |
| "grad_norm": 20.931917190551758, | |
| "learning_rate": 4.190891472868218e-06, | |
| "loss": 0.8697, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.37590250376498546, | |
| "grad_norm": 21.439781188964844, | |
| "learning_rate": 4.186486962649754e-06, | |
| "loss": 0.9417, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.37761115150937175, | |
| "grad_norm": 19.33049964904785, | |
| "learning_rate": 4.18208245243129e-06, | |
| "loss": 0.8648, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.379319799253758, | |
| "grad_norm": 20.86115074157715, | |
| "learning_rate": 4.177677942212826e-06, | |
| "loss": 0.9008, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.3810284469981443, | |
| "grad_norm": 21.383541107177734, | |
| "learning_rate": 4.173273431994363e-06, | |
| "loss": 0.8436, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.38273709474253065, | |
| "grad_norm": 20.323444366455078, | |
| "learning_rate": 4.1688689217758985e-06, | |
| "loss": 0.8607, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.38444574248691693, | |
| "grad_norm": 20.108402252197266, | |
| "learning_rate": 4.164464411557435e-06, | |
| "loss": 0.8718, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3861543902313032, | |
| "grad_norm": 27.39733123779297, | |
| "learning_rate": 4.160059901338971e-06, | |
| "loss": 0.801, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.3878630379756895, | |
| "grad_norm": 19.76158332824707, | |
| "learning_rate": 4.155655391120508e-06, | |
| "loss": 0.8525, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.38957168572007583, | |
| "grad_norm": 20.22632598876953, | |
| "learning_rate": 4.1512508809020444e-06, | |
| "loss": 0.8277, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.3912803334644621, | |
| "grad_norm": 20.0892333984375, | |
| "learning_rate": 4.14684637068358e-06, | |
| "loss": 0.8352, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.3929889812088484, | |
| "grad_norm": 18.96234893798828, | |
| "learning_rate": 4.142441860465117e-06, | |
| "loss": 0.8461, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.39469762895323474, | |
| "grad_norm": 24.289127349853516, | |
| "learning_rate": 4.138037350246653e-06, | |
| "loss": 0.8953, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.396406276697621, | |
| "grad_norm": 22.399789810180664, | |
| "learning_rate": 4.1336328400281895e-06, | |
| "loss": 0.8364, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.3981149244420073, | |
| "grad_norm": 24.583871841430664, | |
| "learning_rate": 4.129228329809726e-06, | |
| "loss": 0.8271, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.3998235721863936, | |
| "grad_norm": 25.536149978637695, | |
| "learning_rate": 4.124823819591261e-06, | |
| "loss": 0.8332, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.4015322199307799, | |
| "grad_norm": 25.381229400634766, | |
| "learning_rate": 4.120419309372798e-06, | |
| "loss": 0.8155, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.4032408676751662, | |
| "grad_norm": 20.306066513061523, | |
| "learning_rate": 4.1160147991543346e-06, | |
| "loss": 0.8213, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.4049495154195525, | |
| "grad_norm": 22.400867462158203, | |
| "learning_rate": 4.11161028893587e-06, | |
| "loss": 0.8163, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.40665816316393877, | |
| "grad_norm": 16.857330322265625, | |
| "learning_rate": 4.107205778717407e-06, | |
| "loss": 0.8498, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.4083668109083251, | |
| "grad_norm": 23.580421447753906, | |
| "learning_rate": 4.102801268498943e-06, | |
| "loss": 0.8312, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.4100754586527114, | |
| "grad_norm": 19.323286056518555, | |
| "learning_rate": 4.09839675828048e-06, | |
| "loss": 0.8104, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.41178410639709767, | |
| "grad_norm": 20.80855941772461, | |
| "learning_rate": 4.093992248062016e-06, | |
| "loss": 0.8135, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.41349275414148395, | |
| "grad_norm": 19.280595779418945, | |
| "learning_rate": 4.089587737843552e-06, | |
| "loss": 0.844, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.4152014018858703, | |
| "grad_norm": 24.815204620361328, | |
| "learning_rate": 4.085183227625089e-06, | |
| "loss": 0.8324, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.4169100496302566, | |
| "grad_norm": 19.941333770751953, | |
| "learning_rate": 4.080778717406625e-06, | |
| "loss": 0.8529, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.41861869737464286, | |
| "grad_norm": 18.017372131347656, | |
| "learning_rate": 4.076374207188161e-06, | |
| "loss": 0.8462, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.4203273451190292, | |
| "grad_norm": 15.000432014465332, | |
| "learning_rate": 4.071969696969697e-06, | |
| "loss": 0.8409, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.4220359928634155, | |
| "grad_norm": 26.720317840576172, | |
| "learning_rate": 4.067565186751233e-06, | |
| "loss": 0.8698, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.42374464060780176, | |
| "grad_norm": 21.395301818847656, | |
| "learning_rate": 4.06316067653277e-06, | |
| "loss": 0.7904, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.42545328835218804, | |
| "grad_norm": 22.071170806884766, | |
| "learning_rate": 4.058756166314306e-06, | |
| "loss": 0.8307, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.4271619360965744, | |
| "grad_norm": 18.912866592407227, | |
| "learning_rate": 4.054351656095842e-06, | |
| "loss": 0.8372, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.42887058384096066, | |
| "grad_norm": 23.51670265197754, | |
| "learning_rate": 4.049947145877379e-06, | |
| "loss": 0.8141, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.43057923158534694, | |
| "grad_norm": 17.042999267578125, | |
| "learning_rate": 4.045542635658915e-06, | |
| "loss": 0.8862, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.4322878793297332, | |
| "grad_norm": 21.787776947021484, | |
| "learning_rate": 4.0411381254404515e-06, | |
| "loss": 0.8552, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.43399652707411956, | |
| "grad_norm": 20.526792526245117, | |
| "learning_rate": 4.036733615221987e-06, | |
| "loss": 0.8179, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.43570517481850585, | |
| "grad_norm": 25.407398223876953, | |
| "learning_rate": 4.032329105003524e-06, | |
| "loss": 0.8514, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.43741382256289213, | |
| "grad_norm": 16.01190948486328, | |
| "learning_rate": 4.027924594785061e-06, | |
| "loss": 0.8364, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.43912247030727847, | |
| "grad_norm": 20.050710678100586, | |
| "learning_rate": 4.0235200845665965e-06, | |
| "loss": 0.8362, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.44083111805166475, | |
| "grad_norm": 20.279884338378906, | |
| "learning_rate": 4.019115574348133e-06, | |
| "loss": 0.8034, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.44253976579605103, | |
| "grad_norm": 18.78345489501953, | |
| "learning_rate": 4.014711064129669e-06, | |
| "loss": 0.8336, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.4442484135404373, | |
| "grad_norm": 24.339946746826172, | |
| "learning_rate": 4.010306553911205e-06, | |
| "loss": 0.8588, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.44595706128482365, | |
| "grad_norm": 19.264131546020508, | |
| "learning_rate": 4.005902043692742e-06, | |
| "loss": 0.8536, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.44766570902920994, | |
| "grad_norm": 18.921791076660156, | |
| "learning_rate": 4.0014975334742774e-06, | |
| "loss": 0.819, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.4493743567735962, | |
| "grad_norm": 18.068126678466797, | |
| "learning_rate": 3.997093023255814e-06, | |
| "loss": 0.8061, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.4510830045179825, | |
| "grad_norm": 17.197391510009766, | |
| "learning_rate": 3.992688513037351e-06, | |
| "loss": 0.7977, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.45279165226236884, | |
| "grad_norm": 17.76527976989746, | |
| "learning_rate": 3.988284002818887e-06, | |
| "loss": 0.9012, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.4545003000067551, | |
| "grad_norm": 19.648696899414062, | |
| "learning_rate": 3.983879492600423e-06, | |
| "loss": 0.8426, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.4562089477511414, | |
| "grad_norm": 23.721616744995117, | |
| "learning_rate": 3.979474982381959e-06, | |
| "loss": 0.8055, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.45791759549552774, | |
| "grad_norm": 19.17746353149414, | |
| "learning_rate": 3.975070472163496e-06, | |
| "loss": 0.8192, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.459626243239914, | |
| "grad_norm": 19.428604125976562, | |
| "learning_rate": 3.9706659619450326e-06, | |
| "loss": 0.8168, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.4613348909843003, | |
| "grad_norm": 20.59436798095703, | |
| "learning_rate": 3.966261451726568e-06, | |
| "loss": 0.873, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.4630435387286866, | |
| "grad_norm": 22.71458625793457, | |
| "learning_rate": 3.961856941508105e-06, | |
| "loss": 0.8439, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.4647521864730729, | |
| "grad_norm": 30.239309310913086, | |
| "learning_rate": 3.957452431289641e-06, | |
| "loss": 0.7587, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.4664608342174592, | |
| "grad_norm": 18.89266014099121, | |
| "learning_rate": 3.953047921071177e-06, | |
| "loss": 0.8704, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.4681694819618455, | |
| "grad_norm": 18.356983184814453, | |
| "learning_rate": 3.9486434108527135e-06, | |
| "loss": 0.8343, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.4698781297062318, | |
| "grad_norm": 20.14874267578125, | |
| "learning_rate": 3.944238900634249e-06, | |
| "loss": 0.8119, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.4715867774506181, | |
| "grad_norm": 28.85474967956543, | |
| "learning_rate": 3.939834390415786e-06, | |
| "loss": 0.8767, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.4732954251950044, | |
| "grad_norm": 22.18447494506836, | |
| "learning_rate": 3.935429880197323e-06, | |
| "loss": 0.8175, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.4750040729393907, | |
| "grad_norm": 22.46308135986328, | |
| "learning_rate": 3.9310253699788585e-06, | |
| "loss": 0.8281, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.47671272068377696, | |
| "grad_norm": 18.005477905273438, | |
| "learning_rate": 3.926620859760395e-06, | |
| "loss": 0.7928, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.4784213684281633, | |
| "grad_norm": 18.78510093688965, | |
| "learning_rate": 3.922216349541931e-06, | |
| "loss": 0.8042, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.4801300161725496, | |
| "grad_norm": 22.980220794677734, | |
| "learning_rate": 3.917811839323468e-06, | |
| "loss": 0.8096, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.48183866391693586, | |
| "grad_norm": 18.548603057861328, | |
| "learning_rate": 3.913407329105004e-06, | |
| "loss": 0.8696, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.4835473116613222, | |
| "grad_norm": 23.86473846435547, | |
| "learning_rate": 3.90900281888654e-06, | |
| "loss": 0.8154, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.4852559594057085, | |
| "grad_norm": 24.574298858642578, | |
| "learning_rate": 3.904598308668077e-06, | |
| "loss": 0.8047, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.48696460715009476, | |
| "grad_norm": 21.888259887695312, | |
| "learning_rate": 3.900193798449613e-06, | |
| "loss": 0.8212, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.48867325489448105, | |
| "grad_norm": 18.29496955871582, | |
| "learning_rate": 3.895789288231149e-06, | |
| "loss": 0.8578, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.4903819026388674, | |
| "grad_norm": 19.80244255065918, | |
| "learning_rate": 3.891384778012685e-06, | |
| "loss": 0.8449, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.49209055038325367, | |
| "grad_norm": 22.403602600097656, | |
| "learning_rate": 3.886980267794221e-06, | |
| "loss": 0.8207, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.49379919812763995, | |
| "grad_norm": 25.105716705322266, | |
| "learning_rate": 3.882575757575758e-06, | |
| "loss": 0.8606, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.49550784587202623, | |
| "grad_norm": 19.511430740356445, | |
| "learning_rate": 3.878171247357294e-06, | |
| "loss": 0.8417, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.49721649361641257, | |
| "grad_norm": 20.566545486450195, | |
| "learning_rate": 3.87376673713883e-06, | |
| "loss": 0.7735, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.49892514136079885, | |
| "grad_norm": 19.69638442993164, | |
| "learning_rate": 3.869362226920367e-06, | |
| "loss": 0.8195, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.5006337891051852, | |
| "grad_norm": 19.20965576171875, | |
| "learning_rate": 3.864957716701903e-06, | |
| "loss": 0.8407, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.5023424368495715, | |
| "grad_norm": 18.67803955078125, | |
| "learning_rate": 3.86055320648344e-06, | |
| "loss": 0.8401, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.5040510845939578, | |
| "grad_norm": 19.72920036315918, | |
| "learning_rate": 3.8561486962649755e-06, | |
| "loss": 0.8176, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.505759732338344, | |
| "grad_norm": 27.366355895996094, | |
| "learning_rate": 3.851744186046512e-06, | |
| "loss": 0.8252, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.5074683800827303, | |
| "grad_norm": 24.130985260009766, | |
| "learning_rate": 3.847339675828049e-06, | |
| "loss": 0.8809, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.5091770278271166, | |
| "grad_norm": 17.88861846923828, | |
| "learning_rate": 3.842935165609585e-06, | |
| "loss": 0.7728, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.5108856755715029, | |
| "grad_norm": 22.26430892944336, | |
| "learning_rate": 3.8385306553911205e-06, | |
| "loss": 0.8516, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.5125943233158893, | |
| "grad_norm": 20.059843063354492, | |
| "learning_rate": 3.834126145172657e-06, | |
| "loss": 0.8084, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5143029710602756, | |
| "grad_norm": 22.344680786132812, | |
| "learning_rate": 3.829721634954193e-06, | |
| "loss": 0.7728, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.5160116188046618, | |
| "grad_norm": 18.06348991394043, | |
| "learning_rate": 3.82531712473573e-06, | |
| "loss": 0.8463, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.5177202665490481, | |
| "grad_norm": 20.816757202148438, | |
| "learning_rate": 3.820912614517266e-06, | |
| "loss": 0.7904, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.5194289142934344, | |
| "grad_norm": 24.29160499572754, | |
| "learning_rate": 3.816508104298802e-06, | |
| "loss": 0.8059, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.5211375620378207, | |
| "grad_norm": 21.73212242126465, | |
| "learning_rate": 3.8121035940803385e-06, | |
| "loss": 0.7817, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.522846209782207, | |
| "grad_norm": 20.20355224609375, | |
| "learning_rate": 3.807699083861875e-06, | |
| "loss": 0.7836, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.5245548575265934, | |
| "grad_norm": 15.811525344848633, | |
| "learning_rate": 3.803294573643411e-06, | |
| "loss": 0.84, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.5262635052709796, | |
| "grad_norm": 23.239578247070312, | |
| "learning_rate": 3.7988900634249478e-06, | |
| "loss": 0.795, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.5279721530153659, | |
| "grad_norm": 18.50345802307129, | |
| "learning_rate": 3.794485553206484e-06, | |
| "loss": 0.8536, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.5296808007597522, | |
| "grad_norm": 21.647409439086914, | |
| "learning_rate": 3.7900810429880203e-06, | |
| "loss": 0.8141, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.5313894485041385, | |
| "grad_norm": 22.411800384521484, | |
| "learning_rate": 3.7856765327695566e-06, | |
| "loss": 0.7931, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.5330980962485248, | |
| "grad_norm": 23.15050506591797, | |
| "learning_rate": 3.7812720225510924e-06, | |
| "loss": 0.7902, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.5348067439929111, | |
| "grad_norm": 26.446077346801758, | |
| "learning_rate": 3.7768675123326287e-06, | |
| "loss": 0.8206, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.5365153917372973, | |
| "grad_norm": 18.19157600402832, | |
| "learning_rate": 3.772463002114165e-06, | |
| "loss": 0.796, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.5382240394816837, | |
| "grad_norm": 28.09468650817871, | |
| "learning_rate": 3.768058491895701e-06, | |
| "loss": 0.7592, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.53993268722607, | |
| "grad_norm": 19.753379821777344, | |
| "learning_rate": 3.763653981677238e-06, | |
| "loss": 0.7917, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.5416413349704563, | |
| "grad_norm": 22.52701759338379, | |
| "learning_rate": 3.759249471458774e-06, | |
| "loss": 0.8036, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.5433499827148426, | |
| "grad_norm": 24.160633087158203, | |
| "learning_rate": 3.7548449612403104e-06, | |
| "loss": 0.8111, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.5450586304592289, | |
| "grad_norm": 20.43000030517578, | |
| "learning_rate": 3.7504404510218467e-06, | |
| "loss": 0.8181, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.5467672782036151, | |
| "grad_norm": 20.047271728515625, | |
| "learning_rate": 3.746035940803383e-06, | |
| "loss": 0.8229, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.5484759259480014, | |
| "grad_norm": 20.642215728759766, | |
| "learning_rate": 3.741631430584919e-06, | |
| "loss": 0.8322, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5501845736923878, | |
| "grad_norm": 19.686071395874023, | |
| "learning_rate": 3.737226920366456e-06, | |
| "loss": 0.775, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.5518932214367741, | |
| "grad_norm": 17.0440616607666, | |
| "learning_rate": 3.732822410147992e-06, | |
| "loss": 0.8128, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5536018691811604, | |
| "grad_norm": 20.75046730041504, | |
| "learning_rate": 3.7284178999295284e-06, | |
| "loss": 0.8061, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5553105169255467, | |
| "grad_norm": 23.867816925048828, | |
| "learning_rate": 3.7240133897110643e-06, | |
| "loss": 0.7951, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.557019164669933, | |
| "grad_norm": 26.70461082458496, | |
| "learning_rate": 3.7196088794926005e-06, | |
| "loss": 0.8343, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.5587278124143192, | |
| "grad_norm": 17.15665054321289, | |
| "learning_rate": 3.715204369274137e-06, | |
| "loss": 0.8434, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5604364601587055, | |
| "grad_norm": 23.122482299804688, | |
| "learning_rate": 3.710799859055673e-06, | |
| "loss": 0.799, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5621451079030919, | |
| "grad_norm": 18.05946922302246, | |
| "learning_rate": 3.7063953488372093e-06, | |
| "loss": 0.8326, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.5638537556474782, | |
| "grad_norm": 21.532657623291016, | |
| "learning_rate": 3.701990838618746e-06, | |
| "loss": 0.7646, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5655624033918645, | |
| "grad_norm": 21.229511260986328, | |
| "learning_rate": 3.6975863284002823e-06, | |
| "loss": 0.7565, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.5672710511362508, | |
| "grad_norm": 18.513898849487305, | |
| "learning_rate": 3.6931818181818186e-06, | |
| "loss": 0.8021, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.568979698880637, | |
| "grad_norm": 20.819110870361328, | |
| "learning_rate": 3.688777307963355e-06, | |
| "loss": 0.8215, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5706883466250233, | |
| "grad_norm": 27.454303741455078, | |
| "learning_rate": 3.684372797744891e-06, | |
| "loss": 0.7404, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.5723969943694096, | |
| "grad_norm": 20.618860244750977, | |
| "learning_rate": 3.6799682875264273e-06, | |
| "loss": 0.7855, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5741056421137959, | |
| "grad_norm": 21.150808334350586, | |
| "learning_rate": 3.675563777307964e-06, | |
| "loss": 0.7411, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5758142898581823, | |
| "grad_norm": 23.632627487182617, | |
| "learning_rate": 3.6711592670895003e-06, | |
| "loss": 0.7503, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.5775229376025686, | |
| "grad_norm": 19.350055694580078, | |
| "learning_rate": 3.666754756871036e-06, | |
| "loss": 0.8135, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.5792315853469548, | |
| "grad_norm": 19.341176986694336, | |
| "learning_rate": 3.6623502466525724e-06, | |
| "loss": 0.8338, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5809402330913411, | |
| "grad_norm": 24.91313362121582, | |
| "learning_rate": 3.6579457364341087e-06, | |
| "loss": 0.7763, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.5826488808357274, | |
| "grad_norm": 23.72249412536621, | |
| "learning_rate": 3.653541226215645e-06, | |
| "loss": 0.7926, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.5843575285801137, | |
| "grad_norm": 22.838260650634766, | |
| "learning_rate": 3.649136715997181e-06, | |
| "loss": 0.8036, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.5860661763245, | |
| "grad_norm": 19.691679000854492, | |
| "learning_rate": 3.6447322057787175e-06, | |
| "loss": 0.8427, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.5877748240688864, | |
| "grad_norm": 21.973587036132812, | |
| "learning_rate": 3.640327695560254e-06, | |
| "loss": 0.8159, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5894834718132727, | |
| "grad_norm": 21.701208114624023, | |
| "learning_rate": 3.6359231853417904e-06, | |
| "loss": 0.8004, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5911921195576589, | |
| "grad_norm": 21.209928512573242, | |
| "learning_rate": 3.6315186751233267e-06, | |
| "loss": 0.8038, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5929007673020452, | |
| "grad_norm": 19.597747802734375, | |
| "learning_rate": 3.627114164904863e-06, | |
| "loss": 0.7868, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.5946094150464315, | |
| "grad_norm": 18.882831573486328, | |
| "learning_rate": 3.6227096546863992e-06, | |
| "loss": 0.7861, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5963180627908178, | |
| "grad_norm": 18.91342544555664, | |
| "learning_rate": 3.6183051444679355e-06, | |
| "loss": 0.8082, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.5980267105352041, | |
| "grad_norm": 23.127704620361328, | |
| "learning_rate": 3.613900634249472e-06, | |
| "loss": 0.8093, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5997353582795903, | |
| "grad_norm": 23.314237594604492, | |
| "learning_rate": 3.6094961240310076e-06, | |
| "loss": 0.8148, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.6014440060239767, | |
| "grad_norm": 19.88514518737793, | |
| "learning_rate": 3.6050916138125443e-06, | |
| "loss": 0.7951, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.603152653768363, | |
| "grad_norm": 23.107532501220703, | |
| "learning_rate": 3.6006871035940805e-06, | |
| "loss": 0.7852, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.6048613015127493, | |
| "grad_norm": 26.108352661132812, | |
| "learning_rate": 3.596282593375617e-06, | |
| "loss": 0.7846, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.6065699492571356, | |
| "grad_norm": 21.61062240600586, | |
| "learning_rate": 3.591878083157153e-06, | |
| "loss": 0.8116, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.6082785970015219, | |
| "grad_norm": 22.475379943847656, | |
| "learning_rate": 3.5874735729386893e-06, | |
| "loss": 0.8082, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.6099872447459082, | |
| "grad_norm": 20.961181640625, | |
| "learning_rate": 3.5830690627202256e-06, | |
| "loss": 0.7747, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.6116958924902944, | |
| "grad_norm": 23.609365463256836, | |
| "learning_rate": 3.5786645525017623e-06, | |
| "loss": 0.828, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.6134045402346808, | |
| "grad_norm": 17.144989013671875, | |
| "learning_rate": 3.5742600422832986e-06, | |
| "loss": 0.8089, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.6151131879790671, | |
| "grad_norm": 24.28973388671875, | |
| "learning_rate": 3.569855532064835e-06, | |
| "loss": 0.782, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.6168218357234534, | |
| "grad_norm": 21.782333374023438, | |
| "learning_rate": 3.565451021846371e-06, | |
| "loss": 0.8252, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.6185304834678397, | |
| "grad_norm": 18.921234130859375, | |
| "learning_rate": 3.5610465116279074e-06, | |
| "loss": 0.7856, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.620239131212226, | |
| "grad_norm": 27.037317276000977, | |
| "learning_rate": 3.5566420014094436e-06, | |
| "loss": 0.7732, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.6219477789566122, | |
| "grad_norm": 20.37610626220703, | |
| "learning_rate": 3.5522374911909795e-06, | |
| "loss": 0.8081, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.6236564267009985, | |
| "grad_norm": 20.596923828125, | |
| "learning_rate": 3.547832980972516e-06, | |
| "loss": 0.8218, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.6253650744453849, | |
| "grad_norm": 19.31607437133789, | |
| "learning_rate": 3.5434284707540524e-06, | |
| "loss": 0.8212, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.6270737221897712, | |
| "grad_norm": 25.045026779174805, | |
| "learning_rate": 3.5390239605355887e-06, | |
| "loss": 0.8197, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.6287823699341575, | |
| "grad_norm": 26.2932071685791, | |
| "learning_rate": 3.534619450317125e-06, | |
| "loss": 0.8084, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.6304910176785438, | |
| "grad_norm": 22.81402587890625, | |
| "learning_rate": 3.530214940098661e-06, | |
| "loss": 0.7689, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.63219966542293, | |
| "grad_norm": 19.472158432006836, | |
| "learning_rate": 3.5258104298801975e-06, | |
| "loss": 0.7875, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.6339083131673163, | |
| "grad_norm": 18.043285369873047, | |
| "learning_rate": 3.5214059196617337e-06, | |
| "loss": 0.8188, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.6356169609117026, | |
| "grad_norm": 29.622112274169922, | |
| "learning_rate": 3.5170014094432704e-06, | |
| "loss": 0.7512, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.6373256086560889, | |
| "grad_norm": 20.153039932250977, | |
| "learning_rate": 3.5125968992248067e-06, | |
| "loss": 0.7823, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.6390342564004753, | |
| "grad_norm": 23.100482940673828, | |
| "learning_rate": 3.508192389006343e-06, | |
| "loss": 0.8137, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.6407429041448616, | |
| "grad_norm": 23.236019134521484, | |
| "learning_rate": 3.5037878787878792e-06, | |
| "loss": 0.7014, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.6424515518892479, | |
| "grad_norm": 22.595932006835938, | |
| "learning_rate": 3.4993833685694155e-06, | |
| "loss": 0.7896, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.6441601996336341, | |
| "grad_norm": 24.64199447631836, | |
| "learning_rate": 3.4949788583509513e-06, | |
| "loss": 0.7879, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.6458688473780204, | |
| "grad_norm": 17.925630569458008, | |
| "learning_rate": 3.4905743481324876e-06, | |
| "loss": 0.8212, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.6475774951224067, | |
| "grad_norm": 27.082433700561523, | |
| "learning_rate": 3.4861698379140243e-06, | |
| "loss": 0.8151, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.649286142866793, | |
| "grad_norm": 19.66040802001953, | |
| "learning_rate": 3.4817653276955606e-06, | |
| "loss": 0.7707, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.6509947906111794, | |
| "grad_norm": 22.2485408782959, | |
| "learning_rate": 3.477360817477097e-06, | |
| "loss": 0.742, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.6527034383555657, | |
| "grad_norm": 20.138118743896484, | |
| "learning_rate": 3.472956307258633e-06, | |
| "loss": 0.8006, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.6544120860999519, | |
| "grad_norm": 21.199825286865234, | |
| "learning_rate": 3.4685517970401693e-06, | |
| "loss": 0.7662, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.6561207338443382, | |
| "grad_norm": 24.360260009765625, | |
| "learning_rate": 3.4641472868217056e-06, | |
| "loss": 0.8112, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.6578293815887245, | |
| "grad_norm": 16.09538459777832, | |
| "learning_rate": 3.4597427766032423e-06, | |
| "loss": 0.7945, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.6595380293331108, | |
| "grad_norm": 22.721424102783203, | |
| "learning_rate": 3.4553382663847786e-06, | |
| "loss": 0.8033, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.6612466770774971, | |
| "grad_norm": 24.86945343017578, | |
| "learning_rate": 3.450933756166315e-06, | |
| "loss": 0.7616, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.6629553248218834, | |
| "grad_norm": 23.66960906982422, | |
| "learning_rate": 3.446529245947851e-06, | |
| "loss": 0.739, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6646639725662697, | |
| "grad_norm": 26.404010772705078, | |
| "learning_rate": 3.4421247357293874e-06, | |
| "loss": 0.8027, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.666372620310656, | |
| "grad_norm": 17.85309410095215, | |
| "learning_rate": 3.437720225510923e-06, | |
| "loss": 0.8071, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6680812680550423, | |
| "grad_norm": 21.82198143005371, | |
| "learning_rate": 3.4333157152924595e-06, | |
| "loss": 0.8042, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6697899157994286, | |
| "grad_norm": 23.275218963623047, | |
| "learning_rate": 3.4289112050739957e-06, | |
| "loss": 0.7481, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.6714985635438149, | |
| "grad_norm": 21.871013641357422, | |
| "learning_rate": 3.4245066948555324e-06, | |
| "loss": 0.7892, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6732072112882012, | |
| "grad_norm": 21.8370418548584, | |
| "learning_rate": 3.4201021846370687e-06, | |
| "loss": 0.7582, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.6749158590325874, | |
| "grad_norm": 23.338394165039062, | |
| "learning_rate": 3.415697674418605e-06, | |
| "loss": 0.7742, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6766245067769738, | |
| "grad_norm": 22.160715103149414, | |
| "learning_rate": 3.4112931642001412e-06, | |
| "loss": 0.7382, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6783331545213601, | |
| "grad_norm": 20.671384811401367, | |
| "learning_rate": 3.4068886539816775e-06, | |
| "loss": 0.7889, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.6800418022657464, | |
| "grad_norm": 25.99142837524414, | |
| "learning_rate": 3.4024841437632137e-06, | |
| "loss": 0.7906, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.6817504500101327, | |
| "grad_norm": 21.951120376586914, | |
| "learning_rate": 3.3980796335447504e-06, | |
| "loss": 0.7836, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.683459097754519, | |
| "grad_norm": 19.033308029174805, | |
| "learning_rate": 3.3936751233262867e-06, | |
| "loss": 0.7602, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6851677454989052, | |
| "grad_norm": 23.86874008178711, | |
| "learning_rate": 3.389270613107823e-06, | |
| "loss": 0.7759, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.6868763932432915, | |
| "grad_norm": 19.606098175048828, | |
| "learning_rate": 3.3848661028893592e-06, | |
| "loss": 0.7874, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.6885850409876779, | |
| "grad_norm": 20.22423553466797, | |
| "learning_rate": 3.380461592670895e-06, | |
| "loss": 0.825, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.6902936887320642, | |
| "grad_norm": 27.637001037597656, | |
| "learning_rate": 3.3760570824524313e-06, | |
| "loss": 0.771, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.6920023364764505, | |
| "grad_norm": 18.97125244140625, | |
| "learning_rate": 3.3716525722339676e-06, | |
| "loss": 0.7249, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.6937109842208368, | |
| "grad_norm": 22.724328994750977, | |
| "learning_rate": 3.367248062015504e-06, | |
| "loss": 0.746, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.695419631965223, | |
| "grad_norm": 21.274978637695312, | |
| "learning_rate": 3.3628435517970406e-06, | |
| "loss": 0.7504, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.6971282797096093, | |
| "grad_norm": 23.363569259643555, | |
| "learning_rate": 3.358439041578577e-06, | |
| "loss": 0.6809, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.6988369274539956, | |
| "grad_norm": 27.47598648071289, | |
| "learning_rate": 3.354034531360113e-06, | |
| "loss": 0.755, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.7005455751983819, | |
| "grad_norm": 23.85652732849121, | |
| "learning_rate": 3.3496300211416494e-06, | |
| "loss": 0.7601, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.7022542229427683, | |
| "grad_norm": 18.246395111083984, | |
| "learning_rate": 3.3452255109231856e-06, | |
| "loss": 0.7201, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.7039628706871546, | |
| "grad_norm": 22.6968936920166, | |
| "learning_rate": 3.340821000704722e-06, | |
| "loss": 0.772, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.7056715184315409, | |
| "grad_norm": 16.688634872436523, | |
| "learning_rate": 3.3364164904862586e-06, | |
| "loss": 0.7743, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.7073801661759271, | |
| "grad_norm": 22.384685516357422, | |
| "learning_rate": 3.332011980267795e-06, | |
| "loss": 0.7562, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.7090888139203134, | |
| "grad_norm": 25.848621368408203, | |
| "learning_rate": 3.327607470049331e-06, | |
| "loss": 0.778, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.7107974616646997, | |
| "grad_norm": 20.71343231201172, | |
| "learning_rate": 3.323202959830867e-06, | |
| "loss": 0.7714, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.712506109409086, | |
| "grad_norm": 25.288433074951172, | |
| "learning_rate": 3.318798449612403e-06, | |
| "loss": 0.7812, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.7142147571534724, | |
| "grad_norm": 25.958364486694336, | |
| "learning_rate": 3.3143939393939395e-06, | |
| "loss": 0.8008, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.7159234048978587, | |
| "grad_norm": 23.568279266357422, | |
| "learning_rate": 3.3099894291754757e-06, | |
| "loss": 0.7468, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.7176320526422449, | |
| "grad_norm": 25.222332000732422, | |
| "learning_rate": 3.305584918957012e-06, | |
| "loss": 0.7379, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.7193407003866312, | |
| "grad_norm": 23.69734764099121, | |
| "learning_rate": 3.3011804087385487e-06, | |
| "loss": 0.7478, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.7210493481310175, | |
| "grad_norm": 18.56196403503418, | |
| "learning_rate": 3.296775898520085e-06, | |
| "loss": 0.7341, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.7227579958754038, | |
| "grad_norm": 28.462255477905273, | |
| "learning_rate": 3.2923713883016212e-06, | |
| "loss": 0.7084, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.7244666436197901, | |
| "grad_norm": 23.669126510620117, | |
| "learning_rate": 3.2879668780831575e-06, | |
| "loss": 0.741, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.7261752913641765, | |
| "grad_norm": 22.7609920501709, | |
| "learning_rate": 3.2835623678646938e-06, | |
| "loss": 0.7507, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.7278839391085627, | |
| "grad_norm": 21.962385177612305, | |
| "learning_rate": 3.27915785764623e-06, | |
| "loss": 0.7521, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.729592586852949, | |
| "grad_norm": 23.406116485595703, | |
| "learning_rate": 3.2747533474277667e-06, | |
| "loss": 0.7374, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.7313012345973353, | |
| "grad_norm": 25.467397689819336, | |
| "learning_rate": 3.270348837209303e-06, | |
| "loss": 0.7894, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.7330098823417216, | |
| "grad_norm": 21.29004669189453, | |
| "learning_rate": 3.265944326990839e-06, | |
| "loss": 0.7763, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.7347185300861079, | |
| "grad_norm": 26.53734588623047, | |
| "learning_rate": 3.261539816772375e-06, | |
| "loss": 0.7704, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.7364271778304942, | |
| "grad_norm": 26.881288528442383, | |
| "learning_rate": 3.2571353065539113e-06, | |
| "loss": 0.7655, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.7381358255748804, | |
| "grad_norm": 21.281936645507812, | |
| "learning_rate": 3.2527307963354476e-06, | |
| "loss": 0.7732, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.7398444733192668, | |
| "grad_norm": 22.189983367919922, | |
| "learning_rate": 3.248326286116984e-06, | |
| "loss": 0.766, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.7415531210636531, | |
| "grad_norm": 20.438308715820312, | |
| "learning_rate": 3.24392177589852e-06, | |
| "loss": 0.7765, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.7432617688080394, | |
| "grad_norm": 23.522388458251953, | |
| "learning_rate": 3.239517265680057e-06, | |
| "loss": 0.7617, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.7449704165524257, | |
| "grad_norm": 27.77216148376465, | |
| "learning_rate": 3.235112755461593e-06, | |
| "loss": 0.7321, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.746679064296812, | |
| "grad_norm": 25.899330139160156, | |
| "learning_rate": 3.2307082452431294e-06, | |
| "loss": 0.705, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.7483877120411982, | |
| "grad_norm": 24.98331069946289, | |
| "learning_rate": 3.2263037350246656e-06, | |
| "loss": 0.7356, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.7500963597855845, | |
| "grad_norm": 22.49882698059082, | |
| "learning_rate": 3.221899224806202e-06, | |
| "loss": 0.751, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.7518050075299709, | |
| "grad_norm": 21.93841552734375, | |
| "learning_rate": 3.217494714587738e-06, | |
| "loss": 0.7846, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.7535136552743572, | |
| "grad_norm": 16.464521408081055, | |
| "learning_rate": 3.213090204369275e-06, | |
| "loss": 0.7572, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.7552223030187435, | |
| "grad_norm": 22.928815841674805, | |
| "learning_rate": 3.2086856941508103e-06, | |
| "loss": 0.7004, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.7569309507631298, | |
| "grad_norm": 28.229320526123047, | |
| "learning_rate": 3.204281183932347e-06, | |
| "loss": 0.7712, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.758639598507516, | |
| "grad_norm": 17.726673126220703, | |
| "learning_rate": 3.1998766737138832e-06, | |
| "loss": 0.7637, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.7603482462519023, | |
| "grad_norm": 23.8514404296875, | |
| "learning_rate": 3.1954721634954195e-06, | |
| "loss": 0.7547, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.7620568939962886, | |
| "grad_norm": 24.728208541870117, | |
| "learning_rate": 3.1910676532769557e-06, | |
| "loss": 0.7626, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.7637655417406749, | |
| "grad_norm": 20.695667266845703, | |
| "learning_rate": 3.186663143058492e-06, | |
| "loss": 0.7339, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.7654741894850613, | |
| "grad_norm": 21.278423309326172, | |
| "learning_rate": 3.1822586328400283e-06, | |
| "loss": 0.7922, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.7671828372294476, | |
| "grad_norm": 23.316635131835938, | |
| "learning_rate": 3.177854122621565e-06, | |
| "loss": 0.7576, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.7688914849738339, | |
| "grad_norm": 24.75078010559082, | |
| "learning_rate": 3.1734496124031012e-06, | |
| "loss": 0.7455, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7706001327182201, | |
| "grad_norm": 23.322919845581055, | |
| "learning_rate": 3.1690451021846375e-06, | |
| "loss": 0.7637, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7723087804626064, | |
| "grad_norm": 26.70413589477539, | |
| "learning_rate": 3.1646405919661738e-06, | |
| "loss": 0.6983, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.7740174282069927, | |
| "grad_norm": 21.821128845214844, | |
| "learning_rate": 3.16023608174771e-06, | |
| "loss": 0.7044, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.775726075951379, | |
| "grad_norm": 19.717451095581055, | |
| "learning_rate": 3.1558315715292463e-06, | |
| "loss": 0.7294, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7774347236957654, | |
| "grad_norm": 21.586071014404297, | |
| "learning_rate": 3.151427061310782e-06, | |
| "loss": 0.77, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7791433714401517, | |
| "grad_norm": 25.673486709594727, | |
| "learning_rate": 3.1470225510923184e-06, | |
| "loss": 0.7258, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.780852019184538, | |
| "grad_norm": 27.769350051879883, | |
| "learning_rate": 3.142618040873855e-06, | |
| "loss": 0.797, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.7825606669289242, | |
| "grad_norm": 20.539966583251953, | |
| "learning_rate": 3.1382135306553914e-06, | |
| "loss": 0.7611, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.7842693146733105, | |
| "grad_norm": 21.524412155151367, | |
| "learning_rate": 3.1338090204369276e-06, | |
| "loss": 0.7666, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.7859779624176968, | |
| "grad_norm": 22.3591365814209, | |
| "learning_rate": 3.129404510218464e-06, | |
| "loss": 0.7882, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7876866101620831, | |
| "grad_norm": 23.00992202758789, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.7842, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.7893952579064695, | |
| "grad_norm": 16.515499114990234, | |
| "learning_rate": 3.1205954897815364e-06, | |
| "loss": 0.7369, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.7911039056508558, | |
| "grad_norm": 21.174406051635742, | |
| "learning_rate": 3.116190979563073e-06, | |
| "loss": 0.738, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.792812553395242, | |
| "grad_norm": 23.586978912353516, | |
| "learning_rate": 3.1117864693446094e-06, | |
| "loss": 0.6997, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.7945212011396283, | |
| "grad_norm": 32.21963882446289, | |
| "learning_rate": 3.1073819591261456e-06, | |
| "loss": 0.7019, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7962298488840146, | |
| "grad_norm": 25.9871883392334, | |
| "learning_rate": 3.102977448907682e-06, | |
| "loss": 0.7083, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.7979384966284009, | |
| "grad_norm": 24.332395553588867, | |
| "learning_rate": 3.098572938689218e-06, | |
| "loss": 0.7863, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.7996471443727872, | |
| "grad_norm": 20.869014739990234, | |
| "learning_rate": 3.094168428470754e-06, | |
| "loss": 0.7582, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.8013557921171734, | |
| "grad_norm": 22.537940979003906, | |
| "learning_rate": 3.0897639182522903e-06, | |
| "loss": 0.806, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.8030644398615598, | |
| "grad_norm": 26.17819595336914, | |
| "learning_rate": 3.0853594080338265e-06, | |
| "loss": 0.743, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.8047730876059461, | |
| "grad_norm": 23.158397674560547, | |
| "learning_rate": 3.0809548978153632e-06, | |
| "loss": 0.7075, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.8064817353503324, | |
| "grad_norm": 18.984607696533203, | |
| "learning_rate": 3.0765503875968995e-06, | |
| "loss": 0.7483, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.8081903830947187, | |
| "grad_norm": 22.03697967529297, | |
| "learning_rate": 3.0721458773784358e-06, | |
| "loss": 0.7295, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.809899030839105, | |
| "grad_norm": 19.310800552368164, | |
| "learning_rate": 3.067741367159972e-06, | |
| "loss": 0.7566, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.8116076785834913, | |
| "grad_norm": 27.38188934326172, | |
| "learning_rate": 3.0633368569415083e-06, | |
| "loss": 0.7487, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.8133163263278775, | |
| "grad_norm": 30.696491241455078, | |
| "learning_rate": 3.0589323467230446e-06, | |
| "loss": 0.7468, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.8150249740722639, | |
| "grad_norm": 25.93939208984375, | |
| "learning_rate": 3.0545278365045812e-06, | |
| "loss": 0.7608, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.8167336218166502, | |
| "grad_norm": 24.9782772064209, | |
| "learning_rate": 3.0501233262861175e-06, | |
| "loss": 0.7327, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.8184422695610365, | |
| "grad_norm": 19.76726531982422, | |
| "learning_rate": 3.0457188160676538e-06, | |
| "loss": 0.7585, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.8201509173054228, | |
| "grad_norm": 24.16695785522461, | |
| "learning_rate": 3.04131430584919e-06, | |
| "loss": 0.7812, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.8218595650498091, | |
| "grad_norm": 25.34935188293457, | |
| "learning_rate": 3.036909795630726e-06, | |
| "loss": 0.718, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.8235682127941953, | |
| "grad_norm": 21.88555335998535, | |
| "learning_rate": 3.032505285412262e-06, | |
| "loss": 0.7264, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.8252768605385816, | |
| "grad_norm": 28.31941795349121, | |
| "learning_rate": 3.0281007751937984e-06, | |
| "loss": 0.7012, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.8269855082829679, | |
| "grad_norm": 24.483379364013672, | |
| "learning_rate": 3.0236962649753347e-06, | |
| "loss": 0.6828, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.8286941560273543, | |
| "grad_norm": 22.461471557617188, | |
| "learning_rate": 3.0192917547568714e-06, | |
| "loss": 0.7266, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.8304028037717406, | |
| "grad_norm": 19.53203773498535, | |
| "learning_rate": 3.0148872445384076e-06, | |
| "loss": 0.707, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.8321114515161269, | |
| "grad_norm": 26.503684997558594, | |
| "learning_rate": 3.010482734319944e-06, | |
| "loss": 0.7399, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.8338200992605131, | |
| "grad_norm": 25.25548553466797, | |
| "learning_rate": 3.00607822410148e-06, | |
| "loss": 0.7094, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.8355287470048994, | |
| "grad_norm": 27.260940551757812, | |
| "learning_rate": 3.0016737138830164e-06, | |
| "loss": 0.7311, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.8372373947492857, | |
| "grad_norm": 22.992063522338867, | |
| "learning_rate": 2.9972692036645527e-06, | |
| "loss": 0.7389, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.838946042493672, | |
| "grad_norm": 24.592796325683594, | |
| "learning_rate": 2.9928646934460894e-06, | |
| "loss": 0.753, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.8406546902380584, | |
| "grad_norm": 21.972124099731445, | |
| "learning_rate": 2.9884601832276256e-06, | |
| "loss": 0.7347, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.8423633379824447, | |
| "grad_norm": 21.52046775817871, | |
| "learning_rate": 2.984055673009162e-06, | |
| "loss": 0.6925, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.844071985726831, | |
| "grad_norm": 26.47010040283203, | |
| "learning_rate": 2.9796511627906977e-06, | |
| "loss": 0.6887, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.8457806334712172, | |
| "grad_norm": 31.673635482788086, | |
| "learning_rate": 2.975246652572234e-06, | |
| "loss": 0.7223, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.8474892812156035, | |
| "grad_norm": 24.043643951416016, | |
| "learning_rate": 2.9708421423537703e-06, | |
| "loss": 0.7438, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.8491979289599898, | |
| "grad_norm": 21.57198715209961, | |
| "learning_rate": 2.9664376321353065e-06, | |
| "loss": 0.7187, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.8509065767043761, | |
| "grad_norm": 27.323469161987305, | |
| "learning_rate": 2.962033121916843e-06, | |
| "loss": 0.7423, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.8526152244487625, | |
| "grad_norm": 29.45259666442871, | |
| "learning_rate": 2.9576286116983795e-06, | |
| "loss": 0.7215, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.8543238721931488, | |
| "grad_norm": 25.042516708374023, | |
| "learning_rate": 2.9532241014799158e-06, | |
| "loss": 0.7226, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.856032519937535, | |
| "grad_norm": 20.377517700195312, | |
| "learning_rate": 2.948819591261452e-06, | |
| "loss": 0.7774, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.8577411676819213, | |
| "grad_norm": 19.54035758972168, | |
| "learning_rate": 2.9444150810429883e-06, | |
| "loss": 0.7994, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.8594498154263076, | |
| "grad_norm": 23.07032012939453, | |
| "learning_rate": 2.9400105708245246e-06, | |
| "loss": 0.7022, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.8611584631706939, | |
| "grad_norm": 23.482563018798828, | |
| "learning_rate": 2.935606060606061e-06, | |
| "loss": 0.7228, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.8628671109150802, | |
| "grad_norm": 20.6116886138916, | |
| "learning_rate": 2.9312015503875975e-06, | |
| "loss": 0.6769, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.8645757586594665, | |
| "grad_norm": 23.133941650390625, | |
| "learning_rate": 2.9267970401691338e-06, | |
| "loss": 0.7216, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.8662844064038528, | |
| "grad_norm": 19.501455307006836, | |
| "learning_rate": 2.9223925299506696e-06, | |
| "loss": 0.7417, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.8679930541482391, | |
| "grad_norm": 20.669921875, | |
| "learning_rate": 2.917988019732206e-06, | |
| "loss": 0.7187, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.8697017018926254, | |
| "grad_norm": 24.454565048217773, | |
| "learning_rate": 2.913583509513742e-06, | |
| "loss": 0.6937, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.8714103496370117, | |
| "grad_norm": 26.569597244262695, | |
| "learning_rate": 2.9091789992952784e-06, | |
| "loss": 0.7769, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.873118997381398, | |
| "grad_norm": 23.066076278686523, | |
| "learning_rate": 2.9047744890768147e-06, | |
| "loss": 0.7607, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.8748276451257843, | |
| "grad_norm": 25.31006622314453, | |
| "learning_rate": 2.900369978858351e-06, | |
| "loss": 0.692, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.8765362928701705, | |
| "grad_norm": 24.027446746826172, | |
| "learning_rate": 2.8959654686398876e-06, | |
| "loss": 0.6777, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.8782449406145569, | |
| "grad_norm": 21.320232391357422, | |
| "learning_rate": 2.891560958421424e-06, | |
| "loss": 0.7671, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.8799535883589432, | |
| "grad_norm": 22.37028694152832, | |
| "learning_rate": 2.88715644820296e-06, | |
| "loss": 0.7451, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.8816622361033295, | |
| "grad_norm": 17.270870208740234, | |
| "learning_rate": 2.8827519379844964e-06, | |
| "loss": 0.7202, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8833708838477158, | |
| "grad_norm": 32.7978401184082, | |
| "learning_rate": 2.8783474277660327e-06, | |
| "loss": 0.6955, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.8850795315921021, | |
| "grad_norm": 30.631633758544922, | |
| "learning_rate": 2.873942917547569e-06, | |
| "loss": 0.7657, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8867881793364883, | |
| "grad_norm": 21.487262725830078, | |
| "learning_rate": 2.8695384073291056e-06, | |
| "loss": 0.723, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.8884968270808746, | |
| "grad_norm": 25.697208404541016, | |
| "learning_rate": 2.865133897110641e-06, | |
| "loss": 0.7678, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8902054748252609, | |
| "grad_norm": 17.121862411499023, | |
| "learning_rate": 2.8607293868921778e-06, | |
| "loss": 0.736, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8919141225696473, | |
| "grad_norm": 22.777664184570312, | |
| "learning_rate": 2.856324876673714e-06, | |
| "loss": 0.6902, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.8936227703140336, | |
| "grad_norm": 21.997692108154297, | |
| "learning_rate": 2.8519203664552503e-06, | |
| "loss": 0.7177, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.8953314180584199, | |
| "grad_norm": 27.831954956054688, | |
| "learning_rate": 2.8475158562367866e-06, | |
| "loss": 0.7061, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.8970400658028062, | |
| "grad_norm": 20.20841407775879, | |
| "learning_rate": 2.843111346018323e-06, | |
| "loss": 0.7103, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.8987487135471924, | |
| "grad_norm": 22.670791625976562, | |
| "learning_rate": 2.838706835799859e-06, | |
| "loss": 0.6827, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.9004573612915787, | |
| "grad_norm": 19.863046646118164, | |
| "learning_rate": 2.8343023255813958e-06, | |
| "loss": 0.7659, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.902166009035965, | |
| "grad_norm": 21.82082176208496, | |
| "learning_rate": 2.829897815362932e-06, | |
| "loss": 0.6768, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.9038746567803514, | |
| "grad_norm": 20.27167320251465, | |
| "learning_rate": 2.8254933051444683e-06, | |
| "loss": 0.7313, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.9055833045247377, | |
| "grad_norm": 23.12434196472168, | |
| "learning_rate": 2.8210887949260046e-06, | |
| "loss": 0.7167, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.907291952269124, | |
| "grad_norm": 28.580188751220703, | |
| "learning_rate": 2.816684284707541e-06, | |
| "loss": 0.707, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.9090006000135102, | |
| "grad_norm": 21.65957260131836, | |
| "learning_rate": 2.812279774489077e-06, | |
| "loss": 0.7243, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.9107092477578965, | |
| "grad_norm": 18.040910720825195, | |
| "learning_rate": 2.807875264270613e-06, | |
| "loss": 0.714, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.9124178955022828, | |
| "grad_norm": 25.710729598999023, | |
| "learning_rate": 2.803470754052149e-06, | |
| "loss": 0.7092, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.9141265432466691, | |
| "grad_norm": 23.372407913208008, | |
| "learning_rate": 2.799066243833686e-06, | |
| "loss": 0.6511, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.9158351909910555, | |
| "grad_norm": 24.783931732177734, | |
| "learning_rate": 2.794661733615222e-06, | |
| "loss": 0.6906, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.9175438387354418, | |
| "grad_norm": 25.27882194519043, | |
| "learning_rate": 2.7902572233967584e-06, | |
| "loss": 0.686, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.919252486479828, | |
| "grad_norm": 22.388492584228516, | |
| "learning_rate": 2.7858527131782947e-06, | |
| "loss": 0.6987, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.9209611342242143, | |
| "grad_norm": 20.66554832458496, | |
| "learning_rate": 2.781448202959831e-06, | |
| "loss": 0.6719, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.9226697819686006, | |
| "grad_norm": 21.613603591918945, | |
| "learning_rate": 2.7770436927413672e-06, | |
| "loss": 0.7096, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.9243784297129869, | |
| "grad_norm": 22.45414161682129, | |
| "learning_rate": 2.772639182522904e-06, | |
| "loss": 0.6965, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.9260870774573732, | |
| "grad_norm": 22.07771110534668, | |
| "learning_rate": 2.76823467230444e-06, | |
| "loss": 0.6987, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.9277957252017595, | |
| "grad_norm": 22.09317970275879, | |
| "learning_rate": 2.7638301620859764e-06, | |
| "loss": 0.7033, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.9295043729461459, | |
| "grad_norm": 22.29112434387207, | |
| "learning_rate": 2.7594256518675127e-06, | |
| "loss": 0.7126, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.9312130206905321, | |
| "grad_norm": 25.35603141784668, | |
| "learning_rate": 2.755021141649049e-06, | |
| "loss": 0.6872, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.9329216684349184, | |
| "grad_norm": 26.43601417541504, | |
| "learning_rate": 2.750616631430585e-06, | |
| "loss": 0.6884, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.9346303161793047, | |
| "grad_norm": 22.09392738342285, | |
| "learning_rate": 2.746212121212121e-06, | |
| "loss": 0.7547, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.936338963923691, | |
| "grad_norm": 18.14749526977539, | |
| "learning_rate": 2.7418076109936578e-06, | |
| "loss": 0.7237, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.9380476116680773, | |
| "grad_norm": 25.575664520263672, | |
| "learning_rate": 2.737403100775194e-06, | |
| "loss": 0.7274, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.9397562594124635, | |
| "grad_norm": 30.890182495117188, | |
| "learning_rate": 2.7329985905567303e-06, | |
| "loss": 0.657, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.9414649071568499, | |
| "grad_norm": 25.72110939025879, | |
| "learning_rate": 2.7285940803382666e-06, | |
| "loss": 0.6839, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.9431735549012362, | |
| "grad_norm": 19.578693389892578, | |
| "learning_rate": 2.724189570119803e-06, | |
| "loss": 0.7292, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.9448822026456225, | |
| "grad_norm": 19.946809768676758, | |
| "learning_rate": 2.719785059901339e-06, | |
| "loss": 0.7023, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.9465908503900088, | |
| "grad_norm": 29.049034118652344, | |
| "learning_rate": 2.7153805496828758e-06, | |
| "loss": 0.6711, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.9482994981343951, | |
| "grad_norm": 26.28841781616211, | |
| "learning_rate": 2.710976039464412e-06, | |
| "loss": 0.6878, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.9500081458787814, | |
| "grad_norm": 28.58267593383789, | |
| "learning_rate": 2.7065715292459483e-06, | |
| "loss": 0.6849, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.9517167936231676, | |
| "grad_norm": 23.82330894470215, | |
| "learning_rate": 2.7021670190274846e-06, | |
| "loss": 0.6599, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.9534254413675539, | |
| "grad_norm": 23.920379638671875, | |
| "learning_rate": 2.697762508809021e-06, | |
| "loss": 0.7053, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.9551340891119403, | |
| "grad_norm": 21.284543991088867, | |
| "learning_rate": 2.6933579985905567e-06, | |
| "loss": 0.6852, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.9568427368563266, | |
| "grad_norm": 25.53280258178711, | |
| "learning_rate": 2.688953488372093e-06, | |
| "loss": 0.7453, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.9585513846007129, | |
| "grad_norm": 25.06231689453125, | |
| "learning_rate": 2.684548978153629e-06, | |
| "loss": 0.7138, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.9602600323450992, | |
| "grad_norm": 23.394676208496094, | |
| "learning_rate": 2.680144467935166e-06, | |
| "loss": 0.7542, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.9619686800894854, | |
| "grad_norm": 18.197351455688477, | |
| "learning_rate": 2.675739957716702e-06, | |
| "loss": 0.6856, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.9636773278338717, | |
| "grad_norm": 20.14853286743164, | |
| "learning_rate": 2.6713354474982384e-06, | |
| "loss": 0.7383, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.965385975578258, | |
| "grad_norm": 19.874074935913086, | |
| "learning_rate": 2.6669309372797747e-06, | |
| "loss": 0.7068, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.9670946233226444, | |
| "grad_norm": 21.235719680786133, | |
| "learning_rate": 2.662526427061311e-06, | |
| "loss": 0.7039, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.9688032710670307, | |
| "grad_norm": 24.528348922729492, | |
| "learning_rate": 2.6581219168428472e-06, | |
| "loss": 0.7245, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.970511918811417, | |
| "grad_norm": 23.650028228759766, | |
| "learning_rate": 2.653717406624384e-06, | |
| "loss": 0.6733, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.9722205665558032, | |
| "grad_norm": 24.60836410522461, | |
| "learning_rate": 2.64931289640592e-06, | |
| "loss": 0.6852, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.9739292143001895, | |
| "grad_norm": 21.282657623291016, | |
| "learning_rate": 2.6449083861874564e-06, | |
| "loss": 0.6296, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.9756378620445758, | |
| "grad_norm": 25.645389556884766, | |
| "learning_rate": 2.6405038759689927e-06, | |
| "loss": 0.716, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.9773465097889621, | |
| "grad_norm": 27.467487335205078, | |
| "learning_rate": 2.6360993657505286e-06, | |
| "loss": 0.6401, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.9790551575333485, | |
| "grad_norm": 24.290742874145508, | |
| "learning_rate": 2.631694855532065e-06, | |
| "loss": 0.7524, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.9807638052777348, | |
| "grad_norm": 25.512126922607422, | |
| "learning_rate": 2.627290345313601e-06, | |
| "loss": 0.7001, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.982472453022121, | |
| "grad_norm": 25.331077575683594, | |
| "learning_rate": 2.6228858350951373e-06, | |
| "loss": 0.7483, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.9841811007665073, | |
| "grad_norm": 27.04343032836914, | |
| "learning_rate": 2.618481324876674e-06, | |
| "loss": 0.6901, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.9858897485108936, | |
| "grad_norm": 22.927780151367188, | |
| "learning_rate": 2.6140768146582103e-06, | |
| "loss": 0.6261, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.9875983962552799, | |
| "grad_norm": 19.579212188720703, | |
| "learning_rate": 2.6096723044397466e-06, | |
| "loss": 0.6957, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.9893070439996662, | |
| "grad_norm": 29.58092498779297, | |
| "learning_rate": 2.605267794221283e-06, | |
| "loss": 0.7329, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.9910156917440525, | |
| "grad_norm": 17.51485252380371, | |
| "learning_rate": 2.600863284002819e-06, | |
| "loss": 0.6811, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9927243394884389, | |
| "grad_norm": 24.039936065673828, | |
| "learning_rate": 2.5964587737843554e-06, | |
| "loss": 0.7713, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9944329872328251, | |
| "grad_norm": 21.120576858520508, | |
| "learning_rate": 2.592054263565892e-06, | |
| "loss": 0.675, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9961416349772114, | |
| "grad_norm": 21.085262298583984, | |
| "learning_rate": 2.5876497533474283e-06, | |
| "loss": 0.6972, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9978502827215977, | |
| "grad_norm": 23.86699104309082, | |
| "learning_rate": 2.5832452431289646e-06, | |
| "loss": 0.7248, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.999558930465984, | |
| "grad_norm": 22.0477237701416, | |
| "learning_rate": 2.5788407329105004e-06, | |
| "loss": 0.6543, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9999006600148612, | |
| "eval_loss": 0.848216712474823, | |
| "eval_runtime": 139.044, | |
| "eval_samples_per_second": 70.906, | |
| "eval_steps_per_second": 8.868, | |
| "step": 5852 | |
| }, | |
| { | |
| "epoch": 1.001366918195509, | |
| "grad_norm": 17.0991268157959, | |
| "learning_rate": 2.5744362226920367e-06, | |
| "loss": 0.6552, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.0030755659398953, | |
| "grad_norm": 25.986722946166992, | |
| "learning_rate": 2.570031712473573e-06, | |
| "loss": 0.4587, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.0047842136842817, | |
| "grad_norm": 30.915128707885742, | |
| "learning_rate": 2.5656272022551092e-06, | |
| "loss": 0.429, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.006492861428668, | |
| "grad_norm": 25.264280319213867, | |
| "learning_rate": 2.5612226920366455e-06, | |
| "loss": 0.4217, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.0082015091730543, | |
| "grad_norm": 30.28125, | |
| "learning_rate": 2.556818181818182e-06, | |
| "loss": 0.4479, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.0099101569174405, | |
| "grad_norm": 22.753318786621094, | |
| "learning_rate": 2.5524136715997184e-06, | |
| "loss": 0.4314, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.0116188046618269, | |
| "grad_norm": 31.079761505126953, | |
| "learning_rate": 2.5480091613812547e-06, | |
| "loss": 0.4476, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.013327452406213, | |
| "grad_norm": 23.833829879760742, | |
| "learning_rate": 2.543604651162791e-06, | |
| "loss": 0.4658, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.0150361001505994, | |
| "grad_norm": 29.113054275512695, | |
| "learning_rate": 2.5392001409443272e-06, | |
| "loss": 0.4135, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.0167447478949858, | |
| "grad_norm": 33.021793365478516, | |
| "learning_rate": 2.5347956307258635e-06, | |
| "loss": 0.455, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.018453395639372, | |
| "grad_norm": 25.65928840637207, | |
| "learning_rate": 2.5303911205074e-06, | |
| "loss": 0.4224, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.0201620433837584, | |
| "grad_norm": 29.965845108032227, | |
| "learning_rate": 2.5259866102889365e-06, | |
| "loss": 0.4035, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.0218706911281445, | |
| "grad_norm": 27.758283615112305, | |
| "learning_rate": 2.5215821000704723e-06, | |
| "loss": 0.4181, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.023579338872531, | |
| "grad_norm": 27.05050277709961, | |
| "learning_rate": 2.5171775898520086e-06, | |
| "loss": 0.4385, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.0252879866169171, | |
| "grad_norm": 32.60606002807617, | |
| "learning_rate": 2.512773079633545e-06, | |
| "loss": 0.4366, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0269966343613035, | |
| "grad_norm": 26.773223876953125, | |
| "learning_rate": 2.508368569415081e-06, | |
| "loss": 0.3887, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.02870528210569, | |
| "grad_norm": 28.4731502532959, | |
| "learning_rate": 2.5039640591966174e-06, | |
| "loss": 0.4236, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.030413929850076, | |
| "grad_norm": 27.751144409179688, | |
| "learning_rate": 2.4995595489781536e-06, | |
| "loss": 0.4405, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.0321225775944625, | |
| "grad_norm": 30.297574996948242, | |
| "learning_rate": 2.4951550387596903e-06, | |
| "loss": 0.4587, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.0338312253388486, | |
| "grad_norm": 27.601472854614258, | |
| "learning_rate": 2.4907505285412266e-06, | |
| "loss": 0.4233, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.035539873083235, | |
| "grad_norm": 28.992273330688477, | |
| "learning_rate": 2.486346018322763e-06, | |
| "loss": 0.3768, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.0372485208276212, | |
| "grad_norm": 30.652511596679688, | |
| "learning_rate": 2.481941508104299e-06, | |
| "loss": 0.4453, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.0389571685720076, | |
| "grad_norm": 27.534353256225586, | |
| "learning_rate": 2.4775369978858354e-06, | |
| "loss": 0.4561, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.0406658163163938, | |
| "grad_norm": 33.124420166015625, | |
| "learning_rate": 2.4731324876673716e-06, | |
| "loss": 0.4058, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.0423744640607802, | |
| "grad_norm": 27.169292449951172, | |
| "learning_rate": 2.468727977448908e-06, | |
| "loss": 0.3672, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.0440831118051666, | |
| "grad_norm": 34.9072151184082, | |
| "learning_rate": 2.464323467230444e-06, | |
| "loss": 0.3951, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.0457917595495527, | |
| "grad_norm": 19.93570899963379, | |
| "learning_rate": 2.4599189570119804e-06, | |
| "loss": 0.4027, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.0475004072939391, | |
| "grad_norm": 20.85097312927246, | |
| "learning_rate": 2.4555144467935167e-06, | |
| "loss": 0.3867, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.0492090550383253, | |
| "grad_norm": 39.88778305053711, | |
| "learning_rate": 2.451109936575053e-06, | |
| "loss": 0.4293, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.0509177027827117, | |
| "grad_norm": 24.23454475402832, | |
| "learning_rate": 2.4467054263565892e-06, | |
| "loss": 0.4324, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.0526263505270979, | |
| "grad_norm": 26.025646209716797, | |
| "learning_rate": 2.4423009161381255e-06, | |
| "loss": 0.4314, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.0543349982714842, | |
| "grad_norm": 32.79511260986328, | |
| "learning_rate": 2.4378964059196618e-06, | |
| "loss": 0.3892, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.0560436460158706, | |
| "grad_norm": 29.5976505279541, | |
| "learning_rate": 2.4334918957011984e-06, | |
| "loss": 0.4593, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.0577522937602568, | |
| "grad_norm": 31.08228874206543, | |
| "learning_rate": 2.4290873854827347e-06, | |
| "loss": 0.3868, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.0594609415046432, | |
| "grad_norm": 32.12119674682617, | |
| "learning_rate": 2.424682875264271e-06, | |
| "loss": 0.4333, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.0611695892490294, | |
| "grad_norm": 41.33872985839844, | |
| "learning_rate": 2.420278365045807e-06, | |
| "loss": 0.4146, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.0628782369934158, | |
| "grad_norm": 41.04495620727539, | |
| "learning_rate": 2.4158738548273435e-06, | |
| "loss": 0.465, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.064586884737802, | |
| "grad_norm": 36.077674865722656, | |
| "learning_rate": 2.4114693446088798e-06, | |
| "loss": 0.4483, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.0662955324821883, | |
| "grad_norm": 27.00971221923828, | |
| "learning_rate": 2.407064834390416e-06, | |
| "loss": 0.4006, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.0680041802265747, | |
| "grad_norm": 26.599790573120117, | |
| "learning_rate": 2.4026603241719523e-06, | |
| "loss": 0.4588, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.069712827970961, | |
| "grad_norm": 32.89334487915039, | |
| "learning_rate": 2.3982558139534886e-06, | |
| "loss": 0.4048, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.0714214757153473, | |
| "grad_norm": 28.60340118408203, | |
| "learning_rate": 2.393851303735025e-06, | |
| "loss": 0.4211, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.0731301234597335, | |
| "grad_norm": 33.43773651123047, | |
| "learning_rate": 2.389446793516561e-06, | |
| "loss": 0.449, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.0748387712041199, | |
| "grad_norm": 24.94864273071289, | |
| "learning_rate": 2.3850422832980974e-06, | |
| "loss": 0.4117, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.076547418948506, | |
| "grad_norm": 40.33943557739258, | |
| "learning_rate": 2.3806377730796336e-06, | |
| "loss": 0.399, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.0782560666928924, | |
| "grad_norm": 21.64677619934082, | |
| "learning_rate": 2.37623326286117e-06, | |
| "loss": 0.3691, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.0799647144372788, | |
| "grad_norm": 24.09543800354004, | |
| "learning_rate": 2.3718287526427066e-06, | |
| "loss": 0.4372, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.081673362181665, | |
| "grad_norm": 38.64820861816406, | |
| "learning_rate": 2.367424242424243e-06, | |
| "loss": 0.4074, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.0833820099260514, | |
| "grad_norm": 25.985990524291992, | |
| "learning_rate": 2.3630197322057787e-06, | |
| "loss": 0.4339, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.0850906576704376, | |
| "grad_norm": 24.800357818603516, | |
| "learning_rate": 2.358615221987315e-06, | |
| "loss": 0.4702, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.086799305414824, | |
| "grad_norm": 21.988859176635742, | |
| "learning_rate": 2.3542107117688516e-06, | |
| "loss": 0.4824, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.0885079531592101, | |
| "grad_norm": 39.72243881225586, | |
| "learning_rate": 2.349806201550388e-06, | |
| "loss": 0.4188, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.0902166009035965, | |
| "grad_norm": 23.119239807128906, | |
| "learning_rate": 2.345401691331924e-06, | |
| "loss": 0.4573, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.091925248647983, | |
| "grad_norm": 20.915830612182617, | |
| "learning_rate": 2.3409971811134604e-06, | |
| "loss": 0.4256, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.093633896392369, | |
| "grad_norm": 25.44793701171875, | |
| "learning_rate": 2.3365926708949967e-06, | |
| "loss": 0.416, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.0953425441367555, | |
| "grad_norm": 34.23642349243164, | |
| "learning_rate": 2.332188160676533e-06, | |
| "loss": 0.3993, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.0970511918811416, | |
| "grad_norm": 26.873048782348633, | |
| "learning_rate": 2.3277836504580692e-06, | |
| "loss": 0.4516, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.098759839625528, | |
| "grad_norm": 29.566207885742188, | |
| "learning_rate": 2.3233791402396055e-06, | |
| "loss": 0.4385, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.1004684873699142, | |
| "grad_norm": 38.95769500732422, | |
| "learning_rate": 2.3189746300211418e-06, | |
| "loss": 0.4334, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.1021771351143006, | |
| "grad_norm": 22.23900604248047, | |
| "learning_rate": 2.314570119802678e-06, | |
| "loss": 0.4133, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.1038857828586868, | |
| "grad_norm": 32.352516174316406, | |
| "learning_rate": 2.3101656095842147e-06, | |
| "loss": 0.4445, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.1055944306030732, | |
| "grad_norm": 27.49093246459961, | |
| "learning_rate": 2.3057610993657506e-06, | |
| "loss": 0.458, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.1073030783474596, | |
| "grad_norm": 22.2708740234375, | |
| "learning_rate": 2.301356589147287e-06, | |
| "loss": 0.4362, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.1090117260918457, | |
| "grad_norm": 28.47286605834961, | |
| "learning_rate": 2.296952078928823e-06, | |
| "loss": 0.4389, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.1107203738362321, | |
| "grad_norm": 33.60470199584961, | |
| "learning_rate": 2.2925475687103598e-06, | |
| "loss": 0.434, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.1124290215806183, | |
| "grad_norm": 25.99380874633789, | |
| "learning_rate": 2.288143058491896e-06, | |
| "loss": 0.4128, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.1141376693250047, | |
| "grad_norm": 23.311767578125, | |
| "learning_rate": 2.2837385482734323e-06, | |
| "loss": 0.3983, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.115846317069391, | |
| "grad_norm": 42.49270248413086, | |
| "learning_rate": 2.2793340380549686e-06, | |
| "loss": 0.4046, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.1175549648137773, | |
| "grad_norm": 34.39870834350586, | |
| "learning_rate": 2.274929527836505e-06, | |
| "loss": 0.4225, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.1192636125581636, | |
| "grad_norm": 58.568817138671875, | |
| "learning_rate": 2.270525017618041e-06, | |
| "loss": 0.403, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.1209722603025498, | |
| "grad_norm": 40.097511291503906, | |
| "learning_rate": 2.2661205073995774e-06, | |
| "loss": 0.4099, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.1226809080469362, | |
| "grad_norm": 22.363500595092773, | |
| "learning_rate": 2.2617159971811136e-06, | |
| "loss": 0.4422, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.1243895557913224, | |
| "grad_norm": 29.2000732421875, | |
| "learning_rate": 2.25731148696265e-06, | |
| "loss": 0.4302, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.1260982035357088, | |
| "grad_norm": 25.729015350341797, | |
| "learning_rate": 2.252906976744186e-06, | |
| "loss": 0.4165, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.127806851280095, | |
| "grad_norm": 41.546085357666016, | |
| "learning_rate": 2.2485024665257224e-06, | |
| "loss": 0.4259, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.1295154990244813, | |
| "grad_norm": 27.8181095123291, | |
| "learning_rate": 2.2440979563072587e-06, | |
| "loss": 0.4418, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.1312241467688677, | |
| "grad_norm": 27.532690048217773, | |
| "learning_rate": 2.239693446088795e-06, | |
| "loss": 0.4072, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.132932794513254, | |
| "grad_norm": 38.94101333618164, | |
| "learning_rate": 2.2352889358703312e-06, | |
| "loss": 0.4003, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.1346414422576403, | |
| "grad_norm": 28.348133087158203, | |
| "learning_rate": 2.230884425651868e-06, | |
| "loss": 0.3975, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.1363500900020265, | |
| "grad_norm": 37.84804916381836, | |
| "learning_rate": 2.226479915433404e-06, | |
| "loss": 0.4254, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.1380587377464129, | |
| "grad_norm": 26.082874298095703, | |
| "learning_rate": 2.2220754052149404e-06, | |
| "loss": 0.3791, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.139767385490799, | |
| "grad_norm": 28.30021095275879, | |
| "learning_rate": 2.2176708949964763e-06, | |
| "loss": 0.4773, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.1414760332351854, | |
| "grad_norm": 23.014328002929688, | |
| "learning_rate": 2.213266384778013e-06, | |
| "loss": 0.4389, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.1431846809795718, | |
| "grad_norm": 22.675397872924805, | |
| "learning_rate": 2.2088618745595492e-06, | |
| "loss": 0.4072, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.144893328723958, | |
| "grad_norm": 37.76887893676758, | |
| "learning_rate": 2.2044573643410855e-06, | |
| "loss": 0.4555, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.1466019764683444, | |
| "grad_norm": 25.542505264282227, | |
| "learning_rate": 2.2000528541226218e-06, | |
| "loss": 0.421, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.1483106242127306, | |
| "grad_norm": 26.092363357543945, | |
| "learning_rate": 2.195648343904158e-06, | |
| "loss": 0.433, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.150019271957117, | |
| "grad_norm": 38.503875732421875, | |
| "learning_rate": 2.1912438336856943e-06, | |
| "loss": 0.4213, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.1517279197015031, | |
| "grad_norm": 29.79505157470703, | |
| "learning_rate": 2.1868393234672306e-06, | |
| "loss": 0.3697, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.1534365674458895, | |
| "grad_norm": 31.861398696899414, | |
| "learning_rate": 2.182434813248767e-06, | |
| "loss": 0.4401, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.155145215190276, | |
| "grad_norm": 25.554759979248047, | |
| "learning_rate": 2.178030303030303e-06, | |
| "loss": 0.444, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.156853862934662, | |
| "grad_norm": 25.79574966430664, | |
| "learning_rate": 2.1736257928118394e-06, | |
| "loss": 0.4126, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.1585625106790485, | |
| "grad_norm": 30.038307189941406, | |
| "learning_rate": 2.169221282593376e-06, | |
| "loss": 0.4217, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.1602711584234346, | |
| "grad_norm": 29.3577823638916, | |
| "learning_rate": 2.1648167723749123e-06, | |
| "loss": 0.4702, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.161979806167821, | |
| "grad_norm": 28.816720962524414, | |
| "learning_rate": 2.160412262156448e-06, | |
| "loss": 0.4295, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.1636884539122072, | |
| "grad_norm": 27.419452667236328, | |
| "learning_rate": 2.1560077519379844e-06, | |
| "loss": 0.4234, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.1653971016565936, | |
| "grad_norm": 26.20050048828125, | |
| "learning_rate": 2.151603241719521e-06, | |
| "loss": 0.387, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.1671057494009798, | |
| "grad_norm": 25.682668685913086, | |
| "learning_rate": 2.1471987315010574e-06, | |
| "loss": 0.3946, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.1688143971453662, | |
| "grad_norm": 28.867799758911133, | |
| "learning_rate": 2.1427942212825936e-06, | |
| "loss": 0.4315, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.1705230448897526, | |
| "grad_norm": 27.76809310913086, | |
| "learning_rate": 2.13838971106413e-06, | |
| "loss": 0.4153, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.1722316926341387, | |
| "grad_norm": 55.45150375366211, | |
| "learning_rate": 2.133985200845666e-06, | |
| "loss": 0.4121, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.1739403403785251, | |
| "grad_norm": 35.20660400390625, | |
| "learning_rate": 2.1295806906272024e-06, | |
| "loss": 0.4489, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.1756489881229113, | |
| "grad_norm": 31.571155548095703, | |
| "learning_rate": 2.1251761804087387e-06, | |
| "loss": 0.4098, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.1773576358672977, | |
| "grad_norm": 39.88226318359375, | |
| "learning_rate": 2.120771670190275e-06, | |
| "loss": 0.3931, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.179066283611684, | |
| "grad_norm": 36.098209381103516, | |
| "learning_rate": 2.1163671599718112e-06, | |
| "loss": 0.3719, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.1807749313560703, | |
| "grad_norm": 27.275989532470703, | |
| "learning_rate": 2.1119626497533475e-06, | |
| "loss": 0.4229, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.1824835791004566, | |
| "grad_norm": 33.59117126464844, | |
| "learning_rate": 2.107558139534884e-06, | |
| "loss": 0.4148, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.1841922268448428, | |
| "grad_norm": 34.12028884887695, | |
| "learning_rate": 2.10315362931642e-06, | |
| "loss": 0.4032, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.1859008745892292, | |
| "grad_norm": 30.586210250854492, | |
| "learning_rate": 2.0987491190979563e-06, | |
| "loss": 0.3997, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.1876095223336154, | |
| "grad_norm": 37.81381607055664, | |
| "learning_rate": 2.0943446088794926e-06, | |
| "loss": 0.355, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.1893181700780018, | |
| "grad_norm": 24.5543270111084, | |
| "learning_rate": 2.0899400986610292e-06, | |
| "loss": 0.4053, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.191026817822388, | |
| "grad_norm": 30.73529052734375, | |
| "learning_rate": 2.0855355884425655e-06, | |
| "loss": 0.366, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.1927354655667743, | |
| "grad_norm": 39.325965881347656, | |
| "learning_rate": 2.0811310782241018e-06, | |
| "loss": 0.4275, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.1944441133111607, | |
| "grad_norm": 23.889474868774414, | |
| "learning_rate": 2.0767265680056376e-06, | |
| "loss": 0.3998, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.196152761055547, | |
| "grad_norm": 40.00243377685547, | |
| "learning_rate": 2.0723220577871743e-06, | |
| "loss": 0.4105, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.1978614087999333, | |
| "grad_norm": 29.528654098510742, | |
| "learning_rate": 2.0679175475687106e-06, | |
| "loss": 0.3773, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.1995700565443195, | |
| "grad_norm": 36.32196044921875, | |
| "learning_rate": 2.063513037350247e-06, | |
| "loss": 0.3841, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.2012787042887059, | |
| "grad_norm": 27.748289108276367, | |
| "learning_rate": 2.059108527131783e-06, | |
| "loss": 0.4538, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.202987352033092, | |
| "grad_norm": 28.619266510009766, | |
| "learning_rate": 2.0547040169133194e-06, | |
| "loss": 0.4644, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.2046959997774784, | |
| "grad_norm": 35.11726379394531, | |
| "learning_rate": 2.0502995066948556e-06, | |
| "loss": 0.4483, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.2064046475218648, | |
| "grad_norm": 26.959434509277344, | |
| "learning_rate": 2.045894996476392e-06, | |
| "loss": 0.4232, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.208113295266251, | |
| "grad_norm": 28.872108459472656, | |
| "learning_rate": 2.041490486257928e-06, | |
| "loss": 0.4432, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.2098219430106374, | |
| "grad_norm": 28.600481033325195, | |
| "learning_rate": 2.0370859760394644e-06, | |
| "loss": 0.4602, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.2115305907550236, | |
| "grad_norm": 29.9169864654541, | |
| "learning_rate": 2.0326814658210007e-06, | |
| "loss": 0.4027, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.21323923849941, | |
| "grad_norm": 25.34281349182129, | |
| "learning_rate": 2.0282769556025374e-06, | |
| "loss": 0.3764, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.2149478862437961, | |
| "grad_norm": 26.075227737426758, | |
| "learning_rate": 2.0238724453840736e-06, | |
| "loss": 0.4072, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.2166565339881825, | |
| "grad_norm": 36.243865966796875, | |
| "learning_rate": 2.0194679351656095e-06, | |
| "loss": 0.4358, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.218365181732569, | |
| "grad_norm": 30.791261672973633, | |
| "learning_rate": 2.0150634249471458e-06, | |
| "loss": 0.4268, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.220073829476955, | |
| "grad_norm": 31.967105865478516, | |
| "learning_rate": 2.0106589147286824e-06, | |
| "loss": 0.4535, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.2217824772213415, | |
| "grad_norm": 22.780460357666016, | |
| "learning_rate": 2.0062544045102187e-06, | |
| "loss": 0.3439, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.2234911249657276, | |
| "grad_norm": 24.721939086914062, | |
| "learning_rate": 2.001849894291755e-06, | |
| "loss": 0.3946, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.225199772710114, | |
| "grad_norm": 31.781126022338867, | |
| "learning_rate": 1.9974453840732912e-06, | |
| "loss": 0.4221, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.2269084204545002, | |
| "grad_norm": 39.08473587036133, | |
| "learning_rate": 1.9930408738548275e-06, | |
| "loss": 0.4108, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.2286170681988866, | |
| "grad_norm": 32.67459487915039, | |
| "learning_rate": 1.9886363636363638e-06, | |
| "loss": 0.4214, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.2303257159432728, | |
| "grad_norm": 36.37043762207031, | |
| "learning_rate": 1.9842318534179e-06, | |
| "loss": 0.3654, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.2320343636876592, | |
| "grad_norm": 30.632551193237305, | |
| "learning_rate": 1.9798273431994363e-06, | |
| "loss": 0.3435, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.2337430114320456, | |
| "grad_norm": 27.24967384338379, | |
| "learning_rate": 1.9754228329809726e-06, | |
| "loss": 0.3853, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.2354516591764317, | |
| "grad_norm": 34.78539276123047, | |
| "learning_rate": 1.971018322762509e-06, | |
| "loss": 0.4485, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.2371603069208181, | |
| "grad_norm": 30.56952476501465, | |
| "learning_rate": 1.9666138125440455e-06, | |
| "loss": 0.4489, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.2388689546652043, | |
| "grad_norm": 25.958833694458008, | |
| "learning_rate": 1.9622093023255814e-06, | |
| "loss": 0.3734, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.2405776024095907, | |
| "grad_norm": 21.95493507385254, | |
| "learning_rate": 1.9578047921071176e-06, | |
| "loss": 0.3972, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.242286250153977, | |
| "grad_norm": 30.268014907836914, | |
| "learning_rate": 1.9534002818886543e-06, | |
| "loss": 0.3786, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.2439948978983633, | |
| "grad_norm": 38.55772399902344, | |
| "learning_rate": 1.9489957716701906e-06, | |
| "loss": 0.4305, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.2457035456427497, | |
| "grad_norm": 34.68306350708008, | |
| "learning_rate": 1.944591261451727e-06, | |
| "loss": 0.4009, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.2474121933871358, | |
| "grad_norm": 31.926652908325195, | |
| "learning_rate": 1.940186751233263e-06, | |
| "loss": 0.3752, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.2491208411315222, | |
| "grad_norm": 25.892805099487305, | |
| "learning_rate": 1.9357822410147994e-06, | |
| "loss": 0.3809, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.2508294888759084, | |
| "grad_norm": 34.08556365966797, | |
| "learning_rate": 1.9313777307963356e-06, | |
| "loss": 0.4777, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.2525381366202948, | |
| "grad_norm": 22.77074432373047, | |
| "learning_rate": 1.926973220577872e-06, | |
| "loss": 0.4021, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.254246784364681, | |
| "grad_norm": 40.69630432128906, | |
| "learning_rate": 1.922568710359408e-06, | |
| "loss": 0.4507, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.2559554321090673, | |
| "grad_norm": 26.732057571411133, | |
| "learning_rate": 1.9181642001409444e-06, | |
| "loss": 0.3927, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.2576640798534537, | |
| "grad_norm": 27.998336791992188, | |
| "learning_rate": 1.9137596899224807e-06, | |
| "loss": 0.4073, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.25937272759784, | |
| "grad_norm": 29.810136795043945, | |
| "learning_rate": 1.9093551797040174e-06, | |
| "loss": 0.4041, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.2610813753422263, | |
| "grad_norm": 26.727005004882812, | |
| "learning_rate": 1.9049506694855532e-06, | |
| "loss": 0.3886, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.2627900230866125, | |
| "grad_norm": 36.07413101196289, | |
| "learning_rate": 1.9005461592670895e-06, | |
| "loss": 0.3573, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.2644986708309989, | |
| "grad_norm": 32.144283294677734, | |
| "learning_rate": 1.896141649048626e-06, | |
| "loss": 0.4074, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.2662073185753853, | |
| "grad_norm": 24.47068977355957, | |
| "learning_rate": 1.8917371388301622e-06, | |
| "loss": 0.3934, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.2679159663197714, | |
| "grad_norm": 29.83626365661621, | |
| "learning_rate": 1.8873326286116985e-06, | |
| "loss": 0.4235, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.2696246140641576, | |
| "grad_norm": 27.749542236328125, | |
| "learning_rate": 1.882928118393235e-06, | |
| "loss": 0.3754, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.271333261808544, | |
| "grad_norm": 25.998891830444336, | |
| "learning_rate": 1.8785236081747712e-06, | |
| "loss": 0.3815, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.2730419095529304, | |
| "grad_norm": 32.17466735839844, | |
| "learning_rate": 1.8741190979563073e-06, | |
| "loss": 0.4338, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.2747505572973166, | |
| "grad_norm": 28.768695831298828, | |
| "learning_rate": 1.8697145877378436e-06, | |
| "loss": 0.4261, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.276459205041703, | |
| "grad_norm": 29.64584732055664, | |
| "learning_rate": 1.86531007751938e-06, | |
| "loss": 0.3678, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.2781678527860891, | |
| "grad_norm": 32.0334587097168, | |
| "learning_rate": 1.8609055673009163e-06, | |
| "loss": 0.3834, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.2798765005304755, | |
| "grad_norm": 33.2336540222168, | |
| "learning_rate": 1.8565010570824526e-06, | |
| "loss": 0.4092, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.281585148274862, | |
| "grad_norm": 27.663143157958984, | |
| "learning_rate": 1.852096546863989e-06, | |
| "loss": 0.453, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.283293796019248, | |
| "grad_norm": 26.34569549560547, | |
| "learning_rate": 1.847692036645525e-06, | |
| "loss": 0.3915, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.2850024437636345, | |
| "grad_norm": 30.302072525024414, | |
| "learning_rate": 1.8432875264270614e-06, | |
| "loss": 0.4404, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.2867110915080207, | |
| "grad_norm": 29.25191879272461, | |
| "learning_rate": 1.8388830162085976e-06, | |
| "loss": 0.3705, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.288419739252407, | |
| "grad_norm": 25.125303268432617, | |
| "learning_rate": 1.8344785059901341e-06, | |
| "loss": 0.4077, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.2901283869967934, | |
| "grad_norm": 36.632869720458984, | |
| "learning_rate": 1.8300739957716704e-06, | |
| "loss": 0.4101, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.2918370347411796, | |
| "grad_norm": 34.67438507080078, | |
| "learning_rate": 1.8256694855532066e-06, | |
| "loss": 0.4094, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.2935456824855658, | |
| "grad_norm": 38.920654296875, | |
| "learning_rate": 1.8212649753347431e-06, | |
| "loss": 0.4094, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.2952543302299522, | |
| "grad_norm": 27.154075622558594, | |
| "learning_rate": 1.8168604651162792e-06, | |
| "loss": 0.3877, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.2969629779743386, | |
| "grad_norm": 28.526582717895508, | |
| "learning_rate": 1.8124559548978154e-06, | |
| "loss": 0.4418, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.2986716257187247, | |
| "grad_norm": 23.52269172668457, | |
| "learning_rate": 1.8080514446793517e-06, | |
| "loss": 0.3878, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.3003802734631111, | |
| "grad_norm": 24.462650299072266, | |
| "learning_rate": 1.8036469344608882e-06, | |
| "loss": 0.3884, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.3020889212074973, | |
| "grad_norm": 28.307111740112305, | |
| "learning_rate": 1.7992424242424244e-06, | |
| "loss": 0.3681, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.3037975689518837, | |
| "grad_norm": 27.19947624206543, | |
| "learning_rate": 1.7948379140239607e-06, | |
| "loss": 0.3905, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.30550621669627, | |
| "grad_norm": 40.48936080932617, | |
| "learning_rate": 1.7904334038054968e-06, | |
| "loss": 0.414, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.3072148644406563, | |
| "grad_norm": 30.751718521118164, | |
| "learning_rate": 1.7860288935870332e-06, | |
| "loss": 0.4488, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.3089235121850427, | |
| "grad_norm": 32.26466369628906, | |
| "learning_rate": 1.7816243833685695e-06, | |
| "loss": 0.4015, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.3106321599294288, | |
| "grad_norm": 32.198055267333984, | |
| "learning_rate": 1.7772198731501058e-06, | |
| "loss": 0.3984, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.3123408076738152, | |
| "grad_norm": 26.396881103515625, | |
| "learning_rate": 1.7728153629316422e-06, | |
| "loss": 0.3793, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.3140494554182014, | |
| "grad_norm": 37.478797912597656, | |
| "learning_rate": 1.7684108527131785e-06, | |
| "loss": 0.3862, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.3157581031625878, | |
| "grad_norm": 40.08991622924805, | |
| "learning_rate": 1.7640063424947148e-06, | |
| "loss": 0.4314, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.317466750906974, | |
| "grad_norm": 33.823116302490234, | |
| "learning_rate": 1.7596018322762508e-06, | |
| "loss": 0.3761, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.3191753986513604, | |
| "grad_norm": 30.793943405151367, | |
| "learning_rate": 1.7551973220577873e-06, | |
| "loss": 0.4022, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.3208840463957467, | |
| "grad_norm": 29.607755661010742, | |
| "learning_rate": 1.7507928118393236e-06, | |
| "loss": 0.4276, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.322592694140133, | |
| "grad_norm": 36.47589111328125, | |
| "learning_rate": 1.7463883016208598e-06, | |
| "loss": 0.3849, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.3243013418845193, | |
| "grad_norm": 29.826234817504883, | |
| "learning_rate": 1.7419837914023963e-06, | |
| "loss": 0.4171, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.3260099896289055, | |
| "grad_norm": 30.34208106994629, | |
| "learning_rate": 1.7375792811839326e-06, | |
| "loss": 0.4239, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.3277186373732919, | |
| "grad_norm": 32.37610626220703, | |
| "learning_rate": 1.7331747709654686e-06, | |
| "loss": 0.4417, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.3294272851176783, | |
| "grad_norm": 29.77751922607422, | |
| "learning_rate": 1.728770260747005e-06, | |
| "loss": 0.3529, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.3311359328620644, | |
| "grad_norm": 27.710689544677734, | |
| "learning_rate": 1.7243657505285414e-06, | |
| "loss": 0.396, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.3328445806064506, | |
| "grad_norm": 35.94044876098633, | |
| "learning_rate": 1.7199612403100776e-06, | |
| "loss": 0.4285, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.334553228350837, | |
| "grad_norm": 31.398242950439453, | |
| "learning_rate": 1.715556730091614e-06, | |
| "loss": 0.3621, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.3362618760952234, | |
| "grad_norm": 22.245264053344727, | |
| "learning_rate": 1.7111522198731504e-06, | |
| "loss": 0.4047, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.3379705238396096, | |
| "grad_norm": 25.29467010498047, | |
| "learning_rate": 1.7067477096546866e-06, | |
| "loss": 0.4099, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.339679171583996, | |
| "grad_norm": 28.86480140686035, | |
| "learning_rate": 1.7023431994362227e-06, | |
| "loss": 0.39, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.3413878193283821, | |
| "grad_norm": 32.23060607910156, | |
| "learning_rate": 1.697938689217759e-06, | |
| "loss": 0.4299, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.3430964670727685, | |
| "grad_norm": 37.11185836791992, | |
| "learning_rate": 1.6935341789992954e-06, | |
| "loss": 0.3409, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.344805114817155, | |
| "grad_norm": 31.354124069213867, | |
| "learning_rate": 1.6891296687808317e-06, | |
| "loss": 0.4066, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.346513762561541, | |
| "grad_norm": 29.411638259887695, | |
| "learning_rate": 1.684725158562368e-06, | |
| "loss": 0.4163, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.3482224103059275, | |
| "grad_norm": 29.95796775817871, | |
| "learning_rate": 1.6803206483439045e-06, | |
| "loss": 0.413, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.3499310580503137, | |
| "grad_norm": 26.26283073425293, | |
| "learning_rate": 1.6759161381254405e-06, | |
| "loss": 0.4122, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.3516397057947, | |
| "grad_norm": 23.130903244018555, | |
| "learning_rate": 1.6715116279069768e-06, | |
| "loss": 0.414, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.3533483535390864, | |
| "grad_norm": 33.57529830932617, | |
| "learning_rate": 1.667107117688513e-06, | |
| "loss": 0.4446, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.3550570012834726, | |
| "grad_norm": 27.545856475830078, | |
| "learning_rate": 1.6627026074700495e-06, | |
| "loss": 0.3671, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.3567656490278588, | |
| "grad_norm": 28.595279693603516, | |
| "learning_rate": 1.6582980972515858e-06, | |
| "loss": 0.3938, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.3584742967722452, | |
| "grad_norm": 34.10601806640625, | |
| "learning_rate": 1.653893587033122e-06, | |
| "loss": 0.4092, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.3601829445166316, | |
| "grad_norm": 36.68281936645508, | |
| "learning_rate": 1.6494890768146585e-06, | |
| "loss": 0.4359, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.3618915922610177, | |
| "grad_norm": 36.802757263183594, | |
| "learning_rate": 1.6450845665961946e-06, | |
| "loss": 0.3926, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.3636002400054041, | |
| "grad_norm": 38.538978576660156, | |
| "learning_rate": 1.6406800563777308e-06, | |
| "loss": 0.4068, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.3653088877497903, | |
| "grad_norm": 24.058565139770508, | |
| "learning_rate": 1.636275546159267e-06, | |
| "loss": 0.3744, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.3670175354941767, | |
| "grad_norm": 25.09589385986328, | |
| "learning_rate": 1.6318710359408036e-06, | |
| "loss": 0.4155, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.368726183238563, | |
| "grad_norm": 35.97821044921875, | |
| "learning_rate": 1.6274665257223398e-06, | |
| "loss": 0.4474, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.3704348309829493, | |
| "grad_norm": 30.579835891723633, | |
| "learning_rate": 1.6230620155038761e-06, | |
| "loss": 0.3728, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.3721434787273357, | |
| "grad_norm": 24.492128372192383, | |
| "learning_rate": 1.6186575052854122e-06, | |
| "loss": 0.4528, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.3738521264717218, | |
| "grad_norm": 29.143388748168945, | |
| "learning_rate": 1.6142529950669486e-06, | |
| "loss": 0.3942, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.3755607742161082, | |
| "grad_norm": 32.478759765625, | |
| "learning_rate": 1.609848484848485e-06, | |
| "loss": 0.3717, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.3772694219604944, | |
| "grad_norm": 30.298538208007812, | |
| "learning_rate": 1.6054439746300212e-06, | |
| "loss": 0.4021, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.3789780697048808, | |
| "grad_norm": 30.066699981689453, | |
| "learning_rate": 1.6010394644115576e-06, | |
| "loss": 0.3804, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.380686717449267, | |
| "grad_norm": 35.945133209228516, | |
| "learning_rate": 1.596634954193094e-06, | |
| "loss": 0.4372, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.3823953651936534, | |
| "grad_norm": 21.04485321044922, | |
| "learning_rate": 1.5922304439746302e-06, | |
| "loss": 0.3724, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.3841040129380398, | |
| "grad_norm": 27.374027252197266, | |
| "learning_rate": 1.5878259337561662e-06, | |
| "loss": 0.3829, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.385812660682426, | |
| "grad_norm": 27.289045333862305, | |
| "learning_rate": 1.5834214235377027e-06, | |
| "loss": 0.3279, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.3875213084268123, | |
| "grad_norm": 19.139402389526367, | |
| "learning_rate": 1.579016913319239e-06, | |
| "loss": 0.3983, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.3892299561711985, | |
| "grad_norm": 31.3995418548584, | |
| "learning_rate": 1.5746124031007752e-06, | |
| "loss": 0.4445, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.3909386039155849, | |
| "grad_norm": 23.96241569519043, | |
| "learning_rate": 1.5702078928823117e-06, | |
| "loss": 0.4473, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.3926472516599713, | |
| "grad_norm": 37.16488265991211, | |
| "learning_rate": 1.565803382663848e-06, | |
| "loss": 0.3865, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.3943558994043574, | |
| "grad_norm": 31.697296142578125, | |
| "learning_rate": 1.561398872445384e-06, | |
| "loss": 0.371, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.3960645471487436, | |
| "grad_norm": 24.636869430541992, | |
| "learning_rate": 1.5569943622269205e-06, | |
| "loss": 0.3392, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.39777319489313, | |
| "grad_norm": 36.1915168762207, | |
| "learning_rate": 1.5525898520084568e-06, | |
| "loss": 0.3992, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.3994818426375164, | |
| "grad_norm": 25.10267448425293, | |
| "learning_rate": 1.548185341789993e-06, | |
| "loss": 0.4454, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.4011904903819026, | |
| "grad_norm": 27.928958892822266, | |
| "learning_rate": 1.5437808315715295e-06, | |
| "loss": 0.3802, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.402899138126289, | |
| "grad_norm": 27.088727951049805, | |
| "learning_rate": 1.5393763213530658e-06, | |
| "loss": 0.3792, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.4046077858706751, | |
| "grad_norm": 28.89666175842285, | |
| "learning_rate": 1.534971811134602e-06, | |
| "loss": 0.3851, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.4063164336150615, | |
| "grad_norm": 35.841854095458984, | |
| "learning_rate": 1.530567300916138e-06, | |
| "loss": 0.4547, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.408025081359448, | |
| "grad_norm": 32.671783447265625, | |
| "learning_rate": 1.5261627906976746e-06, | |
| "loss": 0.3867, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.409733729103834, | |
| "grad_norm": 26.516185760498047, | |
| "learning_rate": 1.5217582804792108e-06, | |
| "loss": 0.4012, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.4114423768482205, | |
| "grad_norm": 37.364967346191406, | |
| "learning_rate": 1.5173537702607471e-06, | |
| "loss": 0.365, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.4131510245926067, | |
| "grad_norm": 27.502492904663086, | |
| "learning_rate": 1.5129492600422836e-06, | |
| "loss": 0.3835, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.414859672336993, | |
| "grad_norm": 30.40472412109375, | |
| "learning_rate": 1.5085447498238199e-06, | |
| "loss": 0.3815, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.4165683200813795, | |
| "grad_norm": 24.262475967407227, | |
| "learning_rate": 1.504140239605356e-06, | |
| "loss": 0.4278, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.4182769678257656, | |
| "grad_norm": 31.887592315673828, | |
| "learning_rate": 1.4997357293868922e-06, | |
| "loss": 0.4523, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.4199856155701518, | |
| "grad_norm": 25.971759796142578, | |
| "learning_rate": 1.4953312191684286e-06, | |
| "loss": 0.4228, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.4216942633145382, | |
| "grad_norm": 24.0732364654541, | |
| "learning_rate": 1.490926708949965e-06, | |
| "loss": 0.4284, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.4234029110589246, | |
| "grad_norm": 35.71511459350586, | |
| "learning_rate": 1.4865221987315012e-06, | |
| "loss": 0.3756, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.4251115588033108, | |
| "grad_norm": 25.345888137817383, | |
| "learning_rate": 1.4821176885130377e-06, | |
| "loss": 0.4182, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.4268202065476971, | |
| "grad_norm": 31.115188598632812, | |
| "learning_rate": 1.477713178294574e-06, | |
| "loss": 0.4363, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.4285288542920833, | |
| "grad_norm": 35.88517761230469, | |
| "learning_rate": 1.47330866807611e-06, | |
| "loss": 0.4125, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.4302375020364697, | |
| "grad_norm": 30.74094581604004, | |
| "learning_rate": 1.4689041578576462e-06, | |
| "loss": 0.3921, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.431946149780856, | |
| "grad_norm": 30.39889144897461, | |
| "learning_rate": 1.4644996476391827e-06, | |
| "loss": 0.4258, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.4336547975252423, | |
| "grad_norm": 30.968448638916016, | |
| "learning_rate": 1.460095137420719e-06, | |
| "loss": 0.3653, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.4353634452696287, | |
| "grad_norm": 29.428611755371094, | |
| "learning_rate": 1.4556906272022552e-06, | |
| "loss": 0.402, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.4370720930140148, | |
| "grad_norm": 29.114940643310547, | |
| "learning_rate": 1.4512861169837917e-06, | |
| "loss": 0.3934, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.4387807407584012, | |
| "grad_norm": 32.88404083251953, | |
| "learning_rate": 1.4468816067653278e-06, | |
| "loss": 0.401, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.4404893885027874, | |
| "grad_norm": 32.356021881103516, | |
| "learning_rate": 1.442477096546864e-06, | |
| "loss": 0.3543, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.4421980362471738, | |
| "grad_norm": 33.27191925048828, | |
| "learning_rate": 1.4380725863284003e-06, | |
| "loss": 0.4492, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.44390668399156, | |
| "grad_norm": 33.288536071777344, | |
| "learning_rate": 1.4336680761099368e-06, | |
| "loss": 0.3954, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.4456153317359464, | |
| "grad_norm": 30.489593505859375, | |
| "learning_rate": 1.429263565891473e-06, | |
| "loss": 0.4248, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.4473239794803328, | |
| "grad_norm": 38.16218566894531, | |
| "learning_rate": 1.4248590556730093e-06, | |
| "loss": 0.4624, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.449032627224719, | |
| "grad_norm": 25.624847412109375, | |
| "learning_rate": 1.4204545454545458e-06, | |
| "loss": 0.4115, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.4507412749691053, | |
| "grad_norm": 34.9322395324707, | |
| "learning_rate": 1.4160500352360818e-06, | |
| "loss": 0.4179, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.4524499227134915, | |
| "grad_norm": 31.277803421020508, | |
| "learning_rate": 1.4116455250176181e-06, | |
| "loss": 0.3662, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.4541585704578779, | |
| "grad_norm": 32.513633728027344, | |
| "learning_rate": 1.4072410147991544e-06, | |
| "loss": 0.4179, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.4558672182022643, | |
| "grad_norm": 31.79774284362793, | |
| "learning_rate": 1.4028365045806909e-06, | |
| "loss": 0.4221, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.4575758659466505, | |
| "grad_norm": 35.4056282043457, | |
| "learning_rate": 1.3984319943622271e-06, | |
| "loss": 0.4233, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.4592845136910366, | |
| "grad_norm": 32.08757019042969, | |
| "learning_rate": 1.3940274841437634e-06, | |
| "loss": 0.4619, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.460993161435423, | |
| "grad_norm": 31.21336555480957, | |
| "learning_rate": 1.3896229739252994e-06, | |
| "loss": 0.3375, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.4627018091798094, | |
| "grad_norm": 31.44502067565918, | |
| "learning_rate": 1.385218463706836e-06, | |
| "loss": 0.4121, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.4644104569241956, | |
| "grad_norm": 21.62190818786621, | |
| "learning_rate": 1.3808139534883722e-06, | |
| "loss": 0.3727, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.466119104668582, | |
| "grad_norm": 33.74460983276367, | |
| "learning_rate": 1.3764094432699084e-06, | |
| "loss": 0.426, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.4678277524129681, | |
| "grad_norm": 22.65791130065918, | |
| "learning_rate": 1.372004933051445e-06, | |
| "loss": 0.3669, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.4695364001573545, | |
| "grad_norm": 38.821624755859375, | |
| "learning_rate": 1.3676004228329812e-06, | |
| "loss": 0.3906, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.471245047901741, | |
| "grad_norm": 38.148475646972656, | |
| "learning_rate": 1.3631959126145175e-06, | |
| "loss": 0.4369, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.472953695646127, | |
| "grad_norm": 25.316579818725586, | |
| "learning_rate": 1.3587914023960535e-06, | |
| "loss": 0.3884, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.4746623433905135, | |
| "grad_norm": 40.01092529296875, | |
| "learning_rate": 1.35438689217759e-06, | |
| "loss": 0.4031, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.4763709911348997, | |
| "grad_norm": 23.749156951904297, | |
| "learning_rate": 1.3499823819591262e-06, | |
| "loss": 0.4174, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.478079638879286, | |
| "grad_norm": 25.226078033447266, | |
| "learning_rate": 1.3455778717406625e-06, | |
| "loss": 0.3646, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.4797882866236725, | |
| "grad_norm": 32.117034912109375, | |
| "learning_rate": 1.341173361522199e-06, | |
| "loss": 0.409, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.4814969343680586, | |
| "grad_norm": 27.94634437561035, | |
| "learning_rate": 1.3367688513037353e-06, | |
| "loss": 0.4158, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.4832055821124448, | |
| "grad_norm": 27.515697479248047, | |
| "learning_rate": 1.3323643410852713e-06, | |
| "loss": 0.3976, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.4849142298568312, | |
| "grad_norm": 41.739105224609375, | |
| "learning_rate": 1.3279598308668076e-06, | |
| "loss": 0.337, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.4866228776012176, | |
| "grad_norm": 37.13324737548828, | |
| "learning_rate": 1.323555320648344e-06, | |
| "loss": 0.381, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.4883315253456038, | |
| "grad_norm": 23.801599502563477, | |
| "learning_rate": 1.3191508104298803e-06, | |
| "loss": 0.4244, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.4900401730899901, | |
| "grad_norm": 28.293941497802734, | |
| "learning_rate": 1.3147463002114166e-06, | |
| "loss": 0.3995, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.4917488208343763, | |
| "grad_norm": 23.51873779296875, | |
| "learning_rate": 1.310341789992953e-06, | |
| "loss": 0.3845, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.4934574685787627, | |
| "grad_norm": 25.12767219543457, | |
| "learning_rate": 1.3059372797744893e-06, | |
| "loss": 0.3522, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.495166116323149, | |
| "grad_norm": 21.655824661254883, | |
| "learning_rate": 1.3015327695560254e-06, | |
| "loss": 0.3776, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.4968747640675353, | |
| "grad_norm": 32.18788146972656, | |
| "learning_rate": 1.2971282593375616e-06, | |
| "loss": 0.401, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.4985834118119217, | |
| "grad_norm": 45.1816520690918, | |
| "learning_rate": 1.2927237491190981e-06, | |
| "loss": 0.4374, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.5002920595563078, | |
| "grad_norm": 33.538047790527344, | |
| "learning_rate": 1.2883192389006344e-06, | |
| "loss": 0.4288, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.5020007073006942, | |
| "grad_norm": 31.226816177368164, | |
| "learning_rate": 1.2839147286821706e-06, | |
| "loss": 0.3961, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.5037093550450806, | |
| "grad_norm": 24.751720428466797, | |
| "learning_rate": 1.2795102184637071e-06, | |
| "loss": 0.351, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.5054180027894668, | |
| "grad_norm": 35.17796325683594, | |
| "learning_rate": 1.2751057082452432e-06, | |
| "loss": 0.4673, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.507126650533853, | |
| "grad_norm": 26.320959091186523, | |
| "learning_rate": 1.2707011980267794e-06, | |
| "loss": 0.4025, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.5088352982782394, | |
| "grad_norm": 25.2487735748291, | |
| "learning_rate": 1.2662966878083157e-06, | |
| "loss": 0.3631, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.5105439460226258, | |
| "grad_norm": 26.821157455444336, | |
| "learning_rate": 1.2618921775898522e-06, | |
| "loss": 0.3253, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.512252593767012, | |
| "grad_norm": 20.908111572265625, | |
| "learning_rate": 1.2574876673713885e-06, | |
| "loss": 0.3598, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.513961241511398, | |
| "grad_norm": 39.165706634521484, | |
| "learning_rate": 1.2530831571529247e-06, | |
| "loss": 0.4052, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.5156698892557845, | |
| "grad_norm": 32.390995025634766, | |
| "learning_rate": 1.248678646934461e-06, | |
| "loss": 0.4037, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.517378537000171, | |
| "grad_norm": 28.246858596801758, | |
| "learning_rate": 1.2442741367159972e-06, | |
| "loss": 0.3995, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.5190871847445573, | |
| "grad_norm": 31.864625930786133, | |
| "learning_rate": 1.2398696264975335e-06, | |
| "loss": 0.3803, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.5207958324889435, | |
| "grad_norm": 23.626855850219727, | |
| "learning_rate": 1.2354651162790698e-06, | |
| "loss": 0.3864, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.5225044802333296, | |
| "grad_norm": 24.359804153442383, | |
| "learning_rate": 1.2310606060606063e-06, | |
| "loss": 0.3669, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.524213127977716, | |
| "grad_norm": 27.360803604125977, | |
| "learning_rate": 1.2266560958421425e-06, | |
| "loss": 0.3916, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.5259217757221024, | |
| "grad_norm": 27.511882781982422, | |
| "learning_rate": 1.2222515856236788e-06, | |
| "loss": 0.3745, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.5276304234664888, | |
| "grad_norm": 26.44959831237793, | |
| "learning_rate": 1.217847075405215e-06, | |
| "loss": 0.3918, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.529339071210875, | |
| "grad_norm": 29.03026008605957, | |
| "learning_rate": 1.2134425651867513e-06, | |
| "loss": 0.3829, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.5310477189552611, | |
| "grad_norm": 31.914691925048828, | |
| "learning_rate": 1.2090380549682876e-06, | |
| "loss": 0.3987, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.5327563666996475, | |
| "grad_norm": 39.63639831542969, | |
| "learning_rate": 1.2046335447498238e-06, | |
| "loss": 0.4399, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.534465014444034, | |
| "grad_norm": 25.887651443481445, | |
| "learning_rate": 1.2002290345313603e-06, | |
| "loss": 0.4067, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.53617366218842, | |
| "grad_norm": 30.17310333251953, | |
| "learning_rate": 1.1958245243128964e-06, | |
| "loss": 0.397, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.5378823099328063, | |
| "grad_norm": 28.75864601135254, | |
| "learning_rate": 1.1914200140944329e-06, | |
| "loss": 0.4094, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.5395909576771927, | |
| "grad_norm": 40.84502029418945, | |
| "learning_rate": 1.1870155038759691e-06, | |
| "loss": 0.35, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.541299605421579, | |
| "grad_norm": 27.4794864654541, | |
| "learning_rate": 1.1826109936575054e-06, | |
| "loss": 0.4164, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.5430082531659655, | |
| "grad_norm": 35.87556076049805, | |
| "learning_rate": 1.1782064834390416e-06, | |
| "loss": 0.3561, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.5447169009103516, | |
| "grad_norm": 32.51176071166992, | |
| "learning_rate": 1.173801973220578e-06, | |
| "loss": 0.3813, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.5464255486547378, | |
| "grad_norm": 34.02533721923828, | |
| "learning_rate": 1.1693974630021144e-06, | |
| "loss": 0.4186, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.5481341963991242, | |
| "grad_norm": 25.257232666015625, | |
| "learning_rate": 1.1649929527836504e-06, | |
| "loss": 0.3428, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.5498428441435106, | |
| "grad_norm": 27.12441635131836, | |
| "learning_rate": 1.160588442565187e-06, | |
| "loss": 0.3731, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.5515514918878968, | |
| "grad_norm": 32.43393325805664, | |
| "learning_rate": 1.1561839323467232e-06, | |
| "loss": 0.3484, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.5532601396322832, | |
| "grad_norm": 35.18085479736328, | |
| "learning_rate": 1.1517794221282595e-06, | |
| "loss": 0.3964, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.5549687873766693, | |
| "grad_norm": 30.050132751464844, | |
| "learning_rate": 1.1473749119097957e-06, | |
| "loss": 0.3969, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.5566774351210557, | |
| "grad_norm": 34.45301818847656, | |
| "learning_rate": 1.142970401691332e-06, | |
| "loss": 0.3945, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.5583860828654421, | |
| "grad_norm": 31.908273696899414, | |
| "learning_rate": 1.1385658914728682e-06, | |
| "loss": 0.3823, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.5600947306098283, | |
| "grad_norm": 26.37557601928711, | |
| "learning_rate": 1.1341613812544045e-06, | |
| "loss": 0.3638, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.5618033783542145, | |
| "grad_norm": 25.550487518310547, | |
| "learning_rate": 1.129756871035941e-06, | |
| "loss": 0.3556, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.5635120260986008, | |
| "grad_norm": 28.921995162963867, | |
| "learning_rate": 1.1253523608174773e-06, | |
| "loss": 0.4134, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.5652206738429872, | |
| "grad_norm": 26.404720306396484, | |
| "learning_rate": 1.1209478505990135e-06, | |
| "loss": 0.3664, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.5669293215873736, | |
| "grad_norm": 29.88231086730957, | |
| "learning_rate": 1.1165433403805498e-06, | |
| "loss": 0.3848, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.5686379693317598, | |
| "grad_norm": 38.20869827270508, | |
| "learning_rate": 1.112138830162086e-06, | |
| "loss": 0.3823, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.570346617076146, | |
| "grad_norm": 42.82072448730469, | |
| "learning_rate": 1.1077343199436223e-06, | |
| "loss": 0.372, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.5720552648205324, | |
| "grad_norm": 27.147830963134766, | |
| "learning_rate": 1.1033298097251586e-06, | |
| "loss": 0.4172, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.5737639125649188, | |
| "grad_norm": 32.74360656738281, | |
| "learning_rate": 1.098925299506695e-06, | |
| "loss": 0.3772, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.575472560309305, | |
| "grad_norm": 23.909259796142578, | |
| "learning_rate": 1.0945207892882311e-06, | |
| "loss": 0.3215, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.5771812080536913, | |
| "grad_norm": 32.20122146606445, | |
| "learning_rate": 1.0901162790697676e-06, | |
| "loss": 0.3864, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.5788898557980775, | |
| "grad_norm": 29.837228775024414, | |
| "learning_rate": 1.0857117688513039e-06, | |
| "loss": 0.4026, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.580598503542464, | |
| "grad_norm": 34.161033630371094, | |
| "learning_rate": 1.0813072586328401e-06, | |
| "loss": 0.4217, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.5823071512868503, | |
| "grad_norm": 39.935638427734375, | |
| "learning_rate": 1.0769027484143764e-06, | |
| "loss": 0.3818, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.5840157990312365, | |
| "grad_norm": 29.2546443939209, | |
| "learning_rate": 1.0724982381959126e-06, | |
| "loss": 0.406, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.5857244467756226, | |
| "grad_norm": 33.643367767333984, | |
| "learning_rate": 1.0680937279774491e-06, | |
| "loss": 0.3627, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.587433094520009, | |
| "grad_norm": 48.66536331176758, | |
| "learning_rate": 1.0636892177589852e-06, | |
| "loss": 0.3924, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.5891417422643954, | |
| "grad_norm": 29.057153701782227, | |
| "learning_rate": 1.0592847075405217e-06, | |
| "loss": 0.3797, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.5908503900087818, | |
| "grad_norm": 30.0162296295166, | |
| "learning_rate": 1.054880197322058e-06, | |
| "loss": 0.4337, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.592559037753168, | |
| "grad_norm": 30.404836654663086, | |
| "learning_rate": 1.0504756871035942e-06, | |
| "loss": 0.3784, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.5942676854975542, | |
| "grad_norm": 41.39947509765625, | |
| "learning_rate": 1.0460711768851305e-06, | |
| "loss": 0.3421, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.5959763332419405, | |
| "grad_norm": 25.326269149780273, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "loss": 0.3901, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.597684980986327, | |
| "grad_norm": 29.41655731201172, | |
| "learning_rate": 1.037262156448203e-06, | |
| "loss": 0.3393, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.5993936287307131, | |
| "grad_norm": 30.155683517456055, | |
| "learning_rate": 1.0328576462297392e-06, | |
| "loss": 0.3745, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.6011022764750993, | |
| "grad_norm": 27.556821823120117, | |
| "learning_rate": 1.0284531360112757e-06, | |
| "loss": 0.4145, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.6028109242194857, | |
| "grad_norm": 39.412540435791016, | |
| "learning_rate": 1.0240486257928118e-06, | |
| "loss": 0.3815, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.604519571963872, | |
| "grad_norm": 30.376188278198242, | |
| "learning_rate": 1.0196441155743483e-06, | |
| "loss": 0.3812, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.6062282197082585, | |
| "grad_norm": 35.539546966552734, | |
| "learning_rate": 1.0152396053558845e-06, | |
| "loss": 0.3949, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.6079368674526446, | |
| "grad_norm": 27.052183151245117, | |
| "learning_rate": 1.0108350951374208e-06, | |
| "loss": 0.4059, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.6096455151970308, | |
| "grad_norm": 22.53864860534668, | |
| "learning_rate": 1.006430584918957e-06, | |
| "loss": 0.3381, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.6113541629414172, | |
| "grad_norm": 33.662052154541016, | |
| "learning_rate": 1.0020260747004933e-06, | |
| "loss": 0.3852, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.6130628106858036, | |
| "grad_norm": 35.129295349121094, | |
| "learning_rate": 9.976215644820298e-07, | |
| "loss": 0.3737, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.6147714584301898, | |
| "grad_norm": 14.55792236328125, | |
| "learning_rate": 9.932170542635658e-07, | |
| "loss": 0.3682, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.6164801061745762, | |
| "grad_norm": 34.31297302246094, | |
| "learning_rate": 9.888125440451023e-07, | |
| "loss": 0.394, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.6181887539189623, | |
| "grad_norm": 28.12514305114746, | |
| "learning_rate": 9.844080338266386e-07, | |
| "loss": 0.4253, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.6198974016633487, | |
| "grad_norm": 31.71592140197754, | |
| "learning_rate": 9.800035236081749e-07, | |
| "loss": 0.3742, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.6216060494077351, | |
| "grad_norm": 33.897281646728516, | |
| "learning_rate": 9.755990133897111e-07, | |
| "loss": 0.3855, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.6233146971521213, | |
| "grad_norm": 26.927099227905273, | |
| "learning_rate": 9.711945031712474e-07, | |
| "loss": 0.4042, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.6250233448965075, | |
| "grad_norm": 31.36831283569336, | |
| "learning_rate": 9.667899929527836e-07, | |
| "loss": 0.4033, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.6267319926408939, | |
| "grad_norm": 32.52813720703125, | |
| "learning_rate": 9.6238548273432e-07, | |
| "loss": 0.4001, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.6284406403852802, | |
| "grad_norm": 29.446916580200195, | |
| "learning_rate": 9.579809725158564e-07, | |
| "loss": 0.3865, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.6301492881296666, | |
| "grad_norm": 22.962326049804688, | |
| "learning_rate": 9.535764622973927e-07, | |
| "loss": 0.3399, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.6318579358740528, | |
| "grad_norm": 22.97249984741211, | |
| "learning_rate": 9.491719520789289e-07, | |
| "loss": 0.3967, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.633566583618439, | |
| "grad_norm": 40.018470764160156, | |
| "learning_rate": 9.447674418604652e-07, | |
| "loss": 0.3935, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.6352752313628254, | |
| "grad_norm": 22.444059371948242, | |
| "learning_rate": 9.403629316420016e-07, | |
| "loss": 0.3224, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.6369838791072118, | |
| "grad_norm": 34.330078125, | |
| "learning_rate": 9.359584214235377e-07, | |
| "loss": 0.3486, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.638692526851598, | |
| "grad_norm": 35.540557861328125, | |
| "learning_rate": 9.315539112050741e-07, | |
| "loss": 0.3563, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.6404011745959843, | |
| "grad_norm": 24.032527923583984, | |
| "learning_rate": 9.271494009866105e-07, | |
| "loss": 0.3191, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.6421098223403705, | |
| "grad_norm": 38.39560317993164, | |
| "learning_rate": 9.227448907681466e-07, | |
| "loss": 0.3694, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.643818470084757, | |
| "grad_norm": 40.29669952392578, | |
| "learning_rate": 9.18340380549683e-07, | |
| "loss": 0.3968, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.6455271178291433, | |
| "grad_norm": 28.967849731445312, | |
| "learning_rate": 9.139358703312193e-07, | |
| "loss": 0.3501, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.6472357655735295, | |
| "grad_norm": 44.81010437011719, | |
| "learning_rate": 9.095313601127555e-07, | |
| "loss": 0.415, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.6489444133179156, | |
| "grad_norm": 25.93589210510254, | |
| "learning_rate": 9.051268498942918e-07, | |
| "loss": 0.3933, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.650653061062302, | |
| "grad_norm": 31.824234008789062, | |
| "learning_rate": 9.007223396758282e-07, | |
| "loss": 0.3739, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.6523617088066884, | |
| "grad_norm": 34.3546142578125, | |
| "learning_rate": 8.963178294573645e-07, | |
| "loss": 0.3996, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.6540703565510748, | |
| "grad_norm": 23.639925003051758, | |
| "learning_rate": 8.919133192389007e-07, | |
| "loss": 0.3853, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.655779004295461, | |
| "grad_norm": 30.642179489135742, | |
| "learning_rate": 8.875088090204371e-07, | |
| "loss": 0.3762, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.6574876520398472, | |
| "grad_norm": 30.923620223999023, | |
| "learning_rate": 8.831042988019733e-07, | |
| "loss": 0.3923, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.6591962997842336, | |
| "grad_norm": 27.91309356689453, | |
| "learning_rate": 8.786997885835096e-07, | |
| "loss": 0.3604, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.66090494752862, | |
| "grad_norm": 23.54095458984375, | |
| "learning_rate": 8.742952783650459e-07, | |
| "loss": 0.3512, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.6626135952730061, | |
| "grad_norm": 31.084632873535156, | |
| "learning_rate": 8.698907681465822e-07, | |
| "loss": 0.3475, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.6643222430173923, | |
| "grad_norm": 34.60007095336914, | |
| "learning_rate": 8.654862579281184e-07, | |
| "loss": 0.3999, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.6660308907617787, | |
| "grad_norm": 32.12785339355469, | |
| "learning_rate": 8.610817477096548e-07, | |
| "loss": 0.4169, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.667739538506165, | |
| "grad_norm": 26.730180740356445, | |
| "learning_rate": 8.566772374911911e-07, | |
| "loss": 0.3191, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.6694481862505515, | |
| "grad_norm": 29.191030502319336, | |
| "learning_rate": 8.522727272727273e-07, | |
| "loss": 0.3468, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.6711568339949376, | |
| "grad_norm": 30.42900848388672, | |
| "learning_rate": 8.478682170542637e-07, | |
| "loss": 0.3948, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.6728654817393238, | |
| "grad_norm": 36.10079574584961, | |
| "learning_rate": 8.434637068357999e-07, | |
| "loss": 0.3741, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.6745741294837102, | |
| "grad_norm": 24.84588050842285, | |
| "learning_rate": 8.390591966173363e-07, | |
| "loss": 0.3342, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.6762827772280966, | |
| "grad_norm": 28.362817764282227, | |
| "learning_rate": 8.346546863988725e-07, | |
| "loss": 0.3723, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.6779914249724828, | |
| "grad_norm": 31.064945220947266, | |
| "learning_rate": 8.302501761804088e-07, | |
| "loss": 0.3648, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.6797000727168692, | |
| "grad_norm": 43.73317337036133, | |
| "learning_rate": 8.258456659619452e-07, | |
| "loss": 0.3849, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.6814087204612553, | |
| "grad_norm": 36.45133590698242, | |
| "learning_rate": 8.214411557434814e-07, | |
| "loss": 0.4135, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.6831173682056417, | |
| "grad_norm": 24.040943145751953, | |
| "learning_rate": 8.170366455250177e-07, | |
| "loss": 0.3828, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.6848260159500281, | |
| "grad_norm": 34.76506805419922, | |
| "learning_rate": 8.12632135306554e-07, | |
| "loss": 0.3583, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.6865346636944143, | |
| "grad_norm": 34.13239669799805, | |
| "learning_rate": 8.082276250880903e-07, | |
| "loss": 0.3613, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.6882433114388005, | |
| "grad_norm": 25.49158477783203, | |
| "learning_rate": 8.038231148696265e-07, | |
| "loss": 0.4262, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.6899519591831869, | |
| "grad_norm": 35.75178909301758, | |
| "learning_rate": 7.994186046511629e-07, | |
| "loss": 0.4004, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.6916606069275733, | |
| "grad_norm": 32.253150939941406, | |
| "learning_rate": 7.95014094432699e-07, | |
| "loss": 0.3644, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.6933692546719596, | |
| "grad_norm": 37.89906692504883, | |
| "learning_rate": 7.906095842142354e-07, | |
| "loss": 0.3617, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.6950779024163458, | |
| "grad_norm": 21.644926071166992, | |
| "learning_rate": 7.862050739957718e-07, | |
| "loss": 0.3764, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.696786550160732, | |
| "grad_norm": 34.483253479003906, | |
| "learning_rate": 7.818005637773081e-07, | |
| "loss": 0.4131, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.6984951979051184, | |
| "grad_norm": 43.877708435058594, | |
| "learning_rate": 7.773960535588443e-07, | |
| "loss": 0.3782, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.7002038456495048, | |
| "grad_norm": 56.46201705932617, | |
| "learning_rate": 7.729915433403806e-07, | |
| "loss": 0.3922, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.701912493393891, | |
| "grad_norm": 30.294981002807617, | |
| "learning_rate": 7.68587033121917e-07, | |
| "loss": 0.3839, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.7036211411382773, | |
| "grad_norm": 36.37797927856445, | |
| "learning_rate": 7.641825229034531e-07, | |
| "loss": 0.37, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.7053297888826635, | |
| "grad_norm": 32.37224197387695, | |
| "learning_rate": 7.597780126849895e-07, | |
| "loss": 0.3552, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.70703843662705, | |
| "grad_norm": 37.46088790893555, | |
| "learning_rate": 7.553735024665259e-07, | |
| "loss": 0.3815, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.7087470843714363, | |
| "grad_norm": 32.850372314453125, | |
| "learning_rate": 7.50968992248062e-07, | |
| "loss": 0.3688, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.7104557321158225, | |
| "grad_norm": 47.176239013671875, | |
| "learning_rate": 7.465644820295984e-07, | |
| "loss": 0.3789, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.7121643798602086, | |
| "grad_norm": 24.945432662963867, | |
| "learning_rate": 7.421599718111347e-07, | |
| "loss": 0.382, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.713873027604595, | |
| "grad_norm": 21.04591941833496, | |
| "learning_rate": 7.377554615926709e-07, | |
| "loss": 0.3695, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.7155816753489814, | |
| "grad_norm": 33.52159881591797, | |
| "learning_rate": 7.333509513742072e-07, | |
| "loss": 0.3597, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.7172903230933678, | |
| "grad_norm": 30.122079849243164, | |
| "learning_rate": 7.289464411557436e-07, | |
| "loss": 0.3875, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.718998970837754, | |
| "grad_norm": 24.38621711730957, | |
| "learning_rate": 7.245419309372799e-07, | |
| "loss": 0.3351, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.7207076185821402, | |
| "grad_norm": 47.98723220825195, | |
| "learning_rate": 7.201374207188161e-07, | |
| "loss": 0.3836, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.7224162663265266, | |
| "grad_norm": 37.2187614440918, | |
| "learning_rate": 7.157329105003525e-07, | |
| "loss": 0.3614, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.724124914070913, | |
| "grad_norm": 30.610862731933594, | |
| "learning_rate": 7.113284002818887e-07, | |
| "loss": 0.3795, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.7258335618152991, | |
| "grad_norm": 22.508331298828125, | |
| "learning_rate": 7.06923890063425e-07, | |
| "loss": 0.3536, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.7275422095596853, | |
| "grad_norm": 37.26981735229492, | |
| "learning_rate": 7.025193798449613e-07, | |
| "loss": 0.3832, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.7292508573040717, | |
| "grad_norm": 19.104637145996094, | |
| "learning_rate": 6.981148696264976e-07, | |
| "loss": 0.3522, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.730959505048458, | |
| "grad_norm": 23.52967071533203, | |
| "learning_rate": 6.937103594080338e-07, | |
| "loss": 0.391, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.7326681527928445, | |
| "grad_norm": 27.223722457885742, | |
| "learning_rate": 6.893058491895702e-07, | |
| "loss": 0.3904, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.7343768005372306, | |
| "grad_norm": 30.344676971435547, | |
| "learning_rate": 6.849013389711065e-07, | |
| "loss": 0.391, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.7360854482816168, | |
| "grad_norm": 22.83699607849121, | |
| "learning_rate": 6.804968287526427e-07, | |
| "loss": 0.3653, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.7377940960260032, | |
| "grad_norm": 32.153663635253906, | |
| "learning_rate": 6.760923185341791e-07, | |
| "loss": 0.3531, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.7395027437703896, | |
| "grad_norm": 36.864925384521484, | |
| "learning_rate": 6.716878083157153e-07, | |
| "loss": 0.3391, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.7412113915147758, | |
| "grad_norm": 41.715576171875, | |
| "learning_rate": 6.672832980972517e-07, | |
| "loss": 0.3993, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.7429200392591622, | |
| "grad_norm": 35.69621276855469, | |
| "learning_rate": 6.628787878787879e-07, | |
| "loss": 0.3121, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.7446286870035483, | |
| "grad_norm": 33.0884895324707, | |
| "learning_rate": 6.584742776603242e-07, | |
| "loss": 0.3827, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.7463373347479347, | |
| "grad_norm": 26.627431869506836, | |
| "learning_rate": 6.540697674418606e-07, | |
| "loss": 0.3759, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.7480459824923211, | |
| "grad_norm": 32.8358039855957, | |
| "learning_rate": 6.496652572233968e-07, | |
| "loss": 0.3983, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.7497546302367073, | |
| "grad_norm": 26.243980407714844, | |
| "learning_rate": 6.452607470049331e-07, | |
| "loss": 0.3857, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 1.7514632779810935, | |
| "grad_norm": 26.84737205505371, | |
| "learning_rate": 6.408562367864694e-07, | |
| "loss": 0.3771, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.7531719257254799, | |
| "grad_norm": 31.410524368286133, | |
| "learning_rate": 6.364517265680057e-07, | |
| "loss": 0.3794, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.7548805734698663, | |
| "grad_norm": 38.454044342041016, | |
| "learning_rate": 6.320472163495419e-07, | |
| "loss": 0.3595, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 1.7565892212142526, | |
| "grad_norm": 18.861108779907227, | |
| "learning_rate": 6.276427061310783e-07, | |
| "loss": 0.3457, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 1.7582978689586388, | |
| "grad_norm": 28.400564193725586, | |
| "learning_rate": 6.232381959126146e-07, | |
| "loss": 0.3511, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.760006516703025, | |
| "grad_norm": 47.41775894165039, | |
| "learning_rate": 6.188336856941508e-07, | |
| "loss": 0.3544, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.7617151644474114, | |
| "grad_norm": 30.009010314941406, | |
| "learning_rate": 6.144291754756872e-07, | |
| "loss": 0.3273, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 1.7634238121917978, | |
| "grad_norm": 25.67041778564453, | |
| "learning_rate": 6.100246652572235e-07, | |
| "loss": 0.3619, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.765132459936184, | |
| "grad_norm": 28.06591796875, | |
| "learning_rate": 6.056201550387597e-07, | |
| "loss": 0.3577, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 1.7668411076805703, | |
| "grad_norm": 25.041889190673828, | |
| "learning_rate": 6.01215644820296e-07, | |
| "loss": 0.3688, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 1.7685497554249565, | |
| "grad_norm": 32.429443359375, | |
| "learning_rate": 5.968111346018323e-07, | |
| "loss": 0.4009, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.770258403169343, | |
| "grad_norm": 23.519460678100586, | |
| "learning_rate": 5.924066243833686e-07, | |
| "loss": 0.366, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 1.7719670509137293, | |
| "grad_norm": 36.32727813720703, | |
| "learning_rate": 5.880021141649049e-07, | |
| "loss": 0.4004, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 1.7736756986581155, | |
| "grad_norm": 30.008052825927734, | |
| "learning_rate": 5.835976039464412e-07, | |
| "loss": 0.3707, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.7753843464025016, | |
| "grad_norm": 34.22142791748047, | |
| "learning_rate": 5.791930937279775e-07, | |
| "loss": 0.3848, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 1.777092994146888, | |
| "grad_norm": 21.506912231445312, | |
| "learning_rate": 5.747885835095138e-07, | |
| "loss": 0.3677, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.7788016418912744, | |
| "grad_norm": 33.4599609375, | |
| "learning_rate": 5.703840732910502e-07, | |
| "loss": 0.394, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.7805102896356608, | |
| "grad_norm": 36.893394470214844, | |
| "learning_rate": 5.659795630725864e-07, | |
| "loss": 0.3871, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 1.782218937380047, | |
| "grad_norm": 46.39961624145508, | |
| "learning_rate": 5.615750528541227e-07, | |
| "loss": 0.4025, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 1.7839275851244332, | |
| "grad_norm": 23.366689682006836, | |
| "learning_rate": 5.57170542635659e-07, | |
| "loss": 0.3694, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.7856362328688196, | |
| "grad_norm": 61.47678756713867, | |
| "learning_rate": 5.527660324171952e-07, | |
| "loss": 0.3759, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.787344880613206, | |
| "grad_norm": 27.12241554260254, | |
| "learning_rate": 5.483615221987316e-07, | |
| "loss": 0.3552, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 1.7890535283575921, | |
| "grad_norm": 29.733963012695312, | |
| "learning_rate": 5.439570119802679e-07, | |
| "loss": 0.3966, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.7907621761019783, | |
| "grad_norm": 29.353418350219727, | |
| "learning_rate": 5.395525017618041e-07, | |
| "loss": 0.3642, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 1.7924708238463647, | |
| "grad_norm": 26.14151954650879, | |
| "learning_rate": 5.351479915433405e-07, | |
| "loss": 0.3776, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 1.794179471590751, | |
| "grad_norm": 33.61710739135742, | |
| "learning_rate": 5.307434813248768e-07, | |
| "loss": 0.3484, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.7958881193351375, | |
| "grad_norm": 25.79818344116211, | |
| "learning_rate": 5.26338971106413e-07, | |
| "loss": 0.3498, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 1.7975967670795236, | |
| "grad_norm": 34.903533935546875, | |
| "learning_rate": 5.219344608879493e-07, | |
| "loss": 0.4224, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 1.7993054148239098, | |
| "grad_norm": 42.71726608276367, | |
| "learning_rate": 5.175299506694856e-07, | |
| "loss": 0.3524, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.8010140625682962, | |
| "grad_norm": 35.538875579833984, | |
| "learning_rate": 5.131254404510219e-07, | |
| "loss": 0.3471, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 1.8027227103126826, | |
| "grad_norm": 34.071388244628906, | |
| "learning_rate": 5.087209302325582e-07, | |
| "loss": 0.3994, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.8044313580570688, | |
| "grad_norm": 30.25017738342285, | |
| "learning_rate": 5.043164200140945e-07, | |
| "loss": 0.4127, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.8061400058014552, | |
| "grad_norm": 27.28138542175293, | |
| "learning_rate": 4.999119097956308e-07, | |
| "loss": 0.3286, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 1.8078486535458413, | |
| "grad_norm": 38.14504623413086, | |
| "learning_rate": 4.955073995771671e-07, | |
| "loss": 0.3763, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 1.8095573012902277, | |
| "grad_norm": 30.078149795532227, | |
| "learning_rate": 4.911028893587034e-07, | |
| "loss": 0.352, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.8112659490346141, | |
| "grad_norm": 24.926767349243164, | |
| "learning_rate": 4.866983791402396e-07, | |
| "loss": 0.3421, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.8129745967790003, | |
| "grad_norm": 46.622650146484375, | |
| "learning_rate": 4.822938689217759e-07, | |
| "loss": 0.3835, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 1.8146832445233865, | |
| "grad_norm": 21.296682357788086, | |
| "learning_rate": 4.778893587033123e-07, | |
| "loss": 0.4098, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.8163918922677729, | |
| "grad_norm": 27.754459381103516, | |
| "learning_rate": 4.7348484848484853e-07, | |
| "loss": 0.3576, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 1.8181005400121593, | |
| "grad_norm": 26.44339942932129, | |
| "learning_rate": 4.690803382663848e-07, | |
| "loss": 0.3411, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 1.8198091877565457, | |
| "grad_norm": 24.727949142456055, | |
| "learning_rate": 4.646758280479211e-07, | |
| "loss": 0.333, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.8215178355009318, | |
| "grad_norm": 36.50139617919922, | |
| "learning_rate": 4.602713178294574e-07, | |
| "loss": 0.3346, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 1.823226483245318, | |
| "grad_norm": 32.99855041503906, | |
| "learning_rate": 4.5586680761099375e-07, | |
| "loss": 0.4303, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 1.8249351309897044, | |
| "grad_norm": 23.56210708618164, | |
| "learning_rate": 4.5146229739253e-07, | |
| "loss": 0.3567, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.8266437787340908, | |
| "grad_norm": 32.45067596435547, | |
| "learning_rate": 4.470577871740663e-07, | |
| "loss": 0.3615, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 1.828352426478477, | |
| "grad_norm": 26.946245193481445, | |
| "learning_rate": 4.426532769556026e-07, | |
| "loss": 0.3577, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.8300610742228633, | |
| "grad_norm": 25.937786102294922, | |
| "learning_rate": 4.3824876673713886e-07, | |
| "loss": 0.3657, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 1.8317697219672495, | |
| "grad_norm": 22.442626953125, | |
| "learning_rate": 4.338442565186752e-07, | |
| "loss": 0.3988, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 1.833478369711636, | |
| "grad_norm": 35.23172378540039, | |
| "learning_rate": 4.2943974630021144e-07, | |
| "loss": 0.3931, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 1.8351870174560223, | |
| "grad_norm": 36.66183090209961, | |
| "learning_rate": 4.250352360817477e-07, | |
| "loss": 0.4132, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 1.8368956652004085, | |
| "grad_norm": 38.16518783569336, | |
| "learning_rate": 4.206307258632841e-07, | |
| "loss": 0.3592, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.8386043129447946, | |
| "grad_norm": 29.179283142089844, | |
| "learning_rate": 4.1622621564482034e-07, | |
| "loss": 0.376, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 1.840312960689181, | |
| "grad_norm": 32.8124885559082, | |
| "learning_rate": 4.118217054263566e-07, | |
| "loss": 0.4341, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 1.8420216084335674, | |
| "grad_norm": 34.435943603515625, | |
| "learning_rate": 4.0741719520789293e-07, | |
| "loss": 0.3602, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 1.8437302561779538, | |
| "grad_norm": 23.411712646484375, | |
| "learning_rate": 4.030126849894292e-07, | |
| "loss": 0.326, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 1.84543890392234, | |
| "grad_norm": 39.64480209350586, | |
| "learning_rate": 3.986081747709655e-07, | |
| "loss": 0.393, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.8471475516667262, | |
| "grad_norm": 30.89308738708496, | |
| "learning_rate": 3.942036645525018e-07, | |
| "loss": 0.3746, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 1.8488561994111126, | |
| "grad_norm": 27.453231811523438, | |
| "learning_rate": 3.8979915433403804e-07, | |
| "loss": 0.3775, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 1.850564847155499, | |
| "grad_norm": 39.444679260253906, | |
| "learning_rate": 3.853946441155744e-07, | |
| "loss": 0.3238, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 1.8522734948998851, | |
| "grad_norm": 25.87100601196289, | |
| "learning_rate": 3.809901338971107e-07, | |
| "loss": 0.3351, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 1.8539821426442713, | |
| "grad_norm": 38.50906753540039, | |
| "learning_rate": 3.76585623678647e-07, | |
| "loss": 0.4047, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 1.8556907903886577, | |
| "grad_norm": 28.932676315307617, | |
| "learning_rate": 3.7218111346018326e-07, | |
| "loss": 0.3853, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 1.857399438133044, | |
| "grad_norm": 38.7553596496582, | |
| "learning_rate": 3.6777660324171953e-07, | |
| "loss": 0.4117, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 1.8591080858774305, | |
| "grad_norm": 37.8046760559082, | |
| "learning_rate": 3.6337209302325584e-07, | |
| "loss": 0.3444, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 1.8608167336218167, | |
| "grad_norm": 37.61636734008789, | |
| "learning_rate": 3.589675828047921e-07, | |
| "loss": 0.37, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 1.8625253813662028, | |
| "grad_norm": 31.169891357421875, | |
| "learning_rate": 3.545630725863284e-07, | |
| "loss": 0.3625, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.8642340291105892, | |
| "grad_norm": 33.97384262084961, | |
| "learning_rate": 3.5015856236786475e-07, | |
| "loss": 0.3595, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 1.8659426768549756, | |
| "grad_norm": 33.35996627807617, | |
| "learning_rate": 3.45754052149401e-07, | |
| "loss": 0.367, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 1.867651324599362, | |
| "grad_norm": 31.67682647705078, | |
| "learning_rate": 3.4134954193093733e-07, | |
| "loss": 0.3905, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 1.8693599723437482, | |
| "grad_norm": 34.16012954711914, | |
| "learning_rate": 3.369450317124736e-07, | |
| "loss": 0.3639, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 1.8710686200881343, | |
| "grad_norm": 38.885986328125, | |
| "learning_rate": 3.3254052149400986e-07, | |
| "loss": 0.3593, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.8727772678325207, | |
| "grad_norm": 35.09337615966797, | |
| "learning_rate": 3.281360112755462e-07, | |
| "loss": 0.4085, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 1.8744859155769071, | |
| "grad_norm": 36.90644073486328, | |
| "learning_rate": 3.2373150105708244e-07, | |
| "loss": 0.3615, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 1.8761945633212933, | |
| "grad_norm": 25.444183349609375, | |
| "learning_rate": 3.193269908386188e-07, | |
| "loss": 0.3644, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 1.8779032110656795, | |
| "grad_norm": 30.481740951538086, | |
| "learning_rate": 3.149224806201551e-07, | |
| "loss": 0.378, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 1.8796118588100659, | |
| "grad_norm": 37.78234100341797, | |
| "learning_rate": 3.1051797040169134e-07, | |
| "loss": 0.387, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.8813205065544523, | |
| "grad_norm": 25.613048553466797, | |
| "learning_rate": 3.061134601832276e-07, | |
| "loss": 0.3335, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 1.8830291542988387, | |
| "grad_norm": 39.77134323120117, | |
| "learning_rate": 3.0170894996476393e-07, | |
| "loss": 0.3843, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 1.8847378020432248, | |
| "grad_norm": 22.53700065612793, | |
| "learning_rate": 2.9730443974630025e-07, | |
| "loss": 0.3685, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 1.886446449787611, | |
| "grad_norm": 44.945308685302734, | |
| "learning_rate": 2.928999295278365e-07, | |
| "loss": 0.3302, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 1.8881550975319974, | |
| "grad_norm": 31.36821174621582, | |
| "learning_rate": 2.8849541930937283e-07, | |
| "loss": 0.3876, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 1.8898637452763838, | |
| "grad_norm": 38.52021408081055, | |
| "learning_rate": 2.840909090909091e-07, | |
| "loss": 0.3741, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 1.89157239302077, | |
| "grad_norm": 40.30624008178711, | |
| "learning_rate": 2.796863988724454e-07, | |
| "loss": 0.3921, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 1.8932810407651564, | |
| "grad_norm": 29.259140014648438, | |
| "learning_rate": 2.7528188865398173e-07, | |
| "loss": 0.3893, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 1.8949896885095425, | |
| "grad_norm": 25.17171287536621, | |
| "learning_rate": 2.70877378435518e-07, | |
| "loss": 0.3756, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 1.896698336253929, | |
| "grad_norm": 37.15606689453125, | |
| "learning_rate": 2.6647286821705426e-07, | |
| "loss": 0.4048, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.8984069839983153, | |
| "grad_norm": 24.475324630737305, | |
| "learning_rate": 2.620683579985906e-07, | |
| "loss": 0.3709, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 1.9001156317427015, | |
| "grad_norm": 28.089601516723633, | |
| "learning_rate": 2.576638477801269e-07, | |
| "loss": 0.3933, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 1.9018242794870877, | |
| "grad_norm": 24.580224990844727, | |
| "learning_rate": 2.5325933756166316e-07, | |
| "loss": 0.3557, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 1.903532927231474, | |
| "grad_norm": 31.057662963867188, | |
| "learning_rate": 2.4885482734319943e-07, | |
| "loss": 0.3912, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 1.9052415749758604, | |
| "grad_norm": 36.91437530517578, | |
| "learning_rate": 2.4445031712473575e-07, | |
| "loss": 0.363, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 1.9069502227202468, | |
| "grad_norm": 28.377185821533203, | |
| "learning_rate": 2.4004580690627206e-07, | |
| "loss": 0.41, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 1.908658870464633, | |
| "grad_norm": 33.51364517211914, | |
| "learning_rate": 2.3564129668780836e-07, | |
| "loss": 0.362, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 1.9103675182090192, | |
| "grad_norm": 23.851999282836914, | |
| "learning_rate": 2.3123678646934465e-07, | |
| "loss": 0.3516, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 1.9120761659534056, | |
| "grad_norm": 27.512645721435547, | |
| "learning_rate": 2.268322762508809e-07, | |
| "loss": 0.3736, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 1.913784813697792, | |
| "grad_norm": 33.09054183959961, | |
| "learning_rate": 2.224277660324172e-07, | |
| "loss": 0.3536, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.9154934614421781, | |
| "grad_norm": 39.228851318359375, | |
| "learning_rate": 2.1802325581395352e-07, | |
| "loss": 0.3922, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 1.9172021091865643, | |
| "grad_norm": 27.592710494995117, | |
| "learning_rate": 2.1361874559548981e-07, | |
| "loss": 0.3146, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 1.9189107569309507, | |
| "grad_norm": 50.0390739440918, | |
| "learning_rate": 2.0921423537702608e-07, | |
| "loss": 0.3441, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 1.920619404675337, | |
| "grad_norm": 39.61098098754883, | |
| "learning_rate": 2.0480972515856237e-07, | |
| "loss": 0.3493, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 1.9223280524197235, | |
| "grad_norm": 38.638954162597656, | |
| "learning_rate": 2.004052149400987e-07, | |
| "loss": 0.3626, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.9240367001641097, | |
| "grad_norm": 29.187583923339844, | |
| "learning_rate": 1.9600070472163498e-07, | |
| "loss": 0.3438, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 1.9257453479084958, | |
| "grad_norm": 28.951478958129883, | |
| "learning_rate": 1.9159619450317125e-07, | |
| "loss": 0.3602, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 1.9274539956528822, | |
| "grad_norm": 23.78569984436035, | |
| "learning_rate": 1.8719168428470754e-07, | |
| "loss": 0.4069, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 1.9291626433972686, | |
| "grad_norm": 28.406557083129883, | |
| "learning_rate": 1.8278717406624386e-07, | |
| "loss": 0.3304, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 1.930871291141655, | |
| "grad_norm": 36.53167724609375, | |
| "learning_rate": 1.7838266384778015e-07, | |
| "loss": 0.3741, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.9325799388860412, | |
| "grad_norm": 39.00297164916992, | |
| "learning_rate": 1.7397815362931644e-07, | |
| "loss": 0.3252, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 1.9342885866304274, | |
| "grad_norm": 37.43632888793945, | |
| "learning_rate": 1.695736434108527e-07, | |
| "loss": 0.3953, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 1.9359972343748137, | |
| "grad_norm": 34.39417266845703, | |
| "learning_rate": 1.6516913319238902e-07, | |
| "loss": 0.4112, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 1.9377058821192001, | |
| "grad_norm": 31.974533081054688, | |
| "learning_rate": 1.6076462297392531e-07, | |
| "loss": 0.3451, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 1.9394145298635863, | |
| "grad_norm": 26.460182189941406, | |
| "learning_rate": 1.563601127554616e-07, | |
| "loss": 0.318, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 1.9411231776079725, | |
| "grad_norm": 37.38439178466797, | |
| "learning_rate": 1.519556025369979e-07, | |
| "loss": 0.3795, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 1.9428318253523589, | |
| "grad_norm": 21.876747131347656, | |
| "learning_rate": 1.475510923185342e-07, | |
| "loss": 0.3331, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 1.9445404730967453, | |
| "grad_norm": 38.800811767578125, | |
| "learning_rate": 1.4314658210007048e-07, | |
| "loss": 0.4218, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 1.9462491208411317, | |
| "grad_norm": 32.05302810668945, | |
| "learning_rate": 1.3874207188160677e-07, | |
| "loss": 0.3484, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 1.9479577685855178, | |
| "grad_norm": 28.057106018066406, | |
| "learning_rate": 1.3433756166314306e-07, | |
| "loss": 0.3579, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.949666416329904, | |
| "grad_norm": 34.402259826660156, | |
| "learning_rate": 1.2993305144467938e-07, | |
| "loss": 0.3871, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 1.9513750640742904, | |
| "grad_norm": 23.892139434814453, | |
| "learning_rate": 1.2552854122621565e-07, | |
| "loss": 0.3813, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 1.9530837118186768, | |
| "grad_norm": 33.99159240722656, | |
| "learning_rate": 1.2112403100775197e-07, | |
| "loss": 0.3698, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 1.954792359563063, | |
| "grad_norm": 39.73171615600586, | |
| "learning_rate": 1.1671952078928824e-07, | |
| "loss": 0.3888, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 1.9565010073074494, | |
| "grad_norm": 40.77421951293945, | |
| "learning_rate": 1.1231501057082454e-07, | |
| "loss": 0.3514, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 1.9582096550518355, | |
| "grad_norm": 32.23746871948242, | |
| "learning_rate": 1.0791050035236083e-07, | |
| "loss": 0.3555, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 1.959918302796222, | |
| "grad_norm": 37.01948165893555, | |
| "learning_rate": 1.0350599013389712e-07, | |
| "loss": 0.3536, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 1.9616269505406083, | |
| "grad_norm": 29.169981002807617, | |
| "learning_rate": 9.910147991543341e-08, | |
| "loss": 0.3298, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 1.9633355982849945, | |
| "grad_norm": 21.912952423095703, | |
| "learning_rate": 9.46969696969697e-08, | |
| "loss": 0.2856, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 1.9650442460293807, | |
| "grad_norm": 31.11917495727539, | |
| "learning_rate": 9.0292459478506e-08, | |
| "loss": 0.3485, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.966752893773767, | |
| "grad_norm": 38.79056167602539, | |
| "learning_rate": 8.588794926004229e-08, | |
| "loss": 0.3723, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 1.9684615415181534, | |
| "grad_norm": 26.17238426208496, | |
| "learning_rate": 8.148343904157858e-08, | |
| "loss": 0.3234, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 1.9701701892625398, | |
| "grad_norm": 32.50604248046875, | |
| "learning_rate": 7.707892882311487e-08, | |
| "loss": 0.3453, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 1.971878837006926, | |
| "grad_norm": 42.897769927978516, | |
| "learning_rate": 7.267441860465117e-08, | |
| "loss": 0.3336, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 1.9735874847513122, | |
| "grad_norm": 31.65463638305664, | |
| "learning_rate": 6.826990838618747e-08, | |
| "loss": 0.3547, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.9752961324956986, | |
| "grad_norm": 36.5434684753418, | |
| "learning_rate": 6.386539816772376e-08, | |
| "loss": 0.3433, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 1.977004780240085, | |
| "grad_norm": 20.959672927856445, | |
| "learning_rate": 5.946088794926004e-08, | |
| "loss": 0.331, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 1.9787134279844711, | |
| "grad_norm": 32.63426971435547, | |
| "learning_rate": 5.5056377730796334e-08, | |
| "loss": 0.3522, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 1.9804220757288573, | |
| "grad_norm": 30.64798927307129, | |
| "learning_rate": 5.065186751233264e-08, | |
| "loss": 0.3582, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 1.9821307234732437, | |
| "grad_norm": 26.718338012695312, | |
| "learning_rate": 4.624735729386893e-08, | |
| "loss": 0.3232, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.98383937121763, | |
| "grad_norm": 34.196048736572266, | |
| "learning_rate": 4.184284707540522e-08, | |
| "loss": 0.329, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 1.9855480189620165, | |
| "grad_norm": 41.088165283203125, | |
| "learning_rate": 3.7438336856941514e-08, | |
| "loss": 0.3263, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 1.9872566667064027, | |
| "grad_norm": 30.549293518066406, | |
| "learning_rate": 3.3033826638477806e-08, | |
| "loss": 0.3245, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 1.9889653144507888, | |
| "grad_norm": 43.00529098510742, | |
| "learning_rate": 2.8629316420014098e-08, | |
| "loss": 0.3949, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 1.9906739621951752, | |
| "grad_norm": 38.130096435546875, | |
| "learning_rate": 2.422480620155039e-08, | |
| "loss": 0.3707, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 1.9923826099395616, | |
| "grad_norm": 32.19184112548828, | |
| "learning_rate": 1.982029598308668e-08, | |
| "loss": 0.3876, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 1.994091257683948, | |
| "grad_norm": 29.53122329711914, | |
| "learning_rate": 1.5415785764622976e-08, | |
| "loss": 0.3743, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 1.9957999054283342, | |
| "grad_norm": 28.583572387695312, | |
| "learning_rate": 1.1011275546159268e-08, | |
| "loss": 0.3703, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 1.9975085531727204, | |
| "grad_norm": 22.58035659790039, | |
| "learning_rate": 6.606765327695561e-09, | |
| "loss": 0.3334, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 1.9992172009171068, | |
| "grad_norm": 40.383663177490234, | |
| "learning_rate": 2.2022551092318538e-09, | |
| "loss": 0.3615, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.9999006600148612, | |
| "eval_loss": 0.954230546951294, | |
| "eval_runtime": 137.9658, | |
| "eval_samples_per_second": 71.46, | |
| "eval_steps_per_second": 8.937, | |
| "step": 11704 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 11704, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.0899612102793626e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
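
A state file of this shape is straightforward to inspect programmatically. The following is a minimal sketch, not part of the original run: it assumes the JSON above has been saved (without the table-row pipes) as `trainer_state.json` next to the script and that matplotlib is installed; `log_history` is the standard Hugging Face Trainer key for the list of per-step entries with the `step`, `loss`, `learning_rate`, and `eval_loss` fields seen above.

```python
# Minimal sketch for inspecting a Hugging Face trainer_state.json like the one above.
# Assumptions: the file is saved as "trainer_state.json" and matplotlib is available.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train = [e for e in history if "loss" in e]
evals = [e for e in history if "eval_loss" in e]

steps = [e["step"] for e in train]
losses = [e["loss"] for e in train]
lrs = [e["learning_rate"] for e in train]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="training loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

# Learning-rate schedule on a secondary axis to show the linear decay to ~0.
ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

# Mark evaluation points and print their eval_loss values.
for e in evals:
    ax_loss.axvline(e["step"], linestyle="--", alpha=0.3)
    print(f'step {e["step"]}: eval_loss={e["eval_loss"]:.4f}')

fig.tight_layout()
plt.show()
```

Run as `python plot_trainer_state.py` (hypothetical filename) from the directory containing the state file; it prints each logged `eval_loss` and shows the training-loss curve against the decaying learning rate.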