| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.845556701590576, |
| "eval_steps": 500, |
| "global_step": 8500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00217143477552793, |
| "grad_norm": 0.9825782179832458, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.7137, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00434286955105586, |
| "grad_norm": 1.4503270387649536, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 1.7044, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006514304326583791, |
| "grad_norm": 0.7030352354049683, |
| "learning_rate": 1.16e-05, |
| "loss": 1.5721, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00868573910211172, |
| "grad_norm": 0.9047777652740479, |
| "learning_rate": 1.5600000000000003e-05, |
| "loss": 1.4224, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01085717387763965, |
| "grad_norm": 0.6958425641059875, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 1.3783, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.013028608653167581, |
| "grad_norm": 0.683045506477356, |
| "learning_rate": 1.9980353634577606e-05, |
| "loss": 1.3706, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.015200043428695511, |
| "grad_norm": 0.5452519655227661, |
| "learning_rate": 1.9958524339663828e-05, |
| "loss": 1.3022, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01737147820422344, |
| "grad_norm": 0.8496165871620178, |
| "learning_rate": 1.9936695044750056e-05, |
| "loss": 1.2474, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01954291297975137, |
| "grad_norm": 0.5976231098175049, |
| "learning_rate": 1.991486574983628e-05, |
| "loss": 1.2646, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0217143477552793, |
| "grad_norm": 0.7484721541404724, |
| "learning_rate": 1.9893036454922506e-05, |
| "loss": 1.2051, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02388578253080723, |
| "grad_norm": 0.6440810561180115, |
| "learning_rate": 1.9871207160008735e-05, |
| "loss": 1.2289, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.026057217306335163, |
| "grad_norm": 0.7614450454711914, |
| "learning_rate": 1.984937786509496e-05, |
| "loss": 1.26, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.028228652081863093, |
| "grad_norm": 0.7417937517166138, |
| "learning_rate": 1.9827548570181185e-05, |
| "loss": 1.2034, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.030400086857391023, |
| "grad_norm": 0.7169002890586853, |
| "learning_rate": 1.980571927526741e-05, |
| "loss": 1.2561, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03257152163291895, |
| "grad_norm": 0.6170061826705933, |
| "learning_rate": 1.9783889980353638e-05, |
| "loss": 1.1907, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03474295640844688, |
| "grad_norm": 0.7643230557441711, |
| "learning_rate": 1.976206068543986e-05, |
| "loss": 1.2109, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03691439118397481, |
| "grad_norm": 0.8660950660705566, |
| "learning_rate": 1.9740231390526088e-05, |
| "loss": 1.3197, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.03908582595950274, |
| "grad_norm": 0.7613770961761475, |
| "learning_rate": 1.9718402095612313e-05, |
| "loss": 1.2072, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04125726073503067, |
| "grad_norm": 0.880974531173706, |
| "learning_rate": 1.9696572800698538e-05, |
| "loss": 1.226, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0434286955105586, |
| "grad_norm": 0.8629663586616516, |
| "learning_rate": 1.9674743505784766e-05, |
| "loss": 1.2298, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04560013028608653, |
| "grad_norm": 0.7981083393096924, |
| "learning_rate": 1.965291421087099e-05, |
| "loss": 1.145, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04777156506161446, |
| "grad_norm": 0.8174938559532166, |
| "learning_rate": 1.9631084915957216e-05, |
| "loss": 1.1745, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04994299983714239, |
| "grad_norm": 0.7619920969009399, |
| "learning_rate": 1.960925562104344e-05, |
| "loss": 1.1557, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.052114434612670325, |
| "grad_norm": 0.8618036508560181, |
| "learning_rate": 1.958742632612967e-05, |
| "loss": 1.2093, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05428586938819825, |
| "grad_norm": 0.9684587717056274, |
| "learning_rate": 1.956559703121589e-05, |
| "loss": 1.1358, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.056457304163726185, |
| "grad_norm": 0.856431245803833, |
| "learning_rate": 1.954376773630212e-05, |
| "loss": 1.0864, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05862873893925411, |
| "grad_norm": 0.8133667707443237, |
| "learning_rate": 1.9521938441388345e-05, |
| "loss": 1.1005, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.060800173714782045, |
| "grad_norm": 1.0199098587036133, |
| "learning_rate": 1.950010914647457e-05, |
| "loss": 1.1349, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06297160849030997, |
| "grad_norm": 0.8546782732009888, |
| "learning_rate": 1.9478279851560794e-05, |
| "loss": 1.1272, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0651430432658379, |
| "grad_norm": 1.0497276782989502, |
| "learning_rate": 1.9456450556647023e-05, |
| "loss": 1.176, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06731447804136584, |
| "grad_norm": 0.9524215459823608, |
| "learning_rate": 1.9434621261733248e-05, |
| "loss": 1.1281, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06948591281689376, |
| "grad_norm": 0.9029881954193115, |
| "learning_rate": 1.9412791966819473e-05, |
| "loss": 1.0668, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07165734759242169, |
| "grad_norm": 1.0050421953201294, |
| "learning_rate": 1.93909626719057e-05, |
| "loss": 1.0993, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07382878236794962, |
| "grad_norm": 0.8202849626541138, |
| "learning_rate": 1.9369133376991923e-05, |
| "loss": 1.1794, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07600021714347756, |
| "grad_norm": 0.8110634684562683, |
| "learning_rate": 1.934730408207815e-05, |
| "loss": 1.1452, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07817165191900548, |
| "grad_norm": 0.9648256301879883, |
| "learning_rate": 1.9325474787164376e-05, |
| "loss": 1.1503, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08034308669453341, |
| "grad_norm": 0.969715416431427, |
| "learning_rate": 1.93036454922506e-05, |
| "loss": 1.129, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08251452147006134, |
| "grad_norm": 1.0881967544555664, |
| "learning_rate": 1.9281816197336826e-05, |
| "loss": 1.1217, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08468595624558928, |
| "grad_norm": 0.9472118616104126, |
| "learning_rate": 1.9259986902423054e-05, |
| "loss": 1.1206, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0868573910211172, |
| "grad_norm": 1.0082671642303467, |
| "learning_rate": 1.923815760750928e-05, |
| "loss": 1.1371, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08902882579664513, |
| "grad_norm": 1.0587445497512817, |
| "learning_rate": 1.9216328312595504e-05, |
| "loss": 1.0499, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.09120026057217306, |
| "grad_norm": 0.869490385055542, |
| "learning_rate": 1.9194499017681733e-05, |
| "loss": 1.0992, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.093371695347701, |
| "grad_norm": 1.024477243423462, |
| "learning_rate": 1.9172669722767954e-05, |
| "loss": 1.1033, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09554313012322892, |
| "grad_norm": 0.7851136326789856, |
| "learning_rate": 1.9150840427854183e-05, |
| "loss": 1.1261, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09771456489875685, |
| "grad_norm": 1.0576775074005127, |
| "learning_rate": 1.9129011132940408e-05, |
| "loss": 1.0772, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.09988599967428478, |
| "grad_norm": 0.9781667590141296, |
| "learning_rate": 1.9107181838026633e-05, |
| "loss": 1.0995, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10205743444981272, |
| "grad_norm": 1.0188452005386353, |
| "learning_rate": 1.9085352543112858e-05, |
| "loss": 1.1518, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10422886922534065, |
| "grad_norm": 1.052553653717041, |
| "learning_rate": 1.9063523248199086e-05, |
| "loss": 1.1514, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10640030400086857, |
| "grad_norm": 0.9977424144744873, |
| "learning_rate": 1.904169395328531e-05, |
| "loss": 1.1605, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1085717387763965, |
| "grad_norm": 0.9981403946876526, |
| "learning_rate": 1.9019864658371536e-05, |
| "loss": 1.0996, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11074317355192444, |
| "grad_norm": 0.9761925339698792, |
| "learning_rate": 1.899803536345776e-05, |
| "loss": 1.048, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.11291460832745237, |
| "grad_norm": 0.9788073301315308, |
| "learning_rate": 1.8976206068543986e-05, |
| "loss": 1.1175, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11508604310298029, |
| "grad_norm": 0.9808152914047241, |
| "learning_rate": 1.8954376773630214e-05, |
| "loss": 1.1271, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.11725747787850822, |
| "grad_norm": 0.9630600214004517, |
| "learning_rate": 1.893254747871644e-05, |
| "loss": 1.0699, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11942891265403616, |
| "grad_norm": 1.1894537210464478, |
| "learning_rate": 1.8910718183802664e-05, |
| "loss": 1.0589, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12160034742956409, |
| "grad_norm": 0.967409074306488, |
| "learning_rate": 1.888888888888889e-05, |
| "loss": 1.1132, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12377178220509201, |
| "grad_norm": 0.9783412218093872, |
| "learning_rate": 1.8867059593975117e-05, |
| "loss": 1.0887, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12594321698061994, |
| "grad_norm": 0.9031311869621277, |
| "learning_rate": 1.8845230299061342e-05, |
| "loss": 1.0836, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.12811465175614786, |
| "grad_norm": 1.2321075201034546, |
| "learning_rate": 1.8823401004147567e-05, |
| "loss": 1.084, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1302860865316758, |
| "grad_norm": 0.9194741249084473, |
| "learning_rate": 1.8801571709233792e-05, |
| "loss": 1.0999, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13245752130720373, |
| "grad_norm": 1.2474993467330933, |
| "learning_rate": 1.877974241432002e-05, |
| "loss": 1.0497, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.13462895608273168, |
| "grad_norm": 1.0515743494033813, |
| "learning_rate": 1.8757913119406246e-05, |
| "loss": 1.0983, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.1368003908582596, |
| "grad_norm": 1.2497025728225708, |
| "learning_rate": 1.873608382449247e-05, |
| "loss": 1.118, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.13897182563378752, |
| "grad_norm": 1.1340830326080322, |
| "learning_rate": 1.8714254529578696e-05, |
| "loss": 1.0629, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.14114326040931546, |
| "grad_norm": 1.1488502025604248, |
| "learning_rate": 1.869242523466492e-05, |
| "loss": 1.1057, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14331469518484338, |
| "grad_norm": 1.1718027591705322, |
| "learning_rate": 1.867059593975115e-05, |
| "loss": 1.0895, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.1454861299603713, |
| "grad_norm": 0.8492761850357056, |
| "learning_rate": 1.8648766644837374e-05, |
| "loss": 1.0919, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.14765756473589925, |
| "grad_norm": 1.0783703327178955, |
| "learning_rate": 1.86269373499236e-05, |
| "loss": 1.0929, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.14982899951142717, |
| "grad_norm": 1.0920681953430176, |
| "learning_rate": 1.8605108055009824e-05, |
| "loss": 1.0545, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.15200043428695512, |
| "grad_norm": 1.0387171506881714, |
| "learning_rate": 1.8583278760096052e-05, |
| "loss": 1.0386, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15417186906248304, |
| "grad_norm": 1.2252532243728638, |
| "learning_rate": 1.8561449465182274e-05, |
| "loss": 1.0743, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.15634330383801096, |
| "grad_norm": 1.0585488080978394, |
| "learning_rate": 1.8539620170268502e-05, |
| "loss": 1.1129, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1585147386135389, |
| "grad_norm": 0.9711065292358398, |
| "learning_rate": 1.8517790875354727e-05, |
| "loss": 1.1111, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16068617338906682, |
| "grad_norm": 1.1681485176086426, |
| "learning_rate": 1.8495961580440952e-05, |
| "loss": 1.072, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.16285760816459477, |
| "grad_norm": 0.9218672513961792, |
| "learning_rate": 1.847413228552718e-05, |
| "loss": 1.0748, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1650290429401227, |
| "grad_norm": 0.9746413230895996, |
| "learning_rate": 1.8452302990613406e-05, |
| "loss": 1.0946, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.1672004777156506, |
| "grad_norm": 1.1038978099822998, |
| "learning_rate": 1.843047369569963e-05, |
| "loss": 1.1346, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.16937191249117856, |
| "grad_norm": 0.9651903510093689, |
| "learning_rate": 1.8408644400785856e-05, |
| "loss": 1.0662, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.17154334726670648, |
| "grad_norm": 1.1864938735961914, |
| "learning_rate": 1.8386815105872084e-05, |
| "loss": 1.1023, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.1737147820422344, |
| "grad_norm": 0.9629665017127991, |
| "learning_rate": 1.8364985810958305e-05, |
| "loss": 1.0739, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.17588621681776234, |
| "grad_norm": 1.2128831148147583, |
| "learning_rate": 1.8343156516044534e-05, |
| "loss": 1.0484, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.17805765159329026, |
| "grad_norm": 1.0595309734344482, |
| "learning_rate": 1.832132722113076e-05, |
| "loss": 1.0829, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.1802290863688182, |
| "grad_norm": 1.1851084232330322, |
| "learning_rate": 1.8299497926216984e-05, |
| "loss": 1.0791, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.18240052114434613, |
| "grad_norm": 0.9105240702629089, |
| "learning_rate": 1.8277668631303212e-05, |
| "loss": 1.0909, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.18457195591987405, |
| "grad_norm": 1.0232548713684082, |
| "learning_rate": 1.8255839336389437e-05, |
| "loss": 1.0585, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.186743390695402, |
| "grad_norm": 1.0697710514068604, |
| "learning_rate": 1.8234010041475662e-05, |
| "loss": 1.1133, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.18891482547092991, |
| "grad_norm": 0.9465317130088806, |
| "learning_rate": 1.8212180746561887e-05, |
| "loss": 1.0755, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.19108626024645783, |
| "grad_norm": 1.0849310159683228, |
| "learning_rate": 1.8190351451648115e-05, |
| "loss": 1.1369, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.19325769502198578, |
| "grad_norm": 1.0284308195114136, |
| "learning_rate": 1.8168522156734337e-05, |
| "loss": 1.0504, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.1954291297975137, |
| "grad_norm": 1.000159740447998, |
| "learning_rate": 1.8146692861820565e-05, |
| "loss": 1.0658, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.19760056457304165, |
| "grad_norm": 1.0055243968963623, |
| "learning_rate": 1.812486356690679e-05, |
| "loss": 1.0563, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.19977199934856957, |
| "grad_norm": 1.1526374816894531, |
| "learning_rate": 1.8103034271993015e-05, |
| "loss": 1.0802, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2019434341240975, |
| "grad_norm": 0.8575794696807861, |
| "learning_rate": 1.808120497707924e-05, |
| "loss": 1.0893, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.20411486889962543, |
| "grad_norm": 0.985564649105072, |
| "learning_rate": 1.805937568216547e-05, |
| "loss": 1.0543, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.20628630367515335, |
| "grad_norm": 1.2791037559509277, |
| "learning_rate": 1.8037546387251694e-05, |
| "loss": 1.0984, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2084577384506813, |
| "grad_norm": 1.1033849716186523, |
| "learning_rate": 1.801571709233792e-05, |
| "loss": 1.0456, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.21062917322620922, |
| "grad_norm": 1.1214113235473633, |
| "learning_rate": 1.7993887797424147e-05, |
| "loss": 1.0519, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.21280060800173714, |
| "grad_norm": 1.1759611368179321, |
| "learning_rate": 1.797205850251037e-05, |
| "loss": 1.0646, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.2149720427772651, |
| "grad_norm": 1.0244547128677368, |
| "learning_rate": 1.7950229207596597e-05, |
| "loss": 1.0995, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.217143477552793, |
| "grad_norm": 1.134796142578125, |
| "learning_rate": 1.7928399912682822e-05, |
| "loss": 1.0889, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21931491232832093, |
| "grad_norm": 1.0857653617858887, |
| "learning_rate": 1.7906570617769047e-05, |
| "loss": 1.0283, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.22148634710384887, |
| "grad_norm": 1.1252498626708984, |
| "learning_rate": 1.7884741322855272e-05, |
| "loss": 1.0462, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2236577818793768, |
| "grad_norm": 1.0542049407958984, |
| "learning_rate": 1.78629120279415e-05, |
| "loss": 1.0499, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.22582921665490474, |
| "grad_norm": 1.1074199676513672, |
| "learning_rate": 1.7841082733027725e-05, |
| "loss": 1.0394, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.22800065143043266, |
| "grad_norm": 1.0936591625213623, |
| "learning_rate": 1.781925343811395e-05, |
| "loss": 1.0529, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.23017208620596058, |
| "grad_norm": 1.032329797744751, |
| "learning_rate": 1.779742414320018e-05, |
| "loss": 1.0311, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.23234352098148853, |
| "grad_norm": 1.6111783981323242, |
| "learning_rate": 1.77755948482864e-05, |
| "loss": 1.0481, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.23451495575701645, |
| "grad_norm": 1.1454813480377197, |
| "learning_rate": 1.775376555337263e-05, |
| "loss": 1.0231, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.23668639053254437, |
| "grad_norm": 1.0079035758972168, |
| "learning_rate": 1.7731936258458853e-05, |
| "loss": 1.0907, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.2388578253080723, |
| "grad_norm": 1.0366803407669067, |
| "learning_rate": 1.771010696354508e-05, |
| "loss": 1.0368, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.24102926008360023, |
| "grad_norm": 1.057990550994873, |
| "learning_rate": 1.7688277668631303e-05, |
| "loss": 1.0462, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.24320069485912818, |
| "grad_norm": 0.9940240383148193, |
| "learning_rate": 1.7666448373717532e-05, |
| "loss": 1.089, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.2453721296346561, |
| "grad_norm": 1.0284287929534912, |
| "learning_rate": 1.7644619078803757e-05, |
| "loss": 1.044, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.24754356441018402, |
| "grad_norm": 1.2615009546279907, |
| "learning_rate": 1.7622789783889982e-05, |
| "loss": 1.0978, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.24971499918571197, |
| "grad_norm": 1.1974271535873413, |
| "learning_rate": 1.7600960488976207e-05, |
| "loss": 1.1405, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2518864339612399, |
| "grad_norm": 1.1542342901229858, |
| "learning_rate": 1.757913119406243e-05, |
| "loss": 1.0874, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.25405786873676783, |
| "grad_norm": 1.2193187475204468, |
| "learning_rate": 1.755730189914866e-05, |
| "loss": 1.06, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2562293035122957, |
| "grad_norm": 0.8851369619369507, |
| "learning_rate": 1.7535472604234885e-05, |
| "loss": 1.0793, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2584007382878237, |
| "grad_norm": 0.9751698970794678, |
| "learning_rate": 1.751364330932111e-05, |
| "loss": 1.085, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.2605721730633516, |
| "grad_norm": 1.1651514768600464, |
| "learning_rate": 1.7491814014407335e-05, |
| "loss": 1.0548, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.26274360783887957, |
| "grad_norm": 1.0106171369552612, |
| "learning_rate": 1.7469984719493563e-05, |
| "loss": 1.0162, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.26491504261440746, |
| "grad_norm": 1.0185978412628174, |
| "learning_rate": 1.7448155424579788e-05, |
| "loss": 1.0557, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.2670864773899354, |
| "grad_norm": 1.0883762836456299, |
| "learning_rate": 1.7426326129666013e-05, |
| "loss": 1.0507, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.26925791216546335, |
| "grad_norm": 1.1618812084197998, |
| "learning_rate": 1.7404496834752238e-05, |
| "loss": 1.0452, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.27142934694099125, |
| "grad_norm": 1.1427685022354126, |
| "learning_rate": 1.7382667539838463e-05, |
| "loss": 1.0772, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.2736007817165192, |
| "grad_norm": 1.1353427171707153, |
| "learning_rate": 1.7360838244924688e-05, |
| "loss": 1.0152, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.27577221649204714, |
| "grad_norm": 1.0538302659988403, |
| "learning_rate": 1.7339008950010917e-05, |
| "loss": 1.0453, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.27794365126757503, |
| "grad_norm": 0.9475343823432922, |
| "learning_rate": 1.731717965509714e-05, |
| "loss": 1.0159, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.280115086043103, |
| "grad_norm": 1.1043903827667236, |
| "learning_rate": 1.7295350360183367e-05, |
| "loss": 1.0767, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.2822865208186309, |
| "grad_norm": 1.2597566843032837, |
| "learning_rate": 1.7273521065269595e-05, |
| "loss": 1.0976, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.2844579555941588, |
| "grad_norm": 1.2252488136291504, |
| "learning_rate": 1.725169177035582e-05, |
| "loss": 1.1019, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.28662939036968677, |
| "grad_norm": 1.4177309274673462, |
| "learning_rate": 1.7229862475442045e-05, |
| "loss": 1.1039, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.2888008251452147, |
| "grad_norm": 1.2762172222137451, |
| "learning_rate": 1.720803318052827e-05, |
| "loss": 1.0195, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.2909722599207426, |
| "grad_norm": 1.3187838792800903, |
| "learning_rate": 1.7186203885614495e-05, |
| "loss": 1.0851, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.29314369469627055, |
| "grad_norm": 1.2178953886032104, |
| "learning_rate": 1.716437459070072e-05, |
| "loss": 1.0503, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.2953151294717985, |
| "grad_norm": 0.9904911518096924, |
| "learning_rate": 1.7142545295786948e-05, |
| "loss": 1.0683, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.29748656424732645, |
| "grad_norm": 0.9594365358352661, |
| "learning_rate": 1.7120716000873173e-05, |
| "loss": 1.052, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.29965799902285434, |
| "grad_norm": 1.218839406967163, |
| "learning_rate": 1.7098886705959398e-05, |
| "loss": 1.0563, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.3018294337983823, |
| "grad_norm": 1.2965632677078247, |
| "learning_rate": 1.7077057411045626e-05, |
| "loss": 1.1087, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.30400086857391023, |
| "grad_norm": 1.0554800033569336, |
| "learning_rate": 1.705522811613185e-05, |
| "loss": 1.036, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3061723033494381, |
| "grad_norm": 1.1262216567993164, |
| "learning_rate": 1.7033398821218076e-05, |
| "loss": 1.0489, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.30834373812496607, |
| "grad_norm": 1.044252872467041, |
| "learning_rate": 1.70115695263043e-05, |
| "loss": 1.048, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.310515172900494, |
| "grad_norm": 1.4321969747543335, |
| "learning_rate": 1.6989740231390526e-05, |
| "loss": 1.0712, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.3126866076760219, |
| "grad_norm": 0.9649491310119629, |
| "learning_rate": 1.696791093647675e-05, |
| "loss": 1.0366, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.31485804245154986, |
| "grad_norm": 1.0629323720932007, |
| "learning_rate": 1.694608164156298e-05, |
| "loss": 1.0527, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3170294772270778, |
| "grad_norm": 1.1887277364730835, |
| "learning_rate": 1.6924252346649205e-05, |
| "loss": 1.0597, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3192009120026057, |
| "grad_norm": 1.0008701086044312, |
| "learning_rate": 1.690242305173543e-05, |
| "loss": 1.0733, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.32137234677813364, |
| "grad_norm": 1.1184202432632446, |
| "learning_rate": 1.6880593756821658e-05, |
| "loss": 1.0093, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.3235437815536616, |
| "grad_norm": 1.2822941541671753, |
| "learning_rate": 1.6858764461907883e-05, |
| "loss": 1.0505, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.32571521632918954, |
| "grad_norm": 1.365919828414917, |
| "learning_rate": 1.6836935166994108e-05, |
| "loss": 1.0616, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.32788665110471743, |
| "grad_norm": 1.4940375089645386, |
| "learning_rate": 1.6815105872080333e-05, |
| "loss": 1.0189, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3300580858802454, |
| "grad_norm": 1.443363070487976, |
| "learning_rate": 1.679327657716656e-05, |
| "loss": 1.0591, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.3322295206557733, |
| "grad_norm": 1.0023658275604248, |
| "learning_rate": 1.6771447282252783e-05, |
| "loss": 1.0784, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.3344009554313012, |
| "grad_norm": 1.0569523572921753, |
| "learning_rate": 1.674961798733901e-05, |
| "loss": 1.0167, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.33657239020682916, |
| "grad_norm": 1.5533829927444458, |
| "learning_rate": 1.6727788692425236e-05, |
| "loss": 1.059, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3387438249823571, |
| "grad_norm": 1.2175540924072266, |
| "learning_rate": 1.670595939751146e-05, |
| "loss": 1.0566, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.340915259757885, |
| "grad_norm": 1.2406116724014282, |
| "learning_rate": 1.6684130102597686e-05, |
| "loss": 1.0284, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.34308669453341295, |
| "grad_norm": 1.1116441488265991, |
| "learning_rate": 1.6662300807683914e-05, |
| "loss": 1.0686, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.3452581293089409, |
| "grad_norm": 0.9541231989860535, |
| "learning_rate": 1.664047151277014e-05, |
| "loss": 1.0569, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3474295640844688, |
| "grad_norm": 0.9048693180084229, |
| "learning_rate": 1.6618642217856364e-05, |
| "loss": 1.0503, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.34960099885999674, |
| "grad_norm": 1.2782031297683716, |
| "learning_rate": 1.6596812922942593e-05, |
| "loss": 1.0462, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.3517724336355247, |
| "grad_norm": 1.0912036895751953, |
| "learning_rate": 1.6574983628028814e-05, |
| "loss": 1.0488, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3539438684110526, |
| "grad_norm": 1.2449527978897095, |
| "learning_rate": 1.6553154333115043e-05, |
| "loss": 1.0283, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.3561153031865805, |
| "grad_norm": 1.367113471031189, |
| "learning_rate": 1.6531325038201268e-05, |
| "loss": 1.0687, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.35828673796210847, |
| "grad_norm": 1.4987077713012695, |
| "learning_rate": 1.6509495743287493e-05, |
| "loss": 1.0419, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3604581727376364, |
| "grad_norm": 1.0947941541671753, |
| "learning_rate": 1.6487666448373718e-05, |
| "loss": 1.0666, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.3626296075131643, |
| "grad_norm": 1.199379801750183, |
| "learning_rate": 1.6465837153459946e-05, |
| "loss": 1.0062, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.36480104228869226, |
| "grad_norm": 1.281281590461731, |
| "learning_rate": 1.6444007858546168e-05, |
| "loss": 1.0054, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 1.00531804561615, |
| "learning_rate": 1.6422178563632396e-05, |
| "loss": 1.0951, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3691439118397481, |
| "grad_norm": 1.2501758337020874, |
| "learning_rate": 1.6400349268718624e-05, |
| "loss": 1.0289, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.37131534661527604, |
| "grad_norm": 1.1288474798202515, |
| "learning_rate": 1.6378519973804846e-05, |
| "loss": 1.0539, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.373486781390804, |
| "grad_norm": 1.2077093124389648, |
| "learning_rate": 1.6356690678891074e-05, |
| "loss": 1.0112, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.3756582161663319, |
| "grad_norm": 1.0771955251693726, |
| "learning_rate": 1.63348613839773e-05, |
| "loss": 1.0527, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.37782965094185983, |
| "grad_norm": 1.5062224864959717, |
| "learning_rate": 1.6313032089063524e-05, |
| "loss": 1.0023, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.3800010857173878, |
| "grad_norm": 1.4642319679260254, |
| "learning_rate": 1.629120279414975e-05, |
| "loss": 1.0253, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.38217252049291567, |
| "grad_norm": 1.17564058303833, |
| "learning_rate": 1.6269373499235978e-05, |
| "loss": 1.0378, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.3843439552684436, |
| "grad_norm": 1.155928134918213, |
| "learning_rate": 1.6247544204322203e-05, |
| "loss": 1.0797, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.38651539004397156, |
| "grad_norm": 1.161272406578064, |
| "learning_rate": 1.6225714909408428e-05, |
| "loss": 1.0645, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.3886868248194995, |
| "grad_norm": 1.1190975904464722, |
| "learning_rate": 1.6203885614494653e-05, |
| "loss": 0.9895, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3908582595950274, |
| "grad_norm": 1.0364742279052734, |
| "learning_rate": 1.6182056319580877e-05, |
| "loss": 0.993, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.39302969437055535, |
| "grad_norm": 0.9722704887390137, |
| "learning_rate": 1.6160227024667106e-05, |
| "loss": 1.074, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.3952011291460833, |
| "grad_norm": 1.196349024772644, |
| "learning_rate": 1.613839772975333e-05, |
| "loss": 1.0192, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.3973725639216112, |
| "grad_norm": 1.2496604919433594, |
| "learning_rate": 1.6116568434839556e-05, |
| "loss": 1.0539, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.39954399869713914, |
| "grad_norm": 1.273461937904358, |
| "learning_rate": 1.609473913992578e-05, |
| "loss": 1.0283, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4017154334726671, |
| "grad_norm": 1.267354965209961, |
| "learning_rate": 1.607290984501201e-05, |
| "loss": 1.0265, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.403886868248195, |
| "grad_norm": 1.1388341188430786, |
| "learning_rate": 1.6051080550098234e-05, |
| "loss": 1.0998, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.4060583030237229, |
| "grad_norm": 1.7409948110580444, |
| "learning_rate": 1.602925125518446e-05, |
| "loss": 1.0246, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.40822973779925087, |
| "grad_norm": 1.0280303955078125, |
| "learning_rate": 1.6007421960270684e-05, |
| "loss": 1.075, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.41040117257477876, |
| "grad_norm": 1.093042016029358, |
| "learning_rate": 1.598559266535691e-05, |
| "loss": 1.018, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.4125726073503067, |
| "grad_norm": 0.9621986746788025, |
| "learning_rate": 1.5963763370443134e-05, |
| "loss": 1.1104, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.41474404212583466, |
| "grad_norm": 1.0159006118774414, |
| "learning_rate": 1.5941934075529362e-05, |
| "loss": 1.0231, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.4169154769013626, |
| "grad_norm": 1.2041517496109009, |
| "learning_rate": 1.5920104780615587e-05, |
| "loss": 1.0426, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.4190869116768905, |
| "grad_norm": 1.2012951374053955, |
| "learning_rate": 1.5898275485701812e-05, |
| "loss": 1.0376, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.42125834645241844, |
| "grad_norm": 1.207979679107666, |
| "learning_rate": 1.587644619078804e-05, |
| "loss": 1.0054, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.4234297812279464, |
| "grad_norm": 1.1251835823059082, |
| "learning_rate": 1.5854616895874266e-05, |
| "loss": 1.068, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.4256012160034743, |
| "grad_norm": 1.2626091241836548, |
| "learning_rate": 1.583278760096049e-05, |
| "loss": 1.0705, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.42777265077900223, |
| "grad_norm": 1.203305959701538, |
| "learning_rate": 1.5810958306046716e-05, |
| "loss": 1.0319, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4299440855545302, |
| "grad_norm": 1.3643816709518433, |
| "learning_rate": 1.578912901113294e-05, |
| "loss": 1.0383, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.43211552033005807, |
| "grad_norm": 1.3260042667388916, |
| "learning_rate": 1.5767299716219166e-05, |
| "loss": 1.0949, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.434286955105586, |
| "grad_norm": 1.0160613059997559, |
| "learning_rate": 1.5745470421305394e-05, |
| "loss": 1.0717, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.43645838988111396, |
| "grad_norm": 0.9759429693222046, |
| "learning_rate": 1.572364112639162e-05, |
| "loss": 1.023, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.43862982465664185, |
| "grad_norm": 1.268486738204956, |
| "learning_rate": 1.5701811831477844e-05, |
| "loss": 0.986, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.4408012594321698, |
| "grad_norm": 1.326611876487732, |
| "learning_rate": 1.5679982536564072e-05, |
| "loss": 1.0641, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.44297269420769775, |
| "grad_norm": 1.638113021850586, |
| "learning_rate": 1.5658153241650297e-05, |
| "loss": 1.0302, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.44514412898322564, |
| "grad_norm": 1.5037381649017334, |
| "learning_rate": 1.5636323946736522e-05, |
| "loss": 1.0291, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.4473155637587536, |
| "grad_norm": 1.1574699878692627, |
| "learning_rate": 1.5614494651822747e-05, |
| "loss": 1.0268, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.44948699853428153, |
| "grad_norm": 1.1230093240737915, |
| "learning_rate": 1.5592665356908972e-05, |
| "loss": 1.0471, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.4516584333098095, |
| "grad_norm": 1.134092092514038, |
| "learning_rate": 1.5570836061995197e-05, |
| "loss": 1.045, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.4538298680853374, |
| "grad_norm": 1.4253817796707153, |
| "learning_rate": 1.5549006767081425e-05, |
| "loss": 1.0575, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4560013028608653, |
| "grad_norm": 1.322679877281189, |
| "learning_rate": 1.552717747216765e-05, |
| "loss": 1.005, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.45817273763639327, |
| "grad_norm": 1.190661072731018, |
| "learning_rate": 1.5505348177253875e-05, |
| "loss": 1.0392, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.46034417241192116, |
| "grad_norm": 1.10509192943573, |
| "learning_rate": 1.54835188823401e-05, |
| "loss": 1.046, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.4625156071874491, |
| "grad_norm": 1.305440902709961, |
| "learning_rate": 1.546168958742633e-05, |
| "loss": 1.067, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.46468704196297705, |
| "grad_norm": 0.9714760184288025, |
| "learning_rate": 1.5439860292512554e-05, |
| "loss": 1.0648, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.46685847673850495, |
| "grad_norm": 1.249341368675232, |
| "learning_rate": 1.541803099759878e-05, |
| "loss": 1.0326, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.4690299115140329, |
| "grad_norm": 1.182078242301941, |
| "learning_rate": 1.5396201702685004e-05, |
| "loss": 0.9649, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.47120134628956084, |
| "grad_norm": 1.1989095211029053, |
| "learning_rate": 1.537437240777123e-05, |
| "loss": 1.0324, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.47337278106508873, |
| "grad_norm": 1.1520359516143799, |
| "learning_rate": 1.5352543112857457e-05, |
| "loss": 1.0453, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4755442158406167, |
| "grad_norm": 0.9840512871742249, |
| "learning_rate": 1.5330713817943682e-05, |
| "loss": 1.028, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4777156506161446, |
| "grad_norm": 1.2729812860488892, |
| "learning_rate": 1.5308884523029907e-05, |
| "loss": 1.0866, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.4798870853916726, |
| "grad_norm": 1.2075546979904175, |
| "learning_rate": 1.5287055228116132e-05, |
| "loss": 1.0633, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.48205852016720047, |
| "grad_norm": 1.6592689752578735, |
| "learning_rate": 1.526522593320236e-05, |
| "loss": 1.0107, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4842299549427284, |
| "grad_norm": 1.2771036624908447, |
| "learning_rate": 1.5243396638288585e-05, |
| "loss": 0.9855, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.48640138971825636, |
| "grad_norm": 1.4246838092803955, |
| "learning_rate": 1.522156734337481e-05, |
| "loss": 1.0405, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.48857282449378425, |
| "grad_norm": 1.1746619939804077, |
| "learning_rate": 1.5199738048461037e-05, |
| "loss": 0.9615, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.4907442592693122, |
| "grad_norm": 0.9854568243026733, |
| "learning_rate": 1.5177908753547262e-05, |
| "loss": 1.0748, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.49291569404484015, |
| "grad_norm": 1.2437796592712402, |
| "learning_rate": 1.5156079458633489e-05, |
| "loss": 1.0428, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.49508712882036804, |
| "grad_norm": 1.4417718648910522, |
| "learning_rate": 1.5134250163719712e-05, |
| "loss": 1.0339, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.497258563595896, |
| "grad_norm": 1.5475140810012817, |
| "learning_rate": 1.5112420868805939e-05, |
| "loss": 1.0054, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.49942999837142393, |
| "grad_norm": 1.2441282272338867, |
| "learning_rate": 1.5090591573892164e-05, |
| "loss": 1.0406, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5016014331469518, |
| "grad_norm": 1.3758796453475952, |
| "learning_rate": 1.506876227897839e-05, |
| "loss": 1.0709, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5037728679224798, |
| "grad_norm": 1.412845492362976, |
| "learning_rate": 1.5046932984064615e-05, |
| "loss": 1.0111, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5059443026980077, |
| "grad_norm": 1.2830110788345337, |
| "learning_rate": 1.5025103689150842e-05, |
| "loss": 1.0142, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5081157374735357, |
| "grad_norm": 1.1173603534698486, |
| "learning_rate": 1.5003274394237068e-05, |
| "loss": 1.0425, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5102871722490636, |
| "grad_norm": 1.3206751346588135, |
| "learning_rate": 1.4981445099323293e-05, |
| "loss": 1.0147, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5124586070245915, |
| "grad_norm": 1.1469355821609497, |
| "learning_rate": 1.495961580440952e-05, |
| "loss": 1.1042, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5146300418001194, |
| "grad_norm": 1.5979527235031128, |
| "learning_rate": 1.4937786509495743e-05, |
| "loss": 1.0293, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5168014765756473, |
| "grad_norm": 1.1847728490829468, |
| "learning_rate": 1.491595721458197e-05, |
| "loss": 1.0589, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5189729113511753, |
| "grad_norm": 1.1340694427490234, |
| "learning_rate": 1.4894127919668195e-05, |
| "loss": 1.008, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5211443461267032, |
| "grad_norm": 1.3193624019622803, |
| "learning_rate": 1.4872298624754422e-05, |
| "loss": 1.0455, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5233157809022312, |
| "grad_norm": 1.3973023891448975, |
| "learning_rate": 1.4850469329840647e-05, |
| "loss": 1.0455, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5254872156777591, |
| "grad_norm": 1.4529467821121216, |
| "learning_rate": 1.4828640034926873e-05, |
| "loss": 1.0726, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.527658650453287, |
| "grad_norm": 1.2769255638122559, |
| "learning_rate": 1.4806810740013098e-05, |
| "loss": 1.0646, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5298300852288149, |
| "grad_norm": 1.4367311000823975, |
| "learning_rate": 1.4784981445099325e-05, |
| "loss": 0.9821, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5320015200043429, |
| "grad_norm": 1.6156344413757324, |
| "learning_rate": 1.4763152150185552e-05, |
| "loss": 1.0581, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5341729547798708, |
| "grad_norm": 1.093217372894287, |
| "learning_rate": 1.4741322855271775e-05, |
| "loss": 1.0125, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5363443895553988, |
| "grad_norm": 1.3095054626464844, |
| "learning_rate": 1.4719493560358002e-05, |
| "loss": 1.0268, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5385158243309267, |
| "grad_norm": 1.3275405168533325, |
| "learning_rate": 1.4697664265444227e-05, |
| "loss": 1.036, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5406872591064545, |
| "grad_norm": 1.1585111618041992, |
| "learning_rate": 1.4675834970530453e-05, |
| "loss": 1.0893, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5428586938819825, |
| "grad_norm": 1.1334049701690674, |
| "learning_rate": 1.4654005675616678e-05, |
| "loss": 1.0356, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5450301286575104, |
| "grad_norm": 1.1498132944107056, |
| "learning_rate": 1.4632176380702905e-05, |
| "loss": 1.0297, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5472015634330384, |
| "grad_norm": 1.3892987966537476, |
| "learning_rate": 1.461034708578913e-05, |
| "loss": 0.9885, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.5493729982085663, |
| "grad_norm": 1.1444848775863647, |
| "learning_rate": 1.4588517790875357e-05, |
| "loss": 1.0221, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5515444329840943, |
| "grad_norm": 1.0999592542648315, |
| "learning_rate": 1.456668849596158e-05, |
| "loss": 1.0053, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.5537158677596221, |
| "grad_norm": 1.2366653680801392, |
| "learning_rate": 1.4544859201047807e-05, |
| "loss": 0.9872, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5558873025351501, |
| "grad_norm": 1.066278100013733, |
| "learning_rate": 1.4523029906134035e-05, |
| "loss": 1.0833, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.558058737310678, |
| "grad_norm": 1.418614149093628, |
| "learning_rate": 1.4501200611220258e-05, |
| "loss": 1.0316, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.560230172086206, |
| "grad_norm": 1.2488312721252441, |
| "learning_rate": 1.4479371316306485e-05, |
| "loss": 1.0261, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.5624016068617339, |
| "grad_norm": 1.1262556314468384, |
| "learning_rate": 1.445754202139271e-05, |
| "loss": 0.9926, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5645730416372619, |
| "grad_norm": 1.2547680139541626, |
| "learning_rate": 1.4435712726478936e-05, |
| "loss": 1.0211, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5667444764127898, |
| "grad_norm": 1.3836477994918823, |
| "learning_rate": 1.4413883431565161e-05, |
| "loss": 0.9906, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5689159111883176, |
| "grad_norm": 1.0498002767562866, |
| "learning_rate": 1.4392054136651388e-05, |
| "loss": 0.9824, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5710873459638456, |
| "grad_norm": 0.9665150046348572, |
| "learning_rate": 1.4370224841737611e-05, |
| "loss": 1.0113, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5732587807393735, |
| "grad_norm": 1.2889072895050049, |
| "learning_rate": 1.434839554682384e-05, |
| "loss": 0.9909, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5754302155149015, |
| "grad_norm": 1.5180598497390747, |
| "learning_rate": 1.4326566251910063e-05, |
| "loss": 1.0092, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5776016502904294, |
| "grad_norm": 1.6388850212097168, |
| "learning_rate": 1.430473695699629e-05, |
| "loss": 1.0116, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5797730850659574, |
| "grad_norm": 1.2516218423843384, |
| "learning_rate": 1.4282907662082516e-05, |
| "loss": 0.9775, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5819445198414852, |
| "grad_norm": 1.1634091138839722, |
| "learning_rate": 1.4261078367168741e-05, |
| "loss": 0.9898, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.5841159546170132, |
| "grad_norm": 1.1283944845199585, |
| "learning_rate": 1.4239249072254968e-05, |
| "loss": 1.0265, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5862873893925411, |
| "grad_norm": 1.3887890577316284, |
| "learning_rate": 1.4217419777341193e-05, |
| "loss": 1.0433, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.588458824168069, |
| "grad_norm": 1.2213870286941528, |
| "learning_rate": 1.419559048242742e-05, |
| "loss": 1.0116, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.590630258943597, |
| "grad_norm": 1.2879663705825806, |
| "learning_rate": 1.4173761187513645e-05, |
| "loss": 0.9866, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.592801693719125, |
| "grad_norm": 1.3769855499267578, |
| "learning_rate": 1.4151931892599871e-05, |
| "loss": 0.9735, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.5949731284946529, |
| "grad_norm": 1.347123146057129, |
| "learning_rate": 1.4130102597686095e-05, |
| "loss": 1.0169, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.5971445632701807, |
| "grad_norm": 1.093166708946228, |
| "learning_rate": 1.4108273302772321e-05, |
| "loss": 1.0288, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.5993159980457087, |
| "grad_norm": 1.3573272228240967, |
| "learning_rate": 1.4086444007858546e-05, |
| "loss": 1.0042, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6014874328212366, |
| "grad_norm": 1.400972604751587, |
| "learning_rate": 1.4064614712944773e-05, |
| "loss": 1.0181, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6036588675967646, |
| "grad_norm": 1.1371185779571533, |
| "learning_rate": 1.4042785418031e-05, |
| "loss": 1.0504, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6058303023722925, |
| "grad_norm": 1.31002676486969, |
| "learning_rate": 1.4020956123117225e-05, |
| "loss": 1.0615, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6080017371478205, |
| "grad_norm": 1.557403326034546, |
| "learning_rate": 1.3999126828203451e-05, |
| "loss": 1.0286, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6101731719233483, |
| "grad_norm": 1.2506225109100342, |
| "learning_rate": 1.3977297533289676e-05, |
| "loss": 1.0316, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6123446066988762, |
| "grad_norm": 1.2750262022018433, |
| "learning_rate": 1.3955468238375903e-05, |
| "loss": 1.0691, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6145160414744042, |
| "grad_norm": 1.3119608163833618, |
| "learning_rate": 1.3933638943462126e-05, |
| "loss": 1.0107, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6166874762499321, |
| "grad_norm": 1.269987940788269, |
| "learning_rate": 1.3911809648548353e-05, |
| "loss": 1.0298, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6188589110254601, |
| "grad_norm": 1.1371833086013794, |
| "learning_rate": 1.3889980353634578e-05, |
| "loss": 1.0541, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.621030345800988, |
| "grad_norm": 1.2296518087387085, |
| "learning_rate": 1.3868151058720804e-05, |
| "loss": 1.0225, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.623201780576516, |
| "grad_norm": 1.5416007041931152, |
| "learning_rate": 1.384632176380703e-05, |
| "loss": 0.9838, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6253732153520438, |
| "grad_norm": 1.2770878076553345, |
| "learning_rate": 1.3824492468893256e-05, |
| "loss": 0.9917, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6275446501275718, |
| "grad_norm": 1.3633027076721191, |
| "learning_rate": 1.3802663173979483e-05, |
| "loss": 1.0636, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6297160849030997, |
| "grad_norm": 1.2924447059631348, |
| "learning_rate": 1.3780833879065708e-05, |
| "loss": 1.0151, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6318875196786277, |
| "grad_norm": 1.3453025817871094, |
| "learning_rate": 1.3759004584151934e-05, |
| "loss": 1.0053, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6340589544541556, |
| "grad_norm": 1.497462511062622, |
| "learning_rate": 1.3737175289238158e-05, |
| "loss": 0.9936, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6362303892296836, |
| "grad_norm": 1.0469037294387817, |
| "learning_rate": 1.3715345994324384e-05, |
| "loss": 1.0465, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6384018240052114, |
| "grad_norm": 1.4272680282592773, |
| "learning_rate": 1.369351669941061e-05, |
| "loss": 1.0634, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6405732587807393, |
| "grad_norm": 1.065047264099121, |
| "learning_rate": 1.3671687404496836e-05, |
| "loss": 1.0464, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6427446935562673, |
| "grad_norm": 1.3233064413070679, |
| "learning_rate": 1.3649858109583061e-05, |
| "loss": 1.07, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6449161283317952, |
| "grad_norm": 1.792734980583191, |
| "learning_rate": 1.3628028814669288e-05, |
| "loss": 0.9722, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6470875631073232, |
| "grad_norm": 1.7977020740509033, |
| "learning_rate": 1.3606199519755514e-05, |
| "loss": 0.9811, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6492589978828511, |
| "grad_norm": 1.2973439693450928, |
| "learning_rate": 1.358437022484174e-05, |
| "loss": 0.9958, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6514304326583791, |
| "grad_norm": 1.249764323234558, |
| "learning_rate": 1.3562540929927966e-05, |
| "loss": 1.0675, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.6536018674339069, |
| "grad_norm": 1.343056559562683, |
| "learning_rate": 1.354071163501419e-05, |
| "loss": 1.0493, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.6557733022094349, |
| "grad_norm": 1.6171714067459106, |
| "learning_rate": 1.3518882340100416e-05, |
| "loss": 1.044, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.6579447369849628, |
| "grad_norm": 1.2323534488677979, |
| "learning_rate": 1.3497053045186641e-05, |
| "loss": 1.0386, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6601161717604908, |
| "grad_norm": 1.1134217977523804, |
| "learning_rate": 1.3475223750272868e-05, |
| "loss": 1.0225, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.6622876065360187, |
| "grad_norm": 1.6027779579162598, |
| "learning_rate": 1.3453394455359093e-05, |
| "loss": 1.0195, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.6644590413115466, |
| "grad_norm": 1.3403127193450928, |
| "learning_rate": 1.343156516044532e-05, |
| "loss": 0.9529, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6666304760870745, |
| "grad_norm": 1.3543404340744019, |
| "learning_rate": 1.3409735865531544e-05, |
| "loss": 0.9783, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.6688019108626024, |
| "grad_norm": 1.1751652956008911, |
| "learning_rate": 1.3387906570617771e-05, |
| "loss": 1.0199, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.6709733456381304, |
| "grad_norm": 1.44953453540802, |
| "learning_rate": 1.3366077275703998e-05, |
| "loss": 1.04, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.6731447804136583, |
| "grad_norm": 1.2177844047546387, |
| "learning_rate": 1.334424798079022e-05, |
| "loss": 1.0228, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.6753162151891863, |
| "grad_norm": 1.2051985263824463, |
| "learning_rate": 1.3322418685876447e-05, |
| "loss": 0.9834, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.6774876499647142, |
| "grad_norm": 1.249619483947754, |
| "learning_rate": 1.3300589390962672e-05, |
| "loss": 1.0089, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6796590847402422, |
| "grad_norm": 1.3662210702896118, |
| "learning_rate": 1.3278760096048899e-05, |
| "loss": 1.0312, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.68183051951577, |
| "grad_norm": 1.2683398723602295, |
| "learning_rate": 1.3256930801135124e-05, |
| "loss": 0.9806, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.684001954291298, |
| "grad_norm": 1.3069689273834229, |
| "learning_rate": 1.323510150622135e-05, |
| "loss": 1.009, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6861733890668259, |
| "grad_norm": 1.4314109086990356, |
| "learning_rate": 1.3213272211307576e-05, |
| "loss": 0.9918, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.6883448238423538, |
| "grad_norm": 1.2950971126556396, |
| "learning_rate": 1.3191442916393802e-05, |
| "loss": 1.0356, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.6905162586178818, |
| "grad_norm": 1.4553663730621338, |
| "learning_rate": 1.3169613621480026e-05, |
| "loss": 1.0255, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.6926876933934097, |
| "grad_norm": 1.439324140548706, |
| "learning_rate": 1.3147784326566252e-05, |
| "loss": 1.0561, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.6948591281689376, |
| "grad_norm": 1.1153829097747803, |
| "learning_rate": 1.3125955031652479e-05, |
| "loss": 1.017, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.6970305629444655, |
| "grad_norm": 1.1670260429382324, |
| "learning_rate": 1.3104125736738704e-05, |
| "loss": 1.0096, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.6992019977199935, |
| "grad_norm": 1.400228500366211, |
| "learning_rate": 1.308229644182493e-05, |
| "loss": 1.0309, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.7013734324955214, |
| "grad_norm": 1.1673344373703003, |
| "learning_rate": 1.3060467146911156e-05, |
| "loss": 1.0515, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.7035448672710494, |
| "grad_norm": 1.152686357498169, |
| "learning_rate": 1.3038637851997382e-05, |
| "loss": 0.9687, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.7057163020465773, |
| "grad_norm": 1.3322280645370483, |
| "learning_rate": 1.3016808557083607e-05, |
| "loss": 1.0255, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.7078877368221052, |
| "grad_norm": 1.0270700454711914, |
| "learning_rate": 1.2994979262169834e-05, |
| "loss": 1.0124, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.7100591715976331, |
| "grad_norm": 1.0481797456741333, |
| "learning_rate": 1.2973149967256057e-05, |
| "loss": 1.0298, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.712230606373161, |
| "grad_norm": 1.358763337135315, |
| "learning_rate": 1.2951320672342284e-05, |
| "loss": 1.0009, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.714402041148689, |
| "grad_norm": 1.3017981052398682, |
| "learning_rate": 1.2929491377428509e-05, |
| "loss": 1.0362, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.7165734759242169, |
| "grad_norm": 1.4643291234970093, |
| "learning_rate": 1.2907662082514736e-05, |
| "loss": 0.96, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.7187449106997449, |
| "grad_norm": 1.158682942390442, |
| "learning_rate": 1.2885832787600962e-05, |
| "loss": 0.9807, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.7209163454752728, |
| "grad_norm": 1.2945632934570312, |
| "learning_rate": 1.2864003492687187e-05, |
| "loss": 0.977, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.7230877802508007, |
| "grad_norm": 1.6654890775680542, |
| "learning_rate": 1.2842174197773414e-05, |
| "loss": 1.0128, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.7252592150263286, |
| "grad_norm": 1.2067387104034424, |
| "learning_rate": 1.2820344902859639e-05, |
| "loss": 1.0261, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.7274306498018566, |
| "grad_norm": 1.4484736919403076, |
| "learning_rate": 1.2798515607945866e-05, |
| "loss": 1.0055, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7296020845773845, |
| "grad_norm": 1.428499698638916, |
| "learning_rate": 1.2776686313032089e-05, |
| "loss": 1.0584, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7317735193529125, |
| "grad_norm": 1.454953670501709, |
| "learning_rate": 1.2754857018118315e-05, |
| "loss": 1.0327, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 1.1868793964385986, |
| "learning_rate": 1.273302772320454e-05, |
| "loss": 1.019, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.7361163889039682, |
| "grad_norm": 1.2822529077529907, |
| "learning_rate": 1.2711198428290767e-05, |
| "loss": 0.9966, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.7382878236794962, |
| "grad_norm": 1.2787412405014038, |
| "learning_rate": 1.2689369133376992e-05, |
| "loss": 1.0473, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7404592584550241, |
| "grad_norm": 1.3772400617599487, |
| "learning_rate": 1.2667539838463219e-05, |
| "loss": 0.9689, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.7426306932305521, |
| "grad_norm": 1.2161903381347656, |
| "learning_rate": 1.2645710543549445e-05, |
| "loss": 1.0082, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.74480212800608, |
| "grad_norm": 1.489033579826355, |
| "learning_rate": 1.262388124863567e-05, |
| "loss": 1.0139, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.746973562781608, |
| "grad_norm": 1.3982605934143066, |
| "learning_rate": 1.2602051953721897e-05, |
| "loss": 0.9921, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.7491449975571359, |
| "grad_norm": 1.378158450126648, |
| "learning_rate": 1.258022265880812e-05, |
| "loss": 0.9678, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7513164323326638, |
| "grad_norm": 1.4947155714035034, |
| "learning_rate": 1.2558393363894347e-05, |
| "loss": 1.0051, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7534878671081917, |
| "grad_norm": 1.1531239748001099, |
| "learning_rate": 1.2536564068980572e-05, |
| "loss": 1.1186, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7556593018837197, |
| "grad_norm": 1.38021981716156, |
| "learning_rate": 1.2514734774066799e-05, |
| "loss": 0.9771, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.7578307366592476, |
| "grad_norm": 1.2459088563919067, |
| "learning_rate": 1.2492905479153024e-05, |
| "loss": 1.0216, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7600021714347756, |
| "grad_norm": 2.1082191467285156, |
| "learning_rate": 1.247107618423925e-05, |
| "loss": 0.9956, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7621736062103035, |
| "grad_norm": 1.1670981645584106, |
| "learning_rate": 1.2449246889325475e-05, |
| "loss": 1.0518, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.7643450409858313, |
| "grad_norm": 1.453430414199829, |
| "learning_rate": 1.2427417594411702e-05, |
| "loss": 0.986, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.7665164757613593, |
| "grad_norm": 0.9967979788780212, |
| "learning_rate": 1.2405588299497929e-05, |
| "loss": 1.0468, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.7686879105368872, |
| "grad_norm": 1.5002816915512085, |
| "learning_rate": 1.2383759004584152e-05, |
| "loss": 1.0078, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.7708593453124152, |
| "grad_norm": 1.4501177072525024, |
| "learning_rate": 1.236192970967038e-05, |
| "loss": 0.9615, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.7730307800879431, |
| "grad_norm": 1.377883791923523, |
| "learning_rate": 1.2340100414756604e-05, |
| "loss": 0.9432, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.7752022148634711, |
| "grad_norm": 1.2856801748275757, |
| "learning_rate": 1.231827111984283e-05, |
| "loss": 0.9958, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.777373649638999, |
| "grad_norm": 1.2119390964508057, |
| "learning_rate": 1.2296441824929055e-05, |
| "loss": 1.0029, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.7795450844145269, |
| "grad_norm": 1.4396144151687622, |
| "learning_rate": 1.2274612530015282e-05, |
| "loss": 1.0159, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.7817165191900548, |
| "grad_norm": 1.6045223474502563, |
| "learning_rate": 1.2252783235101507e-05, |
| "loss": 0.9767, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.7838879539655828, |
| "grad_norm": 1.2426387071609497, |
| "learning_rate": 1.2230953940187733e-05, |
| "loss": 1.0287, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.7860593887411107, |
| "grad_norm": 1.2435184717178345, |
| "learning_rate": 1.2209124645273957e-05, |
| "loss": 1.0052, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.7882308235166386, |
| "grad_norm": 1.6939178705215454, |
| "learning_rate": 1.2187295350360185e-05, |
| "loss": 1.0216, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.7904022582921666, |
| "grad_norm": 1.1843641996383667, |
| "learning_rate": 1.2165466055446412e-05, |
| "loss": 0.9738, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.7925736930676944, |
| "grad_norm": 1.3802050352096558, |
| "learning_rate": 1.2143636760532635e-05, |
| "loss": 0.9216, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.7947451278432224, |
| "grad_norm": 1.2471121549606323, |
| "learning_rate": 1.2121807465618862e-05, |
| "loss": 0.977, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.7969165626187503, |
| "grad_norm": 1.3608779907226562, |
| "learning_rate": 1.2099978170705087e-05, |
| "loss": 1.009, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.7990879973942783, |
| "grad_norm": 1.4472932815551758, |
| "learning_rate": 1.2078148875791313e-05, |
| "loss": 0.9946, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.8012594321698062, |
| "grad_norm": 1.7036590576171875, |
| "learning_rate": 1.2056319580877538e-05, |
| "loss": 0.999, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.8034308669453342, |
| "grad_norm": 1.258748173713684, |
| "learning_rate": 1.2034490285963765e-05, |
| "loss": 0.9968, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8056023017208621, |
| "grad_norm": 1.191994547843933, |
| "learning_rate": 1.2012660991049988e-05, |
| "loss": 0.9941, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.80777373649639, |
| "grad_norm": 1.9393503665924072, |
| "learning_rate": 1.1990831696136217e-05, |
| "loss": 1.0167, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.8099451712719179, |
| "grad_norm": 1.5484780073165894, |
| "learning_rate": 1.196900240122244e-05, |
| "loss": 0.9962, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.8121166060474458, |
| "grad_norm": 1.2578662633895874, |
| "learning_rate": 1.1947173106308667e-05, |
| "loss": 1.0057, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.8142880408229738, |
| "grad_norm": 1.3208587169647217, |
| "learning_rate": 1.1925343811394893e-05, |
| "loss": 1.0086, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8164594755985017, |
| "grad_norm": 1.2795675992965698, |
| "learning_rate": 1.1903514516481118e-05, |
| "loss": 1.051, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.8186309103740297, |
| "grad_norm": 1.1958470344543457, |
| "learning_rate": 1.1881685221567345e-05, |
| "loss": 0.9974, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.8208023451495575, |
| "grad_norm": 1.1479548215866089, |
| "learning_rate": 1.185985592665357e-05, |
| "loss": 0.9678, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.8229737799250855, |
| "grad_norm": 1.616144061088562, |
| "learning_rate": 1.1838026631739797e-05, |
| "loss": 1.0586, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.8251452147006134, |
| "grad_norm": 1.3224166631698608, |
| "learning_rate": 1.1816197336826022e-05, |
| "loss": 1.0533, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8273166494761414, |
| "grad_norm": 1.6158727407455444, |
| "learning_rate": 1.1794368041912248e-05, |
| "loss": 1.0128, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8294880842516693, |
| "grad_norm": 1.3982148170471191, |
| "learning_rate": 1.1772538746998472e-05, |
| "loss": 1.0249, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.8316595190271973, |
| "grad_norm": 1.2071058750152588, |
| "learning_rate": 1.1750709452084698e-05, |
| "loss": 0.965, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.8338309538027252, |
| "grad_norm": 1.3230708837509155, |
| "learning_rate": 1.1728880157170925e-05, |
| "loss": 0.9441, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.836002388578253, |
| "grad_norm": 1.106053113937378, |
| "learning_rate": 1.170705086225715e-05, |
| "loss": 1.0488, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.838173823353781, |
| "grad_norm": 1.5212702751159668, |
| "learning_rate": 1.1685221567343377e-05, |
| "loss": 1.0445, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.8403452581293089, |
| "grad_norm": 1.3804950714111328, |
| "learning_rate": 1.1663392272429601e-05, |
| "loss": 1.0183, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.8425166929048369, |
| "grad_norm": 1.3932008743286133, |
| "learning_rate": 1.1641562977515828e-05, |
| "loss": 1.0027, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.8446881276803648, |
| "grad_norm": 1.3928159475326538, |
| "learning_rate": 1.1619733682602053e-05, |
| "loss": 0.9646, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.8468595624558928, |
| "grad_norm": 1.3050851821899414, |
| "learning_rate": 1.159790438768828e-05, |
| "loss": 1.0099, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8490309972314206, |
| "grad_norm": 1.2780051231384277, |
| "learning_rate": 1.1576075092774503e-05, |
| "loss": 0.9859, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.8512024320069486, |
| "grad_norm": 1.317460060119629, |
| "learning_rate": 1.155424579786073e-05, |
| "loss": 1.0021, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.8533738667824765, |
| "grad_norm": 1.4765187501907349, |
| "learning_rate": 1.1532416502946955e-05, |
| "loss": 1.0305, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.8555453015580045, |
| "grad_norm": 1.1514675617218018, |
| "learning_rate": 1.1510587208033181e-05, |
| "loss": 0.946, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8577167363335324, |
| "grad_norm": 1.3265900611877441, |
| "learning_rate": 1.1488757913119408e-05, |
| "loss": 0.9427, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8598881711090604, |
| "grad_norm": 1.4531445503234863, |
| "learning_rate": 1.1466928618205633e-05, |
| "loss": 1.0272, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.8620596058845882, |
| "grad_norm": 1.0620979070663452, |
| "learning_rate": 1.144509932329186e-05, |
| "loss": 1.0114, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8642310406601161, |
| "grad_norm": 1.086349606513977, |
| "learning_rate": 1.1423270028378085e-05, |
| "loss": 0.9946, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.8664024754356441, |
| "grad_norm": 1.3090065717697144, |
| "learning_rate": 1.1401440733464311e-05, |
| "loss": 0.9915, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.868573910211172, |
| "grad_norm": 1.1086080074310303, |
| "learning_rate": 1.1379611438550535e-05, |
| "loss": 0.9599, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8707453449867, |
| "grad_norm": 1.4512288570404053, |
| "learning_rate": 1.1357782143636761e-05, |
| "loss": 1.0143, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.8729167797622279, |
| "grad_norm": 1.2470262050628662, |
| "learning_rate": 1.1335952848722986e-05, |
| "loss": 0.9715, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.8750882145377559, |
| "grad_norm": 1.5051038265228271, |
| "learning_rate": 1.1314123553809213e-05, |
| "loss": 1.0206, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.8772596493132837, |
| "grad_norm": 1.607826828956604, |
| "learning_rate": 1.1292294258895438e-05, |
| "loss": 0.9833, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.8794310840888117, |
| "grad_norm": 1.431874394416809, |
| "learning_rate": 1.1270464963981665e-05, |
| "loss": 1.0264, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.8816025188643396, |
| "grad_norm": 1.440034031867981, |
| "learning_rate": 1.1248635669067891e-05, |
| "loss": 1.0013, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.8837739536398675, |
| "grad_norm": 1.4963476657867432, |
| "learning_rate": 1.1226806374154116e-05, |
| "loss": 0.9861, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.8859453884153955, |
| "grad_norm": 1.5683997869491577, |
| "learning_rate": 1.1204977079240343e-05, |
| "loss": 1.0247, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.8881168231909234, |
| "grad_norm": 1.4047991037368774, |
| "learning_rate": 1.1183147784326566e-05, |
| "loss": 0.9966, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.8902882579664513, |
| "grad_norm": 1.3178616762161255, |
| "learning_rate": 1.1161318489412793e-05, |
| "loss": 1.0107, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.8924596927419792, |
| "grad_norm": 1.5227705240249634, |
| "learning_rate": 1.1139489194499018e-05, |
| "loss": 0.9826, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.8946311275175072, |
| "grad_norm": 1.4800081253051758, |
| "learning_rate": 1.1117659899585244e-05, |
| "loss": 1.0544, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.8968025622930351, |
| "grad_norm": 1.3340637683868408, |
| "learning_rate": 1.109583060467147e-05, |
| "loss": 1.0342, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.8989739970685631, |
| "grad_norm": 1.6699985265731812, |
| "learning_rate": 1.1074001309757696e-05, |
| "loss": 0.9726, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.901145431844091, |
| "grad_norm": 1.466199517250061, |
| "learning_rate": 1.1052172014843921e-05, |
| "loss": 0.9623, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.903316866619619, |
| "grad_norm": 1.6779991388320923, |
| "learning_rate": 1.1030342719930148e-05, |
| "loss": 1.033, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.9054883013951468, |
| "grad_norm": 1.338218331336975, |
| "learning_rate": 1.1008513425016374e-05, |
| "loss": 0.983, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.9076597361706747, |
| "grad_norm": 1.430690884590149, |
| "learning_rate": 1.0986684130102598e-05, |
| "loss": 0.9673, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.9098311709462027, |
| "grad_norm": 1.381343126296997, |
| "learning_rate": 1.0964854835188824e-05, |
| "loss": 0.9552, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.9120026057217306, |
| "grad_norm": 1.2798620462417603, |
| "learning_rate": 1.094302554027505e-05, |
| "loss": 1.0115, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9141740404972586, |
| "grad_norm": 1.5903421640396118, |
| "learning_rate": 1.0921196245361276e-05, |
| "loss": 0.9903, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.9163454752727865, |
| "grad_norm": 1.1908365488052368, |
| "learning_rate": 1.0899366950447501e-05, |
| "loss": 1.0046, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.9185169100483144, |
| "grad_norm": 1.1967812776565552, |
| "learning_rate": 1.0877537655533728e-05, |
| "loss": 0.9842, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.9206883448238423, |
| "grad_norm": 1.2975422143936157, |
| "learning_rate": 1.0855708360619953e-05, |
| "loss": 1.1223, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.9228597795993703, |
| "grad_norm": 1.213766098022461, |
| "learning_rate": 1.083387906570618e-05, |
| "loss": 1.0106, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.9250312143748982, |
| "grad_norm": 1.301695704460144, |
| "learning_rate": 1.0812049770792403e-05, |
| "loss": 0.9959, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.9272026491504262, |
| "grad_norm": 1.3527394533157349, |
| "learning_rate": 1.079022047587863e-05, |
| "loss": 1.0124, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.9293740839259541, |
| "grad_norm": 1.3432750701904297, |
| "learning_rate": 1.0768391180964856e-05, |
| "loss": 1.0047, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.9315455187014821, |
| "grad_norm": 1.329483151435852, |
| "learning_rate": 1.0746561886051081e-05, |
| "loss": 1.0124, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.9337169534770099, |
| "grad_norm": 1.430738091468811, |
| "learning_rate": 1.0724732591137308e-05, |
| "loss": 0.9462, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9358883882525378, |
| "grad_norm": 1.491452693939209, |
| "learning_rate": 1.0702903296223533e-05, |
| "loss": 0.9885, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.9380598230280658, |
| "grad_norm": 1.4353605508804321, |
| "learning_rate": 1.068107400130976e-05, |
| "loss": 1.0131, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9402312578035937, |
| "grad_norm": 1.1809788942337036, |
| "learning_rate": 1.0659244706395984e-05, |
| "loss": 0.9926, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.9424026925791217, |
| "grad_norm": 1.2355526685714722, |
| "learning_rate": 1.0637415411482211e-05, |
| "loss": 0.9945, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.9445741273546496, |
| "grad_norm": 1.3314152956008911, |
| "learning_rate": 1.0615586116568434e-05, |
| "loss": 1.0037, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9467455621301775, |
| "grad_norm": 1.2427114248275757, |
| "learning_rate": 1.059375682165466e-05, |
| "loss": 1.0048, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.9489169969057054, |
| "grad_norm": 1.298858642578125, |
| "learning_rate": 1.0571927526740886e-05, |
| "loss": 1.068, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.9510884316812334, |
| "grad_norm": 1.432786226272583, |
| "learning_rate": 1.0550098231827112e-05, |
| "loss": 0.9922, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.9532598664567613, |
| "grad_norm": 1.3567193746566772, |
| "learning_rate": 1.0528268936913339e-05, |
| "loss": 1.0097, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.9554313012322893, |
| "grad_norm": 1.4737164974212646, |
| "learning_rate": 1.0506439641999564e-05, |
| "loss": 1.0053, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9576027360078172, |
| "grad_norm": 1.1993675231933594, |
| "learning_rate": 1.048461034708579e-05, |
| "loss": 1.0553, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.9597741707833451, |
| "grad_norm": 1.483333945274353, |
| "learning_rate": 1.0462781052172016e-05, |
| "loss": 1.0163, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.961945605558873, |
| "grad_norm": 1.4248449802398682, |
| "learning_rate": 1.0440951757258242e-05, |
| "loss": 1.0013, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.9641170403344009, |
| "grad_norm": 1.4888718128204346, |
| "learning_rate": 1.0419122462344466e-05, |
| "loss": 1.0428, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.9662884751099289, |
| "grad_norm": 1.2882726192474365, |
| "learning_rate": 1.0397293167430692e-05, |
| "loss": 0.9764, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.9684599098854568, |
| "grad_norm": 1.3666644096374512, |
| "learning_rate": 1.0375463872516917e-05, |
| "loss": 1.0235, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.9706313446609848, |
| "grad_norm": 1.5665643215179443, |
| "learning_rate": 1.0353634577603144e-05, |
| "loss": 0.9966, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.9728027794365127, |
| "grad_norm": 1.4171271324157715, |
| "learning_rate": 1.0331805282689369e-05, |
| "loss": 1.021, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.9749742142120406, |
| "grad_norm": 1.4926506280899048, |
| "learning_rate": 1.0309975987775596e-05, |
| "loss": 0.9794, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.9771456489875685, |
| "grad_norm": 1.1166307926177979, |
| "learning_rate": 1.0288146692861822e-05, |
| "loss": 1.0138, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.9793170837630965, |
| "grad_norm": 1.515855312347412, |
| "learning_rate": 1.0266317397948047e-05, |
| "loss": 0.9691, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.9814885185386244, |
| "grad_norm": 1.421080231666565, |
| "learning_rate": 1.0244488103034274e-05, |
| "loss": 0.9646, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.9836599533141523, |
| "grad_norm": 1.4241400957107544, |
| "learning_rate": 1.0222658808120497e-05, |
| "loss": 0.9447, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.9858313880896803, |
| "grad_norm": 1.6205312013626099, |
| "learning_rate": 1.0200829513206724e-05, |
| "loss": 0.9843, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.9880028228652082, |
| "grad_norm": 1.3039618730545044, |
| "learning_rate": 1.0179000218292949e-05, |
| "loss": 1.0065, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.9901742576407361, |
| "grad_norm": 1.4685053825378418, |
| "learning_rate": 1.0157170923379176e-05, |
| "loss": 0.9925, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.992345692416264, |
| "grad_norm": 1.2964003086090088, |
| "learning_rate": 1.01353416284654e-05, |
| "loss": 1.0104, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.994517127191792, |
| "grad_norm": 1.4937127828598022, |
| "learning_rate": 1.0113512333551627e-05, |
| "loss": 1.0642, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.9966885619673199, |
| "grad_norm": 1.2731589078903198, |
| "learning_rate": 1.0091683038637854e-05, |
| "loss": 1.0486, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.9988599967428479, |
| "grad_norm": 1.3573518991470337, |
| "learning_rate": 1.0069853743724079e-05, |
| "loss": 0.9839, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.0008685739102112, |
| "grad_norm": 1.6150940656661987, |
| "learning_rate": 1.0048024448810306e-05, |
| "loss": 1.0595, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.0030400086857392, |
| "grad_norm": 1.435672640800476, |
| "learning_rate": 1.0026195153896529e-05, |
| "loss": 1.0018, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.0052114434612671, |
| "grad_norm": 1.3522926568984985, |
| "learning_rate": 1.0004365858982757e-05, |
| "loss": 0.9845, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.007382878236795, |
| "grad_norm": 1.327671766281128, |
| "learning_rate": 9.98253656406898e-06, |
| "loss": 0.947, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.0095543130123228, |
| "grad_norm": 1.40632164478302, |
| "learning_rate": 9.960707269155207e-06, |
| "loss": 0.955, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.0117257477878507, |
| "grad_norm": 1.7449159622192383, |
| "learning_rate": 9.938877974241434e-06, |
| "loss": 0.9406, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.0138971825633787, |
| "grad_norm": 1.410897135734558, |
| "learning_rate": 9.917048679327659e-06, |
| "loss": 1.0253, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.0160686173389066, |
| "grad_norm": 1.3368771076202393, |
| "learning_rate": 9.895219384413884e-06, |
| "loss": 0.9922, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.0182400521144346, |
| "grad_norm": 1.2922542095184326, |
| "learning_rate": 9.87339008950011e-06, |
| "loss": 1.0153, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.0204114868899625, |
| "grad_norm": 1.4930267333984375, |
| "learning_rate": 9.851560794586335e-06, |
| "loss": 0.9725, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.0225829216654905, |
| "grad_norm": 1.2955012321472168, |
| "learning_rate": 9.829731499672562e-06, |
| "loss": 0.9858, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.0247543564410184, |
| "grad_norm": 1.5806477069854736, |
| "learning_rate": 9.807902204758787e-06, |
| "loss": 0.9046, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.0269257912165464, |
| "grad_norm": 1.3869348764419556, |
| "learning_rate": 9.786072909845012e-06, |
| "loss": 1.0147, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.0290972259920743, |
| "grad_norm": 1.4592316150665283, |
| "learning_rate": 9.764243614931239e-06, |
| "loss": 0.9497, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.0312686607676023, |
| "grad_norm": 1.9150491952896118, |
| "learning_rate": 9.742414320017464e-06, |
| "loss": 0.9584, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.0334400955431302, |
| "grad_norm": 1.2069435119628906, |
| "learning_rate": 9.72058502510369e-06, |
| "loss": 0.9853, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.0356115303186582, |
| "grad_norm": 1.521933674812317, |
| "learning_rate": 9.698755730189915e-06, |
| "loss": 0.9611, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.0377829650941859, |
| "grad_norm": 1.9448108673095703, |
| "learning_rate": 9.676926435276142e-06, |
| "loss": 1.0092, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.0399543998697138, |
| "grad_norm": 1.577696442604065, |
| "learning_rate": 9.655097140362367e-06, |
| "loss": 1.0072, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.0421258346452418, |
| "grad_norm": 1.9846240282058716, |
| "learning_rate": 9.633267845448594e-06, |
| "loss": 0.9533, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.0442972694207697, |
| "grad_norm": 1.4275234937667847, |
| "learning_rate": 9.611438550534819e-06, |
| "loss": 0.9662, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.0464687041962977, |
| "grad_norm": 1.548954963684082, |
| "learning_rate": 9.589609255621044e-06, |
| "loss": 0.9454, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.0486401389718256, |
| "grad_norm": 1.8117595911026, |
| "learning_rate": 9.56777996070727e-06, |
| "loss": 0.9623, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.0508115737473536, |
| "grad_norm": 1.417375087738037, |
| "learning_rate": 9.545950665793495e-06, |
| "loss": 0.9791, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.0529830085228815, |
| "grad_norm": 1.2770414352416992, |
| "learning_rate": 9.52412137087972e-06, |
| "loss": 0.8828, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.0551544432984095, |
| "grad_norm": 1.3013825416564941, |
| "learning_rate": 9.502292075965947e-06, |
| "loss": 1.029, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.0573258780739374, |
| "grad_norm": 1.5322422981262207, |
| "learning_rate": 9.480462781052174e-06, |
| "loss": 0.9592, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.0594973128494654, |
| "grad_norm": 1.7801984548568726, |
| "learning_rate": 9.458633486138398e-06, |
| "loss": 0.9531, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.0616687476249933, |
| "grad_norm": 2.0160224437713623, |
| "learning_rate": 9.436804191224625e-06, |
| "loss": 0.9471, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.0638401824005213, |
| "grad_norm": 1.4919092655181885, |
| "learning_rate": 9.41497489631085e-06, |
| "loss": 0.9719, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.066011617176049, |
| "grad_norm": 1.379225730895996, |
| "learning_rate": 9.393145601397075e-06, |
| "loss": 0.9406, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.068183051951577, |
| "grad_norm": 1.4247862100601196, |
| "learning_rate": 9.371316306483302e-06, |
| "loss": 0.975, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.0703544867271049, |
| "grad_norm": 1.290443778038025, |
| "learning_rate": 9.349487011569527e-06, |
| "loss": 0.9636, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.0725259215026328, |
| "grad_norm": 1.2737443447113037, |
| "learning_rate": 9.327657716655752e-06, |
| "loss": 0.9779, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.0746973562781608, |
| "grad_norm": 1.1298906803131104, |
| "learning_rate": 9.305828421741978e-06, |
| "loss": 0.9705, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.0768687910536887, |
| "grad_norm": 1.368236780166626, |
| "learning_rate": 9.283999126828203e-06, |
| "loss": 0.9791, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.0790402258292167, |
| "grad_norm": 1.3343724012374878, |
| "learning_rate": 9.26216983191443e-06, |
| "loss": 1.0074, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.0812116606047446, |
| "grad_norm": 1.547235369682312, |
| "learning_rate": 9.240340537000657e-06, |
| "loss": 0.9545, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.0833830953802726, |
| "grad_norm": 1.8547582626342773, |
| "learning_rate": 9.218511242086882e-06, |
| "loss": 0.9981, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.0855545301558005, |
| "grad_norm": 1.3031221628189087, |
| "learning_rate": 9.196681947173107e-06, |
| "loss": 0.9537, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.0877259649313284, |
| "grad_norm": 1.4998575448989868, |
| "learning_rate": 9.174852652259333e-06, |
| "loss": 0.9865, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.0898973997068564, |
| "grad_norm": 1.4449200630187988, |
| "learning_rate": 9.153023357345558e-06, |
| "loss": 0.9968, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.0920688344823843, |
| "grad_norm": 1.510918140411377, |
| "learning_rate": 9.131194062431783e-06, |
| "loss": 0.9809, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.094240269257912, |
| "grad_norm": 1.3494285345077515, |
| "learning_rate": 9.10936476751801e-06, |
| "loss": 0.9516, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.09641170403344, |
| "grad_norm": 1.1680203676223755, |
| "learning_rate": 9.087535472604235e-06, |
| "loss": 0.9255, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.098583138808968, |
| "grad_norm": 1.4286588430404663, |
| "learning_rate": 9.065706177690462e-06, |
| "loss": 0.9937, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.100754573584496, |
| "grad_norm": 1.3385002613067627, |
| "learning_rate": 9.043876882776687e-06, |
| "loss": 1.0129, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.1029260083600239, |
| "grad_norm": 1.4162006378173828, |
| "learning_rate": 9.022047587862913e-06, |
| "loss": 1.0075, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.1050974431355518, |
| "grad_norm": 1.34634530544281, |
| "learning_rate": 9.000218292949138e-06, |
| "loss": 0.9958, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.1072688779110798, |
| "grad_norm": 1.282800316810608, |
| "learning_rate": 8.978388998035365e-06, |
| "loss": 0.9748, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.1094403126866077, |
| "grad_norm": 1.486527681350708, |
| "learning_rate": 8.95655970312159e-06, |
| "loss": 0.9168, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.1116117474621356, |
| "grad_norm": 1.440211296081543, |
| "learning_rate": 8.934730408207815e-06, |
| "loss": 0.9616, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.1137831822376636, |
| "grad_norm": 1.3290151357650757, |
| "learning_rate": 8.912901113294042e-06, |
| "loss": 1.0013, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.1159546170131915, |
| "grad_norm": 1.5006564855575562, |
| "learning_rate": 8.891071818380266e-06, |
| "loss": 0.9543, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.1181260517887195, |
| "grad_norm": 1.648809552192688, |
| "learning_rate": 8.869242523466493e-06, |
| "loss": 0.9857, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.1202974865642474, |
| "grad_norm": 1.7172517776489258, |
| "learning_rate": 8.847413228552718e-06, |
| "loss": 0.9609, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.1224689213397752, |
| "grad_norm": 1.3127663135528564, |
| "learning_rate": 8.825583933638943e-06, |
| "loss": 0.9664, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.124640356115303, |
| "grad_norm": 1.3861876726150513, |
| "learning_rate": 8.80375463872517e-06, |
| "loss": 0.9773, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.126811790890831, |
| "grad_norm": 1.5689133405685425, |
| "learning_rate": 8.781925343811396e-06, |
| "loss": 0.9393, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.128983225666359, |
| "grad_norm": 1.965571403503418, |
| "learning_rate": 8.760096048897621e-06, |
| "loss": 0.944, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.131154660441887, |
| "grad_norm": 1.4053082466125488, |
| "learning_rate": 8.738266753983848e-06, |
| "loss": 0.9558, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.133326095217415, |
| "grad_norm": 1.3182475566864014, |
| "learning_rate": 8.716437459070073e-06, |
| "loss": 0.9919, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.1354975299929428, |
| "grad_norm": 1.2196260690689087, |
| "learning_rate": 8.694608164156298e-06, |
| "loss": 0.9866, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.1376689647684708, |
| "grad_norm": 1.732273817062378, |
| "learning_rate": 8.672778869242525e-06, |
| "loss": 0.9488, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.1398403995439987, |
| "grad_norm": 1.4350320100784302, |
| "learning_rate": 8.65094957432875e-06, |
| "loss": 0.9596, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.1420118343195267, |
| "grad_norm": 1.7085026502609253, |
| "learning_rate": 8.629120279414975e-06, |
| "loss": 0.9465, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.1441832690950546, |
| "grad_norm": 1.3016654253005981, |
| "learning_rate": 8.607290984501201e-06, |
| "loss": 0.9365, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.1463547038705826, |
| "grad_norm": 1.6259874105453491, |
| "learning_rate": 8.585461689587426e-06, |
| "loss": 0.9706, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.1485261386461105, |
| "grad_norm": 1.6753475666046143, |
| "learning_rate": 8.563632394673653e-06, |
| "loss": 1.0492, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.1506975734216383, |
| "grad_norm": 1.366428256034851, |
| "learning_rate": 8.54180309975988e-06, |
| "loss": 1.0705, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.1528690081971662, |
| "grad_norm": 1.7461377382278442, |
| "learning_rate": 8.519973804846105e-06, |
| "loss": 0.9744, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.1550404429726941, |
| "grad_norm": 1.429442048072815, |
| "learning_rate": 8.49814450993233e-06, |
| "loss": 0.9746, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.157211877748222, |
| "grad_norm": 1.3650249242782593, |
| "learning_rate": 8.476315215018556e-06, |
| "loss": 0.9756, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.15938331252375, |
| "grad_norm": 1.6081992387771606, |
| "learning_rate": 8.454485920104781e-06, |
| "loss": 0.9154, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.161554747299278, |
| "grad_norm": 1.4884233474731445, |
| "learning_rate": 8.432656625191006e-06, |
| "loss": 0.9344, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.163726182074806, |
| "grad_norm": 1.4585130214691162, |
| "learning_rate": 8.410827330277233e-06, |
| "loss": 0.9016, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.1658976168503339, |
| "grad_norm": 1.4983640909194946, |
| "learning_rate": 8.388998035363458e-06, |
| "loss": 0.9738, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.1680690516258618, |
| "grad_norm": 1.2359704971313477, |
| "learning_rate": 8.367168740449685e-06, |
| "loss": 0.9453, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.1702404864013898, |
| "grad_norm": 1.409009337425232, |
| "learning_rate": 8.34533944553591e-06, |
| "loss": 0.9638, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.1724119211769177, |
| "grad_norm": 1.4821068048477173, |
| "learning_rate": 8.323510150622136e-06, |
| "loss": 1.023, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.1745833559524457, |
| "grad_norm": 1.5033917427062988, |
| "learning_rate": 8.301680855708361e-06, |
| "loss": 0.9806, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.1767547907279736, |
| "grad_norm": 1.2823528051376343, |
| "learning_rate": 8.279851560794588e-06, |
| "loss": 0.9765, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.1789262255035013, |
| "grad_norm": 1.3664206266403198, |
| "learning_rate": 8.258022265880813e-06, |
| "loss": 0.9866, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.1810976602790293, |
| "grad_norm": 1.7016454935073853, |
| "learning_rate": 8.236192970967038e-06, |
| "loss": 0.9749, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.1832690950545572, |
| "grad_norm": 1.618396282196045, |
| "learning_rate": 8.214363676053264e-06, |
| "loss": 0.9674, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.1854405298300852, |
| "grad_norm": 1.5501645803451538, |
| "learning_rate": 8.19253438113949e-06, |
| "loss": 0.9688, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.1876119646056131, |
| "grad_norm": 1.6597671508789062, |
| "learning_rate": 8.170705086225716e-06, |
| "loss": 1.0288, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.189783399381141, |
| "grad_norm": 1.2608513832092285, |
| "learning_rate": 8.148875791311941e-06, |
| "loss": 0.9671, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.191954834156669, |
| "grad_norm": 1.4896721839904785, |
| "learning_rate": 8.127046496398166e-06, |
| "loss": 0.9767, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.194126268932197, |
| "grad_norm": 1.6779398918151855, |
| "learning_rate": 8.105217201484393e-06, |
| "loss": 0.9558, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.196297703707725, |
| "grad_norm": 1.3445826768875122, |
| "learning_rate": 8.08338790657062e-06, |
| "loss": 0.8943, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.1984691384832529, |
| "grad_norm": 1.7102388143539429, |
| "learning_rate": 8.061558611656844e-06, |
| "loss": 0.9557, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.2006405732587808, |
| "grad_norm": 1.313062310218811, |
| "learning_rate": 8.03972931674307e-06, |
| "loss": 0.9922, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.2028120080343088, |
| "grad_norm": 1.4992449283599854, |
| "learning_rate": 8.017900021829296e-06, |
| "loss": 1.0354, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.2049834428098367, |
| "grad_norm": 1.6408883333206177, |
| "learning_rate": 7.996070726915521e-06, |
| "loss": 1.0373, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.2071548775853644, |
| "grad_norm": 1.3670636415481567, |
| "learning_rate": 7.974241432001748e-06, |
| "loss": 0.9704, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.2093263123608924, |
| "grad_norm": 1.5695348978042603, |
| "learning_rate": 7.952412137087973e-06, |
| "loss": 0.9529, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.2114977471364203, |
| "grad_norm": 1.2964723110198975, |
| "learning_rate": 7.930582842174198e-06, |
| "loss": 0.9229, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.2136691819119483, |
| "grad_norm": 1.584224820137024, |
| "learning_rate": 7.908753547260424e-06, |
| "loss": 1.0124, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.2158406166874762, |
| "grad_norm": 1.318955898284912, |
| "learning_rate": 7.88692425234665e-06, |
| "loss": 0.9382, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.2180120514630042, |
| "grad_norm": 1.5667856931686401, |
| "learning_rate": 7.865094957432876e-06, |
| "loss": 0.9997, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.2201834862385321, |
| "grad_norm": 1.5935567617416382, |
| "learning_rate": 7.843265662519103e-06, |
| "loss": 0.9562, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.22235492101406, |
| "grad_norm": 1.4892035722732544, |
| "learning_rate": 7.821436367605328e-06, |
| "loss": 0.9373, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.224526355789588, |
| "grad_norm": 1.356696367263794, |
| "learning_rate": 7.799607072691552e-06, |
| "loss": 0.9768, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.226697790565116, |
| "grad_norm": 1.4946991205215454, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.9854, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.228869225340644, |
| "grad_norm": 1.5212732553482056, |
| "learning_rate": 7.755948482864004e-06, |
| "loss": 0.9444, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.2310406601161716, |
| "grad_norm": 1.7982673645019531, |
| "learning_rate": 7.734119187950229e-06, |
| "loss": 0.9486, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.2332120948916998, |
| "grad_norm": 1.639281988143921, |
| "learning_rate": 7.712289893036456e-06, |
| "loss": 1.0026, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.2353835296672275, |
| "grad_norm": 1.2702381610870361, |
| "learning_rate": 7.69046059812268e-06, |
| "loss": 0.9557, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.2375549644427555, |
| "grad_norm": 1.752441167831421, |
| "learning_rate": 7.668631303208907e-06, |
| "loss": 0.9728, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.2397263992182834, |
| "grad_norm": 1.62156343460083, |
| "learning_rate": 7.646802008295132e-06, |
| "loss": 0.9605, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.2418978339938114, |
| "grad_norm": 1.4404542446136475, |
| "learning_rate": 7.624972713381359e-06, |
| "loss": 0.9865, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.2440692687693393, |
| "grad_norm": 1.5573837757110596, |
| "learning_rate": 7.603143418467585e-06, |
| "loss": 0.9545, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.2462407035448673, |
| "grad_norm": 1.2491865158081055, |
| "learning_rate": 7.58131412355381e-06, |
| "loss": 1.0013, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.2484121383203952, |
| "grad_norm": 1.7407690286636353, |
| "learning_rate": 7.559484828640036e-06, |
| "loss": 0.9483, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.2505835730959232, |
| "grad_norm": 1.5740070343017578, |
| "learning_rate": 7.5376555337262615e-06, |
| "loss": 0.9544, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.252755007871451, |
| "grad_norm": 1.5384747982025146, |
| "learning_rate": 7.515826238812487e-06, |
| "loss": 0.9672, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.254926442646979, |
| "grad_norm": 1.4094239473342896, |
| "learning_rate": 7.493996943898712e-06, |
| "loss": 0.9728, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.257097877422507, |
| "grad_norm": 1.4427862167358398, |
| "learning_rate": 7.472167648984938e-06, |
| "loss": 0.9432, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.2592693121980347, |
| "grad_norm": 1.3636986017227173, |
| "learning_rate": 7.450338354071164e-06, |
| "loss": 1.016, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.261440746973563, |
| "grad_norm": 1.4819283485412598, |
| "learning_rate": 7.42850905915739e-06, |
| "loss": 0.9781, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.2636121817490906, |
| "grad_norm": 1.6170705556869507, |
| "learning_rate": 7.406679764243615e-06, |
| "loss": 0.9835, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.2657836165246186, |
| "grad_norm": 1.6234104633331299, |
| "learning_rate": 7.384850469329841e-06, |
| "loss": 0.9556, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.2679550513001465, |
| "grad_norm": 2.1033241748809814, |
| "learning_rate": 7.363021174416067e-06, |
| "loss": 0.9656, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.2701264860756745, |
| "grad_norm": 1.4805753231048584, |
| "learning_rate": 7.341191879502293e-06, |
| "loss": 0.9171, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.2722979208512024, |
| "grad_norm": 1.6896517276763916, |
| "learning_rate": 7.319362584588519e-06, |
| "loss": 0.9795, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.2744693556267304, |
| "grad_norm": 1.509596347808838, |
| "learning_rate": 7.297533289674744e-06, |
| "loss": 1.015, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.2766407904022583, |
| "grad_norm": 1.7531147003173828, |
| "learning_rate": 7.27570399476097e-06, |
| "loss": 0.9729, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.2788122251777863, |
| "grad_norm": 1.7400082349777222, |
| "learning_rate": 7.2538746998471955e-06, |
| "loss": 0.9386, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.2809836599533142, |
| "grad_norm": 1.4365743398666382, |
| "learning_rate": 7.232045404933421e-06, |
| "loss": 1.0021, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.2831550947288421, |
| "grad_norm": 1.7008932828903198, |
| "learning_rate": 7.210216110019646e-06, |
| "loss": 0.9894, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.28532652950437, |
| "grad_norm": 1.5813168287277222, |
| "learning_rate": 7.188386815105872e-06, |
| "loss": 1.0056, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.2874979642798978, |
| "grad_norm": 1.5991625785827637, |
| "learning_rate": 7.166557520192098e-06, |
| "loss": 0.9539, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.289669399055426, |
| "grad_norm": 1.7708544731140137, |
| "learning_rate": 7.144728225278325e-06, |
| "loss": 0.9237, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.2918408338309537, |
| "grad_norm": 1.7181541919708252, |
| "learning_rate": 7.1228989303645504e-06, |
| "loss": 0.9012, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.2940122686064817, |
| "grad_norm": 1.8336186408996582, |
| "learning_rate": 7.101069635450775e-06, |
| "loss": 1.0283, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.2961837033820096, |
| "grad_norm": 1.503780722618103, |
| "learning_rate": 7.079240340537001e-06, |
| "loss": 0.9811, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.2983551381575376, |
| "grad_norm": 1.4553003311157227, |
| "learning_rate": 7.057411045623227e-06, |
| "loss": 0.9755, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.3005265729330655, |
| "grad_norm": 1.4497318267822266, |
| "learning_rate": 7.035581750709453e-06, |
| "loss": 1.0373, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.3026980077085935, |
| "grad_norm": 1.4905823469161987, |
| "learning_rate": 7.013752455795678e-06, |
| "loss": 0.9659, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.3048694424841214, |
| "grad_norm": 1.5055607557296753, |
| "learning_rate": 6.991923160881904e-06, |
| "loss": 0.961, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.3070408772596493, |
| "grad_norm": 1.3909130096435547, |
| "learning_rate": 6.9700938659681295e-06, |
| "loss": 0.9277, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.3092123120351773, |
| "grad_norm": 1.7493101358413696, |
| "learning_rate": 6.948264571054355e-06, |
| "loss": 0.9792, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.3113837468107052, |
| "grad_norm": 1.5306856632232666, |
| "learning_rate": 6.926435276140582e-06, |
| "loss": 1.0049, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.3135551815862332, |
| "grad_norm": 1.4845426082611084, |
| "learning_rate": 6.904605981226807e-06, |
| "loss": 0.9964, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.315726616361761, |
| "grad_norm": 1.822367787361145, |
| "learning_rate": 6.882776686313033e-06, |
| "loss": 0.9871, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.317898051137289, |
| "grad_norm": 1.3956915140151978, |
| "learning_rate": 6.860947391399259e-06, |
| "loss": 0.9814, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.3200694859128168, |
| "grad_norm": 1.5434895753860474, |
| "learning_rate": 6.8391180964854844e-06, |
| "loss": 0.9556, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.3222409206883448, |
| "grad_norm": 1.4856796264648438, |
| "learning_rate": 6.817288801571709e-06, |
| "loss": 0.9417, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.3244123554638727, |
| "grad_norm": 1.610314965248108, |
| "learning_rate": 6.795459506657935e-06, |
| "loss": 0.9577, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.3265837902394007, |
| "grad_norm": 1.4283864498138428, |
| "learning_rate": 6.773630211744161e-06, |
| "loss": 0.9527, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.3287552250149286, |
| "grad_norm": 1.904032588005066, |
| "learning_rate": 6.751800916830387e-06, |
| "loss": 0.9741, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.3309266597904565, |
| "grad_norm": 1.8102325201034546, |
| "learning_rate": 6.729971621916612e-06, |
| "loss": 0.9539, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.3330980945659845, |
| "grad_norm": 1.4815595149993896, |
| "learning_rate": 6.708142327002838e-06, |
| "loss": 0.9535, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.3352695293415124, |
| "grad_norm": 1.5197336673736572, |
| "learning_rate": 6.686313032089064e-06, |
| "loss": 0.9952, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.3374409641170404, |
| "grad_norm": 1.321304202079773, |
| "learning_rate": 6.66448373717529e-06, |
| "loss": 0.923, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.3396123988925683, |
| "grad_norm": 1.5680395364761353, |
| "learning_rate": 6.642654442261516e-06, |
| "loss": 0.9991, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.3417838336680963, |
| "grad_norm": 1.1677354574203491, |
| "learning_rate": 6.620825147347742e-06, |
| "loss": 1.0366, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.343955268443624, |
| "grad_norm": 1.2969460487365723, |
| "learning_rate": 6.598995852433967e-06, |
| "loss": 0.9402, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.3461267032191522, |
| "grad_norm": 1.4528319835662842, |
| "learning_rate": 6.577166557520193e-06, |
| "loss": 0.9493, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.34829813799468, |
| "grad_norm": 1.8320257663726807, |
| "learning_rate": 6.5553372626064184e-06, |
| "loss": 0.9956, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.3504695727702078, |
| "grad_norm": 1.7417082786560059, |
| "learning_rate": 6.533507967692644e-06, |
| "loss": 0.993, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.3526410075457358, |
| "grad_norm": 1.6727395057678223, |
| "learning_rate": 6.511678672778869e-06, |
| "loss": 0.951, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.3548124423212637, |
| "grad_norm": 1.3247034549713135, |
| "learning_rate": 6.489849377865095e-06, |
| "loss": 0.9232, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.3569838770967917, |
| "grad_norm": 1.8623456954956055, |
| "learning_rate": 6.468020082951321e-06, |
| "loss": 0.966, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.3591553118723196, |
| "grad_norm": 1.731749176979065, |
| "learning_rate": 6.4461907880375475e-06, |
| "loss": 0.9016, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.3613267466478476, |
| "grad_norm": 1.585140585899353, |
| "learning_rate": 6.424361493123773e-06, |
| "loss": 0.972, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.3634981814233755, |
| "grad_norm": 1.373619556427002, |
| "learning_rate": 6.402532198209998e-06, |
| "loss": 0.9557, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.3656696161989035, |
| "grad_norm": 1.7933697700500488, |
| "learning_rate": 6.380702903296224e-06, |
| "loss": 0.9536, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.3678410509744314, |
| "grad_norm": 1.4509533643722534, |
| "learning_rate": 6.35887360838245e-06, |
| "loss": 0.9419, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.3700124857499594, |
| "grad_norm": 1.4209648370742798, |
| "learning_rate": 6.337044313468676e-06, |
| "loss": 1.0347, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.372183920525487, |
| "grad_norm": 1.4169176816940308, |
| "learning_rate": 6.315215018554901e-06, |
| "loss": 0.9228, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.3743553553010153, |
| "grad_norm": 1.69527268409729, |
| "learning_rate": 6.293385723641127e-06, |
| "loss": 1.0121, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.376526790076543, |
| "grad_norm": 1.8310434818267822, |
| "learning_rate": 6.271556428727352e-06, |
| "loss": 0.9577, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.378698224852071, |
| "grad_norm": 1.7507007122039795, |
| "learning_rate": 6.249727133813578e-06, |
| "loss": 0.9554, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.3808696596275989, |
| "grad_norm": 1.412238359451294, |
| "learning_rate": 6.227897838899805e-06, |
| "loss": 1.0007, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.3830410944031268, |
| "grad_norm": 1.2443082332611084, |
| "learning_rate": 6.20606854398603e-06, |
| "loss": 0.9527, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.3852125291786548, |
| "grad_norm": 1.5881346464157104, |
| "learning_rate": 6.184239249072256e-06, |
| "loss": 0.9555, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.3873839639541827, |
| "grad_norm": 1.6012235879898071, |
| "learning_rate": 6.1624099541584815e-06, |
| "loss": 0.9611, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.3895553987297107, |
| "grad_norm": 1.7886101007461548, |
| "learning_rate": 6.140580659244707e-06, |
| "loss": 0.9756, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.3917268335052386, |
| "grad_norm": 1.587950348854065, |
| "learning_rate": 6.118751364330932e-06, |
| "loss": 0.9576, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.3938982682807666, |
| "grad_norm": 1.5552079677581787, |
| "learning_rate": 6.096922069417158e-06, |
| "loss": 0.8917, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.3960697030562945, |
| "grad_norm": 1.6492691040039062, |
| "learning_rate": 6.075092774503384e-06, |
| "loss": 0.9034, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.3982411378318225, |
| "grad_norm": 1.698364496231079, |
| "learning_rate": 6.05326347958961e-06, |
| "loss": 0.9729, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.4004125726073502, |
| "grad_norm": 1.5736496448516846, |
| "learning_rate": 6.031434184675835e-06, |
| "loss": 0.9537, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.4025840073828784, |
| "grad_norm": 1.3969451189041138, |
| "learning_rate": 6.009604889762061e-06, |
| "loss": 0.9684, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.404755442158406, |
| "grad_norm": 1.5517948865890503, |
| "learning_rate": 5.987775594848287e-06, |
| "loss": 0.9696, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.406926876933934, |
| "grad_norm": 1.4348918199539185, |
| "learning_rate": 5.965946299934513e-06, |
| "loss": 0.9291, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.409098311709462, |
| "grad_norm": 1.358702301979065, |
| "learning_rate": 5.944117005020739e-06, |
| "loss": 0.9296, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.41126974648499, |
| "grad_norm": 1.4291132688522339, |
| "learning_rate": 5.922287710106964e-06, |
| "loss": 1.0199, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.4134411812605179, |
| "grad_norm": 1.5938302278518677, |
| "learning_rate": 5.90045841519319e-06, |
| "loss": 0.9917, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.4156126160360458, |
| "grad_norm": 1.4601033926010132, |
| "learning_rate": 5.8786291202794155e-06, |
| "loss": 0.9581, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.4177840508115738, |
| "grad_norm": 1.5433522462844849, |
| "learning_rate": 5.856799825365641e-06, |
| "loss": 0.9976, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.4199554855871017, |
| "grad_norm": 1.7822600603103638, |
| "learning_rate": 5.834970530451866e-06, |
| "loss": 0.9801, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.4221269203626297, |
| "grad_norm": 1.4417017698287964, |
| "learning_rate": 5.813141235538092e-06, |
| "loss": 0.9437, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.4242983551381576, |
| "grad_norm": 1.5107430219650269, |
| "learning_rate": 5.791311940624318e-06, |
| "loss": 0.9998, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.4264697899136856, |
| "grad_norm": 1.5540251731872559, |
| "learning_rate": 5.769482645710544e-06, |
| "loss": 0.9528, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.4286412246892133, |
| "grad_norm": 1.925389289855957, |
| "learning_rate": 5.7476533507967705e-06, |
| "loss": 0.9634, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.4308126594647415, |
| "grad_norm": 1.6938945055007935, |
| "learning_rate": 5.7258240558829954e-06, |
| "loss": 1.065, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.4329840942402692, |
| "grad_norm": 1.2665691375732422, |
| "learning_rate": 5.703994760969221e-06, |
| "loss": 0.9263, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.4351555290157971, |
| "grad_norm": 1.5301671028137207, |
| "learning_rate": 5.682165466055447e-06, |
| "loss": 0.9717, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.437326963791325, |
| "grad_norm": 2.1193597316741943, |
| "learning_rate": 5.660336171141673e-06, |
| "loss": 0.9717, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.439498398566853, |
| "grad_norm": 1.8903181552886963, |
| "learning_rate": 5.638506876227898e-06, |
| "loss": 0.9512, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.441669833342381, |
| "grad_norm": 2.0378239154815674, |
| "learning_rate": 5.616677581314124e-06, |
| "loss": 0.9899, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.443841268117909, |
| "grad_norm": 1.3769041299819946, |
| "learning_rate": 5.5948482864003495e-06, |
| "loss": 0.9777, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.4460127028934369, |
| "grad_norm": 1.34968900680542, |
| "learning_rate": 5.573018991486575e-06, |
| "loss": 0.916, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.4481841376689648, |
| "grad_norm": 1.6950223445892334, |
| "learning_rate": 5.5511896965728e-06, |
| "loss": 0.9987, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.4503555724444928, |
| "grad_norm": 1.458634614944458, |
| "learning_rate": 5.529360401659026e-06, |
| "loss": 0.972, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.4525270072200207, |
| "grad_norm": 1.482413649559021, |
| "learning_rate": 5.507531106745253e-06, |
| "loss": 0.9164, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.4546984419955487, |
| "grad_norm": 1.7807501554489136, |
| "learning_rate": 5.485701811831479e-06, |
| "loss": 0.9818, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.4568698767710764, |
| "grad_norm": 1.684687614440918, |
| "learning_rate": 5.4638725169177044e-06, |
| "loss": 0.9411, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.4590413115466045, |
| "grad_norm": 2.081648111343384, |
| "learning_rate": 5.44204322200393e-06, |
| "loss": 0.9671, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.4612127463221323, |
| "grad_norm": 1.752733588218689, |
| "learning_rate": 5.420213927090155e-06, |
| "loss": 0.9348, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.4633841810976602, |
| "grad_norm": 1.5472769737243652, |
| "learning_rate": 5.398384632176381e-06, |
| "loss": 0.9464, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.4655556158731882, |
| "grad_norm": 2.3873612880706787, |
| "learning_rate": 5.376555337262607e-06, |
| "loss": 0.9512, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.4677270506487161, |
| "grad_norm": 1.4972560405731201, |
| "learning_rate": 5.354726042348833e-06, |
| "loss": 1.0031, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.469898485424244, |
| "grad_norm": 1.573939561843872, |
| "learning_rate": 5.332896747435058e-06, |
| "loss": 0.9543, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.472069920199772, |
| "grad_norm": 1.7186089754104614, |
| "learning_rate": 5.3110674525212835e-06, |
| "loss": 0.9652, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.4742413549753, |
| "grad_norm": 1.6311200857162476, |
| "learning_rate": 5.28923815760751e-06, |
| "loss": 1.0011, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.476412789750828, |
| "grad_norm": 1.5969362258911133, |
| "learning_rate": 5.267408862693736e-06, |
| "loss": 0.9143, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.4785842245263559, |
| "grad_norm": 1.665818214416504, |
| "learning_rate": 5.245579567779962e-06, |
| "loss": 0.9897, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.4807556593018838, |
| "grad_norm": 1.508557677268982, |
| "learning_rate": 5.223750272866187e-06, |
| "loss": 0.9839, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.4829270940774117, |
| "grad_norm": 1.4313687086105347, |
| "learning_rate": 5.201920977952413e-06, |
| "loss": 1.0097, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.4850985288529395, |
| "grad_norm": 1.4540635347366333, |
| "learning_rate": 5.1800916830386384e-06, |
| "loss": 0.9428, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.4872699636284676, |
| "grad_norm": 1.5450880527496338, |
| "learning_rate": 5.158262388124864e-06, |
| "loss": 0.9578, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.4894413984039954, |
| "grad_norm": 1.4908329248428345, |
| "learning_rate": 5.136433093211089e-06, |
| "loss": 1.017, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.4916128331795233, |
| "grad_norm": 1.3568364381790161, |
| "learning_rate": 5.114603798297315e-06, |
| "loss": 0.9472, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.4937842679550513, |
| "grad_norm": 1.5948452949523926, |
| "learning_rate": 5.092774503383541e-06, |
| "loss": 0.9694, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.4959557027305792, |
| "grad_norm": 1.792996883392334, |
| "learning_rate": 5.070945208469767e-06, |
| "loss": 0.9238, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.4981271375061072, |
| "grad_norm": 1.4522697925567627, |
| "learning_rate": 5.049115913555993e-06, |
| "loss": 0.945, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.500298572281635, |
| "grad_norm": 1.746430516242981, |
| "learning_rate": 5.027286618642218e-06, |
| "loss": 0.9512, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.502470007057163, |
| "grad_norm": 1.7606338262557983, |
| "learning_rate": 5.005457323728444e-06, |
| "loss": 0.9116, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.504641441832691, |
| "grad_norm": 1.6519626379013062, |
| "learning_rate": 4.98362802881467e-06, |
| "loss": 0.955, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.506812876608219, |
| "grad_norm": 1.4089261293411255, |
| "learning_rate": 4.961798733900896e-06, |
| "loss": 0.9799, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.5089843113837467, |
| "grad_norm": 1.4645596742630005, |
| "learning_rate": 4.939969438987121e-06, |
| "loss": 0.9904, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.5111557461592748, |
| "grad_norm": 1.4212929010391235, |
| "learning_rate": 4.918140144073347e-06, |
| "loss": 0.9945, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.5133271809348026, |
| "grad_norm": 1.3888839483261108, |
| "learning_rate": 4.8963108491595724e-06, |
| "loss": 0.9592, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.5154986157103307, |
| "grad_norm": 1.5533506870269775, |
| "learning_rate": 4.874481554245798e-06, |
| "loss": 0.9563, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.5176700504858585, |
| "grad_norm": 1.6060916185379028, |
| "learning_rate": 4.852652259332024e-06, |
| "loss": 0.9471, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.5198414852613866, |
| "grad_norm": 1.3494884967803955, |
| "learning_rate": 4.83082296441825e-06, |
| "loss": 0.9111, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.5220129200369144, |
| "grad_norm": 1.79092538356781, |
| "learning_rate": 4.808993669504476e-06, |
| "loss": 0.9162, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.5241843548124423, |
| "grad_norm": 1.3956743478775024, |
| "learning_rate": 4.787164374590701e-06, |
| "loss": 0.9319, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.5263557895879702, |
| "grad_norm": 1.627583622932434, |
| "learning_rate": 4.765335079676927e-06, |
| "loss": 0.952, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.5285272243634982, |
| "grad_norm": 1.6400254964828491, |
| "learning_rate": 4.743505784763152e-06, |
| "loss": 0.918, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.5306986591390261, |
| "grad_norm": 1.651705026626587, |
| "learning_rate": 4.721676489849378e-06, |
| "loss": 0.985, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.532870093914554, |
| "grad_norm": 1.5370523929595947, |
| "learning_rate": 4.699847194935604e-06, |
| "loss": 0.9642, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.535041528690082, |
| "grad_norm": 1.4414498805999756, |
| "learning_rate": 4.67801790002183e-06, |
| "loss": 0.9698, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.5372129634656098, |
| "grad_norm": 1.5597195625305176, |
| "learning_rate": 4.656188605108055e-06, |
| "loss": 0.9501, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.539384398241138, |
| "grad_norm": 1.970476508140564, |
| "learning_rate": 4.6343593101942814e-06, |
| "loss": 0.9574, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.5415558330166657, |
| "grad_norm": 1.7327702045440674, |
| "learning_rate": 4.612530015280507e-06, |
| "loss": 0.9611, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.5437272677921938, |
| "grad_norm": 1.668805718421936, |
| "learning_rate": 4.590700720366732e-06, |
| "loss": 1.0165, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.5458987025677215, |
| "grad_norm": 1.6926709413528442, |
| "learning_rate": 4.568871425452958e-06, |
| "loss": 1.0008, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.5480701373432495, |
| "grad_norm": 1.7615026235580444, |
| "learning_rate": 4.547042130539184e-06, |
| "loss": 0.9524, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.5502415721187774, |
| "grad_norm": 1.4678000211715698, |
| "learning_rate": 4.52521283562541e-06, |
| "loss": 0.9722, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.5524130068943054, |
| "grad_norm": 1.8274987936019897, |
| "learning_rate": 4.5033835407116355e-06, |
| "loss": 0.9828, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.5545844416698333, |
| "grad_norm": 1.6088035106658936, |
| "learning_rate": 4.481554245797861e-06, |
| "loss": 1.0088, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.5567558764453613, |
| "grad_norm": 1.3067991733551025, |
| "learning_rate": 4.459724950884086e-06, |
| "loss": 0.9769, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.5589273112208892, |
| "grad_norm": 1.530804991722107, |
| "learning_rate": 4.437895655970312e-06, |
| "loss": 0.9796, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.5610987459964172, |
| "grad_norm": 1.7773782014846802, |
| "learning_rate": 4.416066361056538e-06, |
| "loss": 1.0223, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.5632701807719451, |
| "grad_norm": 1.6125026941299438, |
| "learning_rate": 4.394237066142764e-06, |
| "loss": 0.9303, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.5654416155474729, |
| "grad_norm": 1.6378635168075562, |
| "learning_rate": 4.37240777122899e-06, |
| "loss": 1.0128, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.567613050323001, |
| "grad_norm": 1.9466278553009033, |
| "learning_rate": 4.3505784763152154e-06, |
| "loss": 0.9841, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.5697844850985287, |
| "grad_norm": 1.7690411806106567, |
| "learning_rate": 4.328749181401441e-06, |
| "loss": 0.9333, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.571955919874057, |
| "grad_norm": 1.6015644073486328, |
| "learning_rate": 4.306919886487666e-06, |
| "loss": 0.967, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.5741273546495846, |
| "grad_norm": 1.633703351020813, |
| "learning_rate": 4.285090591573893e-06, |
| "loss": 0.9398, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.5762987894251126, |
| "grad_norm": 1.7742903232574463, |
| "learning_rate": 4.263261296660119e-06, |
| "loss": 0.9658, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.5784702242006405, |
| "grad_norm": 1.6973427534103394, |
| "learning_rate": 4.241432001746344e-06, |
| "loss": 1.0063, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.5806416589761685, |
| "grad_norm": 2.0019545555114746, |
| "learning_rate": 4.2196027068325695e-06, |
| "loss": 0.8667, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.5828130937516964, |
| "grad_norm": 1.683061957359314, |
| "learning_rate": 4.197773411918795e-06, |
| "loss": 0.9789, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.5849845285272244, |
| "grad_norm": 1.658921718597412, |
| "learning_rate": 4.175944117005021e-06, |
| "loss": 0.9502, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.5871559633027523, |
| "grad_norm": 1.679726481437683, |
| "learning_rate": 4.154114822091247e-06, |
| "loss": 0.9554, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.5893273980782803, |
| "grad_norm": 1.7617816925048828, |
| "learning_rate": 4.132285527177473e-06, |
| "loss": 0.9207, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.5914988328538082, |
| "grad_norm": 1.8308563232421875, |
| "learning_rate": 4.110456232263698e-06, |
| "loss": 0.959, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.593670267629336, |
| "grad_norm": 2.4028689861297607, |
| "learning_rate": 4.088626937349924e-06, |
| "loss": 0.929, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.5958417024048641, |
| "grad_norm": 1.3485941886901855, |
| "learning_rate": 4.0667976424361494e-06, |
| "loss": 0.9269, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.5980131371803918, |
| "grad_norm": 1.8243391513824463, |
| "learning_rate": 4.044968347522375e-06, |
| "loss": 0.9056, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.60018457195592, |
| "grad_norm": 2.0518813133239746, |
| "learning_rate": 4.023139052608601e-06, |
| "loss": 0.9539, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.6023560067314477, |
| "grad_norm": 1.7341514825820923, |
| "learning_rate": 4.001309757694827e-06, |
| "loss": 0.9594, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.6045274415069757, |
| "grad_norm": 1.882917881011963, |
| "learning_rate": 3.979480462781053e-06, |
| "loss": 0.9583, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.6066988762825036, |
| "grad_norm": 1.558897852897644, |
| "learning_rate": 3.957651167867278e-06, |
| "loss": 1.0286, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.6088703110580316, |
| "grad_norm": 2.0862538814544678, |
| "learning_rate": 3.935821872953504e-06, |
| "loss": 0.9831, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.6110417458335595, |
| "grad_norm": 1.9152121543884277, |
| "learning_rate": 3.91399257803973e-06, |
| "loss": 0.9263, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.6132131806090875, |
| "grad_norm": 1.6685307025909424, |
| "learning_rate": 3.892163283125955e-06, |
| "loss": 0.963, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 1.6865928173065186, |
| "learning_rate": 3.870333988212181e-06, |
| "loss": 0.996, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.6175560501601434, |
| "grad_norm": 1.4440916776657104, |
| "learning_rate": 3.848504693298407e-06, |
| "loss": 0.9636, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.6197274849356713, |
| "grad_norm": 1.647320032119751, |
| "learning_rate": 3.826675398384633e-06, |
| "loss": 0.9382, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.621898919711199, |
| "grad_norm": 1.929983377456665, |
| "learning_rate": 3.8048461034708584e-06, |
| "loss": 0.9606, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.6240703544867272, |
| "grad_norm": 1.5853888988494873, |
| "learning_rate": 3.783016808557084e-06, |
| "loss": 0.9552, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.626241789262255, |
| "grad_norm": 1.439434289932251, |
| "learning_rate": 3.7611875136433097e-06, |
| "loss": 0.9317, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.628413224037783, |
| "grad_norm": 1.5905818939208984, |
| "learning_rate": 3.739358218729535e-06, |
| "loss": 0.95, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.6305846588133108, |
| "grad_norm": 1.6980335712432861, |
| "learning_rate": 3.717528923815761e-06, |
| "loss": 0.9717, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.6327560935888388, |
| "grad_norm": 1.9424737691879272, |
| "learning_rate": 3.695699628901987e-06, |
| "loss": 0.9267, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.6349275283643667, |
| "grad_norm": 1.472120761871338, |
| "learning_rate": 3.6738703339882125e-06, |
| "loss": 0.9566, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.6370989631398947, |
| "grad_norm": 1.7071336507797241, |
| "learning_rate": 3.6520410390744384e-06, |
| "loss": 1.0075, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.6392703979154226, |
| "grad_norm": 1.565332055091858, |
| "learning_rate": 3.6302117441606638e-06, |
| "loss": 0.9537, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.6414418326909506, |
| "grad_norm": 1.3084553480148315, |
| "learning_rate": 3.6083824492468896e-06, |
| "loss": 0.9728, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.6436132674664785, |
| "grad_norm": 1.5982389450073242, |
| "learning_rate": 3.5865531543331154e-06, |
| "loss": 0.9467, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.6457847022420065, |
| "grad_norm": 1.4233120679855347, |
| "learning_rate": 3.5647238594193412e-06, |
| "loss": 1.008, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.6479561370175344, |
| "grad_norm": 1.7453457117080688, |
| "learning_rate": 3.5428945645055666e-06, |
| "loss": 0.9035, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.6501275717930621, |
| "grad_norm": 1.5516306161880493, |
| "learning_rate": 3.5210652695917924e-06, |
| "loss": 0.9891, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.6522990065685903, |
| "grad_norm": 1.4049649238586426, |
| "learning_rate": 3.499235974678018e-06, |
| "loss": 0.8802, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.654470441344118, |
| "grad_norm": 1.6847800016403198, |
| "learning_rate": 3.477406679764244e-06, |
| "loss": 0.8882, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.6566418761196462, |
| "grad_norm": 1.3729580640792847, |
| "learning_rate": 3.45557738485047e-06, |
| "loss": 0.9374, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.658813310895174, |
| "grad_norm": 1.2749695777893066, |
| "learning_rate": 3.4337480899366953e-06, |
| "loss": 0.9659, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.6609847456707019, |
| "grad_norm": 1.9031591415405273, |
| "learning_rate": 3.411918795022921e-06, |
| "loss": 0.9691, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.6631561804462298, |
| "grad_norm": 1.8668582439422607, |
| "learning_rate": 3.3900895001091465e-06, |
| "loss": 0.9711, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.6653276152217578, |
| "grad_norm": 1.6673099994659424, |
| "learning_rate": 3.3682602051953723e-06, |
| "loss": 0.9406, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.6674990499972857, |
| "grad_norm": 2.0432651042938232, |
| "learning_rate": 3.3464309102815986e-06, |
| "loss": 0.9949, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.6696704847728137, |
| "grad_norm": 1.369315505027771, |
| "learning_rate": 3.324601615367824e-06, |
| "loss": 0.982, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.6718419195483416, |
| "grad_norm": 1.6572550535202026, |
| "learning_rate": 3.30277232045405e-06, |
| "loss": 0.9421, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.6740133543238696, |
| "grad_norm": 1.5291762351989746, |
| "learning_rate": 3.2809430255402752e-06, |
| "loss": 0.9731, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.6761847890993975, |
| "grad_norm": 1.5826210975646973, |
| "learning_rate": 3.259113730626501e-06, |
| "loss": 0.9502, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.6783562238749252, |
| "grad_norm": 1.612889289855957, |
| "learning_rate": 3.237284435712727e-06, |
| "loss": 0.9811, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.6805276586504534, |
| "grad_norm": 1.2216670513153076, |
| "learning_rate": 3.2154551407989527e-06, |
| "loss": 0.9568, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.6826990934259811, |
| "grad_norm": 1.4847832918167114, |
| "learning_rate": 3.193625845885178e-06, |
| "loss": 0.893, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.6848705282015093, |
| "grad_norm": 1.8466966152191162, |
| "learning_rate": 3.171796550971404e-06, |
| "loss": 0.9238, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.687041962977037, |
| "grad_norm": 1.310369610786438, |
| "learning_rate": 3.1499672560576293e-06, |
| "loss": 0.9779, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.689213397752565, |
| "grad_norm": 1.733928918838501, |
| "learning_rate": 3.128137961143855e-06, |
| "loss": 0.9537, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.691384832528093, |
| "grad_norm": 1.4799144268035889, |
| "learning_rate": 3.1063086662300814e-06, |
| "loss": 1.02, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.6935562673036209, |
| "grad_norm": 1.4825830459594727, |
| "learning_rate": 3.0844793713163068e-06, |
| "loss": 0.9233, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.6957277020791488, |
| "grad_norm": 1.588759183883667, |
| "learning_rate": 3.0626500764025326e-06, |
| "loss": 0.9953, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.6978991368546767, |
| "grad_norm": 1.9546958208084106, |
| "learning_rate": 3.040820781488758e-06, |
| "loss": 0.9447, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.7000705716302047, |
| "grad_norm": 1.918961763381958, |
| "learning_rate": 3.018991486574984e-06, |
| "loss": 0.9533, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.7022420064057324, |
| "grad_norm": 2.0883421897888184, |
| "learning_rate": 2.9971621916612096e-06, |
| "loss": 0.9603, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.7044134411812606, |
| "grad_norm": 1.5004432201385498, |
| "learning_rate": 2.9753328967474354e-06, |
| "loss": 0.9976, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.7065848759567883, |
| "grad_norm": 1.8973866701126099, |
| "learning_rate": 2.953503601833661e-06, |
| "loss": 0.9765, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.7087563107323165, |
| "grad_norm": 1.310237169265747, |
| "learning_rate": 2.9316743069198867e-06, |
| "loss": 0.966, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.7109277455078442, |
| "grad_norm": 1.6480714082717896, |
| "learning_rate": 2.909845012006112e-06, |
| "loss": 0.9958, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.7130991802833724, |
| "grad_norm": 1.6958471536636353, |
| "learning_rate": 2.8880157170923383e-06, |
| "loss": 0.9398, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.7152706150589, |
| "grad_norm": 1.5303900241851807, |
| "learning_rate": 2.866186422178564e-06, |
| "loss": 0.9077, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.717442049834428, |
| "grad_norm": 1.3479477167129517, |
| "learning_rate": 2.8443571272647895e-06, |
| "loss": 0.9325, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.719613484609956, |
| "grad_norm": 1.7498183250427246, |
| "learning_rate": 2.8225278323510154e-06, |
| "loss": 0.9554, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.721784919385484, |
| "grad_norm": 1.8304494619369507, |
| "learning_rate": 2.8006985374372408e-06, |
| "loss": 0.9859, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.723956354161012, |
| "grad_norm": 1.8340808153152466, |
| "learning_rate": 2.7788692425234666e-06, |
| "loss": 0.8868, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.7261277889365398, |
| "grad_norm": 1.6533386707305908, |
| "learning_rate": 2.757039947609693e-06, |
| "loss": 0.9277, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.7282992237120678, |
| "grad_norm": 1.6567628383636475, |
| "learning_rate": 2.7352106526959182e-06, |
| "loss": 0.9515, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.7304706584875955, |
| "grad_norm": 1.504021167755127, |
| "learning_rate": 2.713381357782144e-06, |
| "loss": 1.044, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.7326420932631237, |
| "grad_norm": 1.9097148180007935, |
| "learning_rate": 2.6915520628683694e-06, |
| "loss": 0.9439, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.7348135280386514, |
| "grad_norm": 1.5938329696655273, |
| "learning_rate": 2.6697227679545953e-06, |
| "loss": 0.9751, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.7369849628141796, |
| "grad_norm": 1.3853548765182495, |
| "learning_rate": 2.647893473040821e-06, |
| "loss": 0.936, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.7391563975897073, |
| "grad_norm": 1.4751615524291992, |
| "learning_rate": 2.626064178127047e-06, |
| "loss": 0.9537, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.7413278323652355, |
| "grad_norm": 1.5007750988006592, |
| "learning_rate": 2.6042348832132723e-06, |
| "loss": 0.9741, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.7434992671407632, |
| "grad_norm": 1.8922457695007324, |
| "learning_rate": 2.582405588299498e-06, |
| "loss": 1.0043, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.7456707019162911, |
| "grad_norm": 1.6276966333389282, |
| "learning_rate": 2.5605762933857235e-06, |
| "loss": 0.934, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.747842136691819, |
| "grad_norm": 1.489065408706665, |
| "learning_rate": 2.5387469984719498e-06, |
| "loss": 0.9517, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.750013571467347, |
| "grad_norm": 1.7199853658676147, |
| "learning_rate": 2.5169177035581756e-06, |
| "loss": 0.9172, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.752185006242875, |
| "grad_norm": 1.7385406494140625, |
| "learning_rate": 2.495088408644401e-06, |
| "loss": 0.9905, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.754356441018403, |
| "grad_norm": 1.5712602138519287, |
| "learning_rate": 2.473259113730627e-06, |
| "loss": 0.8985, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.7565278757939309, |
| "grad_norm": 1.84058678150177, |
| "learning_rate": 2.4514298188168526e-06, |
| "loss": 0.9504, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.7586993105694586, |
| "grad_norm": 1.3999656438827515, |
| "learning_rate": 2.429600523903078e-06, |
| "loss": 1.0007, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.7608707453449868, |
| "grad_norm": 1.5545423030853271, |
| "learning_rate": 2.407771228989304e-06, |
| "loss": 0.9533, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.7630421801205145, |
| "grad_norm": 1.9049336910247803, |
| "learning_rate": 2.3859419340755297e-06, |
| "loss": 0.9397, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.7652136148960427, |
| "grad_norm": 2.200523853302002, |
| "learning_rate": 2.364112639161755e-06, |
| "loss": 1.0015, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.7673850496715704, |
| "grad_norm": 1.9797515869140625, |
| "learning_rate": 2.342283344247981e-06, |
| "loss": 0.964, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.7695564844470986, |
| "grad_norm": 1.7944706678390503, |
| "learning_rate": 2.3204540493342067e-06, |
| "loss": 0.9688, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.7717279192226263, |
| "grad_norm": 1.6373084783554077, |
| "learning_rate": 2.2986247544204325e-06, |
| "loss": 0.911, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.7738993539981542, |
| "grad_norm": 1.4920001029968262, |
| "learning_rate": 2.2767954595066584e-06, |
| "loss": 0.9798, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.7760707887736822, |
| "grad_norm": 1.9720458984375, |
| "learning_rate": 2.2549661645928838e-06, |
| "loss": 0.969, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.7782422235492101, |
| "grad_norm": 1.419999122619629, |
| "learning_rate": 2.2331368696791096e-06, |
| "loss": 0.9628, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.780413658324738, |
| "grad_norm": 1.7255007028579712, |
| "learning_rate": 2.2113075747653354e-06, |
| "loss": 0.982, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.782585093100266, |
| "grad_norm": 1.8855762481689453, |
| "learning_rate": 2.189478279851561e-06, |
| "loss": 0.9635, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.784756527875794, |
| "grad_norm": 1.6322499513626099, |
| "learning_rate": 2.1676489849377866e-06, |
| "loss": 0.9703, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.7869279626513217, |
| "grad_norm": 1.456443190574646, |
| "learning_rate": 2.1458196900240125e-06, |
| "loss": 0.9452, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.7890993974268499, |
| "grad_norm": 1.7495192289352417, |
| "learning_rate": 2.1239903951102383e-06, |
| "loss": 0.9856, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.7912708322023776, |
| "grad_norm": 1.9030083417892456, |
| "learning_rate": 2.1021611001964637e-06, |
| "loss": 0.9021, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.7934422669779058, |
| "grad_norm": 1.4469859600067139, |
| "learning_rate": 2.0803318052826895e-06, |
| "loss": 0.9625, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.7956137017534335, |
| "grad_norm": 2.0707876682281494, |
| "learning_rate": 2.0585025103689153e-06, |
| "loss": 0.9462, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.7977851365289617, |
| "grad_norm": 1.8715201616287231, |
| "learning_rate": 2.036673215455141e-06, |
| "loss": 0.9055, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.7999565713044894, |
| "grad_norm": 1.6146141290664673, |
| "learning_rate": 2.0148439205413665e-06, |
| "loss": 0.9832, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.8021280060800173, |
| "grad_norm": 1.6612919569015503, |
| "learning_rate": 1.9930146256275924e-06, |
| "loss": 0.9325, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.8042994408555453, |
| "grad_norm": 1.871055006980896, |
| "learning_rate": 1.971185330713818e-06, |
| "loss": 0.9322, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.8064708756310732, |
| "grad_norm": 1.4436935186386108, |
| "learning_rate": 1.9493560358000436e-06, |
| "loss": 0.9671, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.8086423104066012, |
| "grad_norm": 1.7494090795516968, |
| "learning_rate": 1.9275267408862694e-06, |
| "loss": 0.9291, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.8108137451821291, |
| "grad_norm": 1.585222840309143, |
| "learning_rate": 1.9056974459724952e-06, |
| "loss": 0.9813, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.812985179957657, |
| "grad_norm": 1.5499629974365234, |
| "learning_rate": 1.8838681510587208e-06, |
| "loss": 0.9175, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.8151566147331848, |
| "grad_norm": 1.9947984218597412, |
| "learning_rate": 1.8620388561449469e-06, |
| "loss": 0.932, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.817328049508713, |
| "grad_norm": 1.6416243314743042, |
| "learning_rate": 1.8402095612311725e-06, |
| "loss": 0.9398, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.8194994842842407, |
| "grad_norm": 1.8674947023391724, |
| "learning_rate": 1.818380266317398e-06, |
| "loss": 0.9342, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.8216709190597689, |
| "grad_norm": 1.5783942937850952, |
| "learning_rate": 1.796550971403624e-06, |
| "loss": 0.993, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.8238423538352966, |
| "grad_norm": 1.6426745653152466, |
| "learning_rate": 1.7747216764898495e-06, |
| "loss": 0.947, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.8260137886108248, |
| "grad_norm": 1.656149983406067, |
| "learning_rate": 1.7528923815760751e-06, |
| "loss": 0.9387, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.8281852233863525, |
| "grad_norm": 1.6734038591384888, |
| "learning_rate": 1.731063086662301e-06, |
| "loss": 0.9499, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.8303566581618804, |
| "grad_norm": 1.6861449480056763, |
| "learning_rate": 1.7092337917485266e-06, |
| "loss": 0.9644, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.8325280929374084, |
| "grad_norm": 1.4830578565597534, |
| "learning_rate": 1.6874044968347526e-06, |
| "loss": 0.9685, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.8346995277129363, |
| "grad_norm": 1.5387077331542969, |
| "learning_rate": 1.6655752019209782e-06, |
| "loss": 0.9699, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.8368709624884643, |
| "grad_norm": 1.4972219467163086, |
| "learning_rate": 1.6437459070072038e-06, |
| "loss": 0.9466, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.8390423972639922, |
| "grad_norm": 1.2072150707244873, |
| "learning_rate": 1.6219166120934296e-06, |
| "loss": 0.9622, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.8412138320395202, |
| "grad_norm": 1.353187918663025, |
| "learning_rate": 1.6000873171796552e-06, |
| "loss": 0.9516, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.8433852668150479, |
| "grad_norm": 1.8663513660430908, |
| "learning_rate": 1.5782580222658809e-06, |
| "loss": 0.9928, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.845556701590576, |
| "grad_norm": 1.659698247909546, |
| "learning_rate": 1.5564287273521067e-06, |
| "loss": 0.953, |
| "step": 8500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 9212, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0544695061409792e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
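
The fields above (`log_history`, `global_step`, `max_steps`, `total_flos`, `TrainerControl`) follow the `trainer_state.json` layout written by the Hugging Face `Trainer`. As an illustration only, not part of the checkpoint itself, the sketch below shows one way such a file could be read and the logged training loss plotted; the filename `trainer_state.json` and the use of `matplotlib` are assumptions for the example, not anything specified by the log.

```python
# Minimal sketch (assumed filename and plotting library, not part of the original state file):
# load a Trainer-style state JSON and plot training loss against optimizer step.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each logged entry carries "step", "loss", "learning_rate", and "grad_norm";
# keep only entries that actually contain a loss value.
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"loss over {state['global_step']} of {state['max_steps']} steps")
plt.show()
```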