{
"best_global_step": 638,
"best_metric": 0.313894122838974,
"best_model_checkpoint": "outputs/checkpoint-638",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 638,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003144036156415799,
"grad_norm": 2.18092679977417,
"learning_rate": 0.0,
"loss": 1.3508,
"step": 1
},
{
"epoch": 0.006288072312831598,
"grad_norm": 2.135878562927246,
"learning_rate": 2e-05,
"loss": 1.277,
"step": 2
},
{
"epoch": 0.009432108469247396,
"grad_norm": 1.8312653303146362,
"learning_rate": 4e-05,
"loss": 1.2121,
"step": 3
},
{
"epoch": 0.012576144625663196,
"grad_norm": 1.2045841217041016,
"learning_rate": 6e-05,
"loss": 1.1445,
"step": 4
},
{
"epoch": 0.015720180782078996,
"grad_norm": 0.8778926730155945,
"learning_rate": 8e-05,
"loss": 1.0126,
"step": 5
},
{
"epoch": 0.018864216938494792,
"grad_norm": 0.9060773849487305,
"learning_rate": 0.0001,
"loss": 0.8732,
"step": 6
},
{
"epoch": 0.02200825309491059,
"grad_norm": 0.8855406045913696,
"learning_rate": 9.995497523638001e-05,
"loss": 0.7976,
"step": 7
},
{
"epoch": 0.02515228925132639,
"grad_norm": 0.8502305746078491,
"learning_rate": 9.990995047276002e-05,
"loss": 0.6693,
"step": 8
},
{
"epoch": 0.028296325407742188,
"grad_norm": 2.569504737854004,
"learning_rate": 9.986492570914003e-05,
"loss": 0.5713,
"step": 9
},
{
"epoch": 0.03144036156415799,
"grad_norm": 0.60772705078125,
"learning_rate": 9.981990094552004e-05,
"loss": 0.5188,
"step": 10
},
{
"epoch": 0.034584397720573784,
"grad_norm": 0.3977140784263611,
"learning_rate": 9.977487618190005e-05,
"loss": 0.5009,
"step": 11
},
{
"epoch": 0.037728433876989584,
"grad_norm": 0.4928475022315979,
"learning_rate": 9.972985141828006e-05,
"loss": 0.4721,
"step": 12
},
{
"epoch": 0.040872470033405384,
"grad_norm": 0.3859867453575134,
"learning_rate": 9.968482665466006e-05,
"loss": 0.4744,
"step": 13
},
{
"epoch": 0.04401650618982118,
"grad_norm": 0.24680837988853455,
"learning_rate": 9.963980189104007e-05,
"loss": 0.4696,
"step": 14
},
{
"epoch": 0.04716054234623698,
"grad_norm": 0.23483239114284515,
"learning_rate": 9.95947771274201e-05,
"loss": 0.4319,
"step": 15
},
{
"epoch": 0.05030457850265278,
"grad_norm": 0.21595372259616852,
"learning_rate": 9.954975236380009e-05,
"loss": 0.4489,
"step": 16
},
{
"epoch": 0.053448614659068576,
"grad_norm": 0.5875914096832275,
"learning_rate": 9.95047276001801e-05,
"loss": 0.4522,
"step": 17
},
{
"epoch": 0.056592650815484376,
"grad_norm": 0.22523947060108185,
"learning_rate": 9.945970283656011e-05,
"loss": 0.4459,
"step": 18
},
{
"epoch": 0.059736686971900176,
"grad_norm": 0.20356932282447815,
"learning_rate": 9.941467807294013e-05,
"loss": 0.4596,
"step": 19
},
{
"epoch": 0.06288072312831598,
"grad_norm": 1.1203888654708862,
"learning_rate": 9.936965330932014e-05,
"loss": 0.446,
"step": 20
},
{
"epoch": 0.06602475928473177,
"grad_norm": 0.2615947723388672,
"learning_rate": 9.932462854570013e-05,
"loss": 0.4723,
"step": 21
},
{
"epoch": 0.06916879544114757,
"grad_norm": 0.21939712762832642,
"learning_rate": 9.927960378208014e-05,
"loss": 0.4203,
"step": 22
},
{
"epoch": 0.07231283159756337,
"grad_norm": 0.18068519234657288,
"learning_rate": 9.923457901846016e-05,
"loss": 0.4117,
"step": 23
},
{
"epoch": 0.07545686775397917,
"grad_norm": 0.1733531355857849,
"learning_rate": 9.918955425484017e-05,
"loss": 0.4386,
"step": 24
},
{
"epoch": 0.07860090391039497,
"grad_norm": 0.18101659417152405,
"learning_rate": 9.914452949122017e-05,
"loss": 0.4291,
"step": 25
},
{
"epoch": 0.08174494006681077,
"grad_norm": 0.18338626623153687,
"learning_rate": 9.909950472760019e-05,
"loss": 0.4189,
"step": 26
},
{
"epoch": 0.08488897622322657,
"grad_norm": 0.17645250260829926,
"learning_rate": 9.90544799639802e-05,
"loss": 0.3872,
"step": 27
},
{
"epoch": 0.08803301237964237,
"grad_norm": 0.1902536153793335,
"learning_rate": 9.900945520036021e-05,
"loss": 0.4055,
"step": 28
},
{
"epoch": 0.09117704853605817,
"grad_norm": 0.18971717357635498,
"learning_rate": 9.89644304367402e-05,
"loss": 0.4148,
"step": 29
},
{
"epoch": 0.09432108469247397,
"grad_norm": 0.1757958084344864,
"learning_rate": 9.891940567312022e-05,
"loss": 0.4019,
"step": 30
},
{
"epoch": 0.09746512084888977,
"grad_norm": 0.18116620182991028,
"learning_rate": 9.887438090950023e-05,
"loss": 0.409,
"step": 31
},
{
"epoch": 0.10060915700530557,
"grad_norm": 0.16721461713314056,
"learning_rate": 9.882935614588024e-05,
"loss": 0.4118,
"step": 32
},
{
"epoch": 0.10375319316172137,
"grad_norm": 0.18521425127983093,
"learning_rate": 9.878433138226025e-05,
"loss": 0.4358,
"step": 33
},
{
"epoch": 0.10689722931813715,
"grad_norm": 0.18263505399227142,
"learning_rate": 9.873930661864026e-05,
"loss": 0.3956,
"step": 34
},
{
"epoch": 0.11004126547455295,
"grad_norm": 0.1665913313627243,
"learning_rate": 9.869428185502027e-05,
"loss": 0.3826,
"step": 35
},
{
"epoch": 0.11318530163096875,
"grad_norm": 0.16498151421546936,
"learning_rate": 9.864925709140028e-05,
"loss": 0.3556,
"step": 36
},
{
"epoch": 0.11632933778738455,
"grad_norm": 0.17468655109405518,
"learning_rate": 9.860423232778028e-05,
"loss": 0.3702,
"step": 37
},
{
"epoch": 0.11947337394380035,
"grad_norm": 0.1699349582195282,
"learning_rate": 9.855920756416029e-05,
"loss": 0.3763,
"step": 38
},
{
"epoch": 0.12261741010021615,
"grad_norm": 0.17979387938976288,
"learning_rate": 9.85141828005403e-05,
"loss": 0.4059,
"step": 39
},
{
"epoch": 0.12576144625663196,
"grad_norm": 0.17460626363754272,
"learning_rate": 9.846915803692031e-05,
"loss": 0.4032,
"step": 40
},
{
"epoch": 0.12890548241304775,
"grad_norm": 0.1785130649805069,
"learning_rate": 9.842413327330032e-05,
"loss": 0.3939,
"step": 41
},
{
"epoch": 0.13204951856946354,
"grad_norm": 0.1852668821811676,
"learning_rate": 9.837910850968033e-05,
"loss": 0.3746,
"step": 42
},
{
"epoch": 0.13519355472587935,
"grad_norm": 0.18407292664051056,
"learning_rate": 9.833408374606034e-05,
"loss": 0.3851,
"step": 43
},
{
"epoch": 0.13833759088229514,
"grad_norm": 0.18785783648490906,
"learning_rate": 9.828905898244036e-05,
"loss": 0.3895,
"step": 44
},
{
"epoch": 0.14148162703871095,
"grad_norm": 0.18965557217597961,
"learning_rate": 9.824403421882035e-05,
"loss": 0.3746,
"step": 45
},
{
"epoch": 0.14462566319512674,
"grad_norm": 0.1771431416273117,
"learning_rate": 9.819900945520036e-05,
"loss": 0.3483,
"step": 46
},
{
"epoch": 0.14776969935154255,
"grad_norm": 0.18872253596782684,
"learning_rate": 9.815398469158037e-05,
"loss": 0.3533,
"step": 47
},
{
"epoch": 0.15091373550795834,
"grad_norm": 0.18482953310012817,
"learning_rate": 9.810895992796039e-05,
"loss": 0.3758,
"step": 48
},
{
"epoch": 0.15405777166437415,
"grad_norm": 0.18910518288612366,
"learning_rate": 9.806393516434039e-05,
"loss": 0.3804,
"step": 49
},
{
"epoch": 0.15720180782078993,
"grad_norm": 0.187296524643898,
"learning_rate": 9.80189104007204e-05,
"loss": 0.3676,
"step": 50
},
{
"epoch": 0.16034584397720575,
"grad_norm": 0.19214150309562683,
"learning_rate": 9.797388563710042e-05,
"loss": 0.3921,
"step": 51
},
{
"epoch": 0.16348988013362153,
"grad_norm": 0.19029422104358673,
"learning_rate": 9.792886087348043e-05,
"loss": 0.3778,
"step": 52
},
{
"epoch": 0.16663391629003735,
"grad_norm": 0.1980220377445221,
"learning_rate": 9.788383610986042e-05,
"loss": 0.3917,
"step": 53
},
{
"epoch": 0.16977795244645313,
"grad_norm": 0.19083669781684875,
"learning_rate": 9.783881134624043e-05,
"loss": 0.3798,
"step": 54
},
{
"epoch": 0.17292198860286892,
"grad_norm": 0.25795647501945496,
"learning_rate": 9.779378658262045e-05,
"loss": 0.3877,
"step": 55
},
{
"epoch": 0.17606602475928473,
"grad_norm": 0.19090382754802704,
"learning_rate": 9.774876181900046e-05,
"loss": 0.3696,
"step": 56
},
{
"epoch": 0.17921006091570052,
"grad_norm": 0.19982369244098663,
"learning_rate": 9.770373705538046e-05,
"loss": 0.3734,
"step": 57
},
{
"epoch": 0.18235409707211633,
"grad_norm": 0.1944751739501953,
"learning_rate": 9.765871229176046e-05,
"loss": 0.3572,
"step": 58
},
{
"epoch": 0.18549813322853212,
"grad_norm": 0.1942175179719925,
"learning_rate": 9.761368752814049e-05,
"loss": 0.4059,
"step": 59
},
{
"epoch": 0.18864216938494793,
"grad_norm": 0.177927166223526,
"learning_rate": 9.75686627645205e-05,
"loss": 0.3645,
"step": 60
},
{
"epoch": 0.19178620554136372,
"grad_norm": 0.18761321902275085,
"learning_rate": 9.752363800090049e-05,
"loss": 0.3735,
"step": 61
},
{
"epoch": 0.19493024169777953,
"grad_norm": 0.21108420193195343,
"learning_rate": 9.747861323728051e-05,
"loss": 0.3603,
"step": 62
},
{
"epoch": 0.19807427785419532,
"grad_norm": 0.18813803791999817,
"learning_rate": 9.743358847366052e-05,
"loss": 0.366,
"step": 63
},
{
"epoch": 0.20121831401061113,
"grad_norm": 0.1801685392856598,
"learning_rate": 9.738856371004053e-05,
"loss": 0.3585,
"step": 64
},
{
"epoch": 0.20436235016702692,
"grad_norm": 0.1869877278804779,
"learning_rate": 9.734353894642053e-05,
"loss": 0.3787,
"step": 65
},
{
"epoch": 0.20750638632344273,
"grad_norm": 0.18504877388477325,
"learning_rate": 9.729851418280055e-05,
"loss": 0.3442,
"step": 66
},
{
"epoch": 0.21065042247985852,
"grad_norm": 0.19591134786605835,
"learning_rate": 9.725348941918056e-05,
"loss": 0.3876,
"step": 67
},
{
"epoch": 0.2137944586362743,
"grad_norm": 0.1981891542673111,
"learning_rate": 9.720846465556056e-05,
"loss": 0.3507,
"step": 68
},
{
"epoch": 0.21693849479269012,
"grad_norm": 0.20417073369026184,
"learning_rate": 9.716343989194057e-05,
"loss": 0.3667,
"step": 69
},
{
"epoch": 0.2200825309491059,
"grad_norm": 0.19462363421916962,
"learning_rate": 9.711841512832058e-05,
"loss": 0.3595,
"step": 70
},
{
"epoch": 0.22322656710552172,
"grad_norm": 0.17222774028778076,
"learning_rate": 9.707339036470059e-05,
"loss": 0.3451,
"step": 71
},
{
"epoch": 0.2263706032619375,
"grad_norm": 0.1774955689907074,
"learning_rate": 9.70283656010806e-05,
"loss": 0.3386,
"step": 72
},
{
"epoch": 0.22951463941835332,
"grad_norm": 0.189998060464859,
"learning_rate": 9.698334083746061e-05,
"loss": 0.3522,
"step": 73
},
{
"epoch": 0.2326586755747691,
"grad_norm": 0.1920982450246811,
"learning_rate": 9.693831607384062e-05,
"loss": 0.3733,
"step": 74
},
{
"epoch": 0.23580271173118492,
"grad_norm": 0.1971607357263565,
"learning_rate": 9.689329131022062e-05,
"loss": 0.3504,
"step": 75
},
{
"epoch": 0.2389467478876007,
"grad_norm": 0.20512360334396362,
"learning_rate": 9.684826654660063e-05,
"loss": 0.3464,
"step": 76
},
{
"epoch": 0.24209078404401652,
"grad_norm": 0.2119520902633667,
"learning_rate": 9.680324178298064e-05,
"loss": 0.3686,
"step": 77
},
{
"epoch": 0.2452348202004323,
"grad_norm": 0.22858689725399017,
"learning_rate": 9.675821701936065e-05,
"loss": 0.3438,
"step": 78
},
{
"epoch": 0.24837885635684812,
"grad_norm": 0.1901649832725525,
"learning_rate": 9.671319225574066e-05,
"loss": 0.3607,
"step": 79
},
{
"epoch": 0.25152289251326393,
"grad_norm": 0.1896492838859558,
"learning_rate": 9.666816749212068e-05,
"loss": 0.3728,
"step": 80
},
{
"epoch": 0.2546669286696797,
"grad_norm": 0.18334272503852844,
"learning_rate": 9.662314272850068e-05,
"loss": 0.3617,
"step": 81
},
{
"epoch": 0.2578109648260955,
"grad_norm": 0.20095829665660858,
"learning_rate": 9.657811796488068e-05,
"loss": 0.3632,
"step": 82
},
{
"epoch": 0.2609550009825113,
"grad_norm": 0.17583882808685303,
"learning_rate": 9.653309320126069e-05,
"loss": 0.3582,
"step": 83
},
{
"epoch": 0.2640990371389271,
"grad_norm": 0.19473253190517426,
"learning_rate": 9.648806843764072e-05,
"loss": 0.3643,
"step": 84
},
{
"epoch": 0.2672430732953429,
"grad_norm": 0.1956205517053604,
"learning_rate": 9.644304367402071e-05,
"loss": 0.336,
"step": 85
},
{
"epoch": 0.2703871094517587,
"grad_norm": 0.19884824752807617,
"learning_rate": 9.639801891040072e-05,
"loss": 0.3626,
"step": 86
},
{
"epoch": 0.2735311456081745,
"grad_norm": 0.18607290089130402,
"learning_rate": 9.635299414678074e-05,
"loss": 0.3274,
"step": 87
},
{
"epoch": 0.27667518176459027,
"grad_norm": 0.18494442105293274,
"learning_rate": 9.630796938316075e-05,
"loss": 0.3437,
"step": 88
},
{
"epoch": 0.2798192179210061,
"grad_norm": 0.18413978815078735,
"learning_rate": 9.626294461954074e-05,
"loss": 0.3604,
"step": 89
},
{
"epoch": 0.2829632540774219,
"grad_norm": 0.19610458612442017,
"learning_rate": 9.621791985592075e-05,
"loss": 0.3729,
"step": 90
},
{
"epoch": 0.2861072902338377,
"grad_norm": 0.19458866119384766,
"learning_rate": 9.617289509230078e-05,
"loss": 0.3493,
"step": 91
},
{
"epoch": 0.28925132639025347,
"grad_norm": 0.20431379973888397,
"learning_rate": 9.612787032868078e-05,
"loss": 0.3464,
"step": 92
},
{
"epoch": 0.2923953625466693,
"grad_norm": 0.1833576112985611,
"learning_rate": 9.608284556506079e-05,
"loss": 0.3434,
"step": 93
},
{
"epoch": 0.2955393987030851,
"grad_norm": 0.18712273240089417,
"learning_rate": 9.603782080144079e-05,
"loss": 0.3497,
"step": 94
},
{
"epoch": 0.2986834348595009,
"grad_norm": 0.19049568474292755,
"learning_rate": 9.599279603782081e-05,
"loss": 0.3579,
"step": 95
},
{
"epoch": 0.30182747101591667,
"grad_norm": 0.18482261896133423,
"learning_rate": 9.594777127420082e-05,
"loss": 0.3714,
"step": 96
},
{
"epoch": 0.30497150717233246,
"grad_norm": 0.23206727206707,
"learning_rate": 9.590274651058083e-05,
"loss": 0.3457,
"step": 97
},
{
"epoch": 0.3081155433287483,
"grad_norm": 0.18018409609794617,
"learning_rate": 9.585772174696084e-05,
"loss": 0.3507,
"step": 98
},
{
"epoch": 0.3112595794851641,
"grad_norm": 0.1778680980205536,
"learning_rate": 9.581269698334084e-05,
"loss": 0.3455,
"step": 99
},
{
"epoch": 0.31440361564157987,
"grad_norm": 0.19341802597045898,
"learning_rate": 9.576767221972085e-05,
"loss": 0.3522,
"step": 100
},
{
"epoch": 0.31754765179799566,
"grad_norm": 0.18757164478302002,
"learning_rate": 9.572264745610086e-05,
"loss": 0.331,
"step": 101
},
{
"epoch": 0.3206916879544115,
"grad_norm": 0.1873527616262436,
"learning_rate": 9.567762269248087e-05,
"loss": 0.3341,
"step": 102
},
{
"epoch": 0.3238357241108273,
"grad_norm": 0.19451723992824554,
"learning_rate": 9.563259792886088e-05,
"loss": 0.3726,
"step": 103
},
{
"epoch": 0.32697976026724307,
"grad_norm": 0.19554930925369263,
"learning_rate": 9.558757316524089e-05,
"loss": 0.3568,
"step": 104
},
{
"epoch": 0.33012379642365886,
"grad_norm": 0.18672047555446625,
"learning_rate": 9.55425484016209e-05,
"loss": 0.3666,
"step": 105
},
{
"epoch": 0.3332678325800747,
"grad_norm": 0.18587937951087952,
"learning_rate": 9.54975236380009e-05,
"loss": 0.3239,
"step": 106
},
{
"epoch": 0.3364118687364905,
"grad_norm": 0.18024438619613647,
"learning_rate": 9.545249887438091e-05,
"loss": 0.3492,
"step": 107
},
{
"epoch": 0.33955590489290627,
"grad_norm": 0.18240226805210114,
"learning_rate": 9.540747411076092e-05,
"loss": 0.3351,
"step": 108
},
{
"epoch": 0.34269994104932205,
"grad_norm": 0.16186107695102692,
"learning_rate": 9.536244934714093e-05,
"loss": 0.3139,
"step": 109
},
{
"epoch": 0.34584397720573784,
"grad_norm": 0.16864165663719177,
"learning_rate": 9.531742458352094e-05,
"loss": 0.3321,
"step": 110
},
{
"epoch": 0.3489880133621537,
"grad_norm": 0.1825931966304779,
"learning_rate": 9.527239981990095e-05,
"loss": 0.3531,
"step": 111
},
{
"epoch": 0.35213204951856947,
"grad_norm": 0.17394313216209412,
"learning_rate": 9.522737505628096e-05,
"loss": 0.3626,
"step": 112
},
{
"epoch": 0.35527608567498525,
"grad_norm": 0.18193919956684113,
"learning_rate": 9.518235029266098e-05,
"loss": 0.342,
"step": 113
},
{
"epoch": 0.35842012183140104,
"grad_norm": 0.18491008877754211,
"learning_rate": 9.513732552904097e-05,
"loss": 0.348,
"step": 114
},
{
"epoch": 0.3615641579878169,
"grad_norm": 0.19338466227054596,
"learning_rate": 9.509230076542098e-05,
"loss": 0.3531,
"step": 115
},
{
"epoch": 0.36470819414423267,
"grad_norm": 0.18233619630336761,
"learning_rate": 9.5047276001801e-05,
"loss": 0.3147,
"step": 116
},
{
"epoch": 0.36785223030064845,
"grad_norm": 0.1774706244468689,
"learning_rate": 9.500225123818101e-05,
"loss": 0.3442,
"step": 117
},
{
"epoch": 0.37099626645706424,
"grad_norm": 0.1780499666929245,
"learning_rate": 9.495722647456101e-05,
"loss": 0.3368,
"step": 118
},
{
"epoch": 0.3741403026134801,
"grad_norm": 0.18548406660556793,
"learning_rate": 9.491220171094102e-05,
"loss": 0.3429,
"step": 119
},
{
"epoch": 0.37728433876989587,
"grad_norm": 0.18113106489181519,
"learning_rate": 9.486717694732104e-05,
"loss": 0.3278,
"step": 120
},
{
"epoch": 0.38042837492631165,
"grad_norm": 0.18168263137340546,
"learning_rate": 9.482215218370105e-05,
"loss": 0.3127,
"step": 121
},
{
"epoch": 0.38357241108272744,
"grad_norm": 0.17432525753974915,
"learning_rate": 9.477712742008104e-05,
"loss": 0.3594,
"step": 122
},
{
"epoch": 0.3867164472391432,
"grad_norm": 0.187408447265625,
"learning_rate": 9.473210265646106e-05,
"loss": 0.355,
"step": 123
},
{
"epoch": 0.38986048339555907,
"grad_norm": 0.17972330749034882,
"learning_rate": 9.468707789284107e-05,
"loss": 0.3493,
"step": 124
},
{
"epoch": 0.39300451955197485,
"grad_norm": 0.17262862622737885,
"learning_rate": 9.464205312922108e-05,
"loss": 0.323,
"step": 125
},
{
"epoch": 0.39614855570839064,
"grad_norm": 0.26467645168304443,
"learning_rate": 9.459702836560108e-05,
"loss": 0.3643,
"step": 126
},
{
"epoch": 0.3992925918648064,
"grad_norm": 0.24252085387706757,
"learning_rate": 9.45520036019811e-05,
"loss": 0.3573,
"step": 127
},
{
"epoch": 0.40243662802122226,
"grad_norm": 0.18881508708000183,
"learning_rate": 9.45069788383611e-05,
"loss": 0.325,
"step": 128
},
{
"epoch": 0.40558066417763805,
"grad_norm": 0.22236384451389313,
"learning_rate": 9.446195407474112e-05,
"loss": 0.3806,
"step": 129
},
{
"epoch": 0.40872470033405384,
"grad_norm": 0.19904322922229767,
"learning_rate": 9.441692931112111e-05,
"loss": 0.4056,
"step": 130
},
{
"epoch": 0.4118687364904696,
"grad_norm": 0.18705110251903534,
"learning_rate": 9.437190454750113e-05,
"loss": 0.3346,
"step": 131
},
{
"epoch": 0.41501277264688546,
"grad_norm": 0.18179073929786682,
"learning_rate": 9.432687978388114e-05,
"loss": 0.3136,
"step": 132
},
{
"epoch": 0.41815680880330125,
"grad_norm": 0.17651726305484772,
"learning_rate": 9.428185502026115e-05,
"loss": 0.3267,
"step": 133
},
{
"epoch": 0.42130084495971704,
"grad_norm": 0.1757514625787735,
"learning_rate": 9.423683025664116e-05,
"loss": 0.3442,
"step": 134
},
{
"epoch": 0.4244448811161328,
"grad_norm": 0.18630896508693695,
"learning_rate": 9.419180549302117e-05,
"loss": 0.3357,
"step": 135
},
{
"epoch": 0.4275889172725486,
"grad_norm": 0.17533083260059357,
"learning_rate": 9.414678072940118e-05,
"loss": 0.3253,
"step": 136
},
{
"epoch": 0.43073295342896445,
"grad_norm": 0.17959101498126984,
"learning_rate": 9.410175596578118e-05,
"loss": 0.3185,
"step": 137
},
{
"epoch": 0.43387698958538023,
"grad_norm": 0.1812899112701416,
"learning_rate": 9.405673120216119e-05,
"loss": 0.3502,
"step": 138
},
{
"epoch": 0.437021025741796,
"grad_norm": 0.18919287621974945,
"learning_rate": 9.40117064385412e-05,
"loss": 0.3326,
"step": 139
},
{
"epoch": 0.4401650618982118,
"grad_norm": 0.19101746380329132,
"learning_rate": 9.396668167492121e-05,
"loss": 0.2964,
"step": 140
},
{
"epoch": 0.44330909805462765,
"grad_norm": 7.04909086227417,
"learning_rate": 9.392165691130123e-05,
"loss": 0.366,
"step": 141
},
{
"epoch": 0.44645313421104343,
"grad_norm": 0.23961827158927917,
"learning_rate": 9.387663214768123e-05,
"loss": 0.3251,
"step": 142
},
{
"epoch": 0.4495971703674592,
"grad_norm": 0.18671870231628418,
"learning_rate": 9.383160738406124e-05,
"loss": 0.3513,
"step": 143
},
{
"epoch": 0.452741206523875,
"grad_norm": 0.25953730940818787,
"learning_rate": 9.378658262044124e-05,
"loss": 0.3189,
"step": 144
},
{
"epoch": 0.45588524268029085,
"grad_norm": 0.17804424464702606,
"learning_rate": 9.374155785682127e-05,
"loss": 0.3426,
"step": 145
},
{
"epoch": 0.45902927883670663,
"grad_norm": 0.19183290004730225,
"learning_rate": 9.369653309320126e-05,
"loss": 0.3388,
"step": 146
},
{
"epoch": 0.4621733149931224,
"grad_norm": 0.1751260757446289,
"learning_rate": 9.365150832958127e-05,
"loss": 0.3465,
"step": 147
},
{
"epoch": 0.4653173511495382,
"grad_norm": 0.17152872681617737,
"learning_rate": 9.360648356596128e-05,
"loss": 0.3009,
"step": 148
},
{
"epoch": 0.468461387305954,
"grad_norm": 0.17340736091136932,
"learning_rate": 9.35614588023413e-05,
"loss": 0.3377,
"step": 149
},
{
"epoch": 0.47160542346236983,
"grad_norm": 0.17497164011001587,
"learning_rate": 9.35164340387213e-05,
"loss": 0.3512,
"step": 150
},
{
"epoch": 0.4747494596187856,
"grad_norm": 0.18566282093524933,
"learning_rate": 9.34714092751013e-05,
"loss": 0.3208,
"step": 151
},
{
"epoch": 0.4778934957752014,
"grad_norm": 0.20263151824474335,
"learning_rate": 9.342638451148133e-05,
"loss": 0.3543,
"step": 152
},
{
"epoch": 0.4810375319316172,
"grad_norm": 0.19179081916809082,
"learning_rate": 9.338135974786133e-05,
"loss": 0.3387,
"step": 153
},
{
"epoch": 0.48418156808803303,
"grad_norm": 0.19308720529079437,
"learning_rate": 9.333633498424133e-05,
"loss": 0.3679,
"step": 154
},
{
"epoch": 0.4873256042444488,
"grad_norm": 0.1667911857366562,
"learning_rate": 9.329131022062134e-05,
"loss": 0.3243,
"step": 155
},
{
"epoch": 0.4904696404008646,
"grad_norm": 0.17789964377880096,
"learning_rate": 9.324628545700136e-05,
"loss": 0.3291,
"step": 156
},
{
"epoch": 0.4936136765572804,
"grad_norm": 0.17497336864471436,
"learning_rate": 9.320126069338137e-05,
"loss": 0.3347,
"step": 157
},
{
"epoch": 0.49675771271369623,
"grad_norm": 0.1668512523174286,
"learning_rate": 9.315623592976136e-05,
"loss": 0.3074,
"step": 158
},
{
"epoch": 0.499901748870112,
"grad_norm": 0.18032796680927277,
"learning_rate": 9.311121116614139e-05,
"loss": 0.3242,
"step": 159
},
{
"epoch": 0.5030457850265279,
"grad_norm": 0.19095478951931,
"learning_rate": 9.30661864025214e-05,
"loss": 0.3309,
"step": 160
},
{
"epoch": 0.5061898211829436,
"grad_norm": 0.17513571679592133,
"learning_rate": 9.30211616389014e-05,
"loss": 0.354,
"step": 161
},
{
"epoch": 0.5093338573393594,
"grad_norm": 0.17440561950206757,
"learning_rate": 9.29761368752814e-05,
"loss": 0.3447,
"step": 162
},
{
"epoch": 0.5124778934957752,
"grad_norm": 0.17587585747241974,
"learning_rate": 9.293111211166142e-05,
"loss": 0.347,
"step": 163
},
{
"epoch": 0.515621929652191,
"grad_norm": 0.17777486145496368,
"learning_rate": 9.288608734804143e-05,
"loss": 0.3216,
"step": 164
},
{
"epoch": 0.5187659658086068,
"grad_norm": 0.17235027253627777,
"learning_rate": 9.284106258442144e-05,
"loss": 0.342,
"step": 165
},
{
"epoch": 0.5219100019650226,
"grad_norm": 0.17032384872436523,
"learning_rate": 9.279603782080145e-05,
"loss": 0.3235,
"step": 166
},
{
"epoch": 0.5250540381214384,
"grad_norm": 0.1659417450428009,
"learning_rate": 9.275101305718146e-05,
"loss": 0.3294,
"step": 167
},
{
"epoch": 0.5281980742778541,
"grad_norm": 0.1650734841823578,
"learning_rate": 9.270598829356146e-05,
"loss": 0.3179,
"step": 168
},
{
"epoch": 0.53134211043427,
"grad_norm": 0.1897146999835968,
"learning_rate": 9.266096352994147e-05,
"loss": 0.3436,
"step": 169
},
{
"epoch": 0.5344861465906858,
"grad_norm": 0.18100985884666443,
"learning_rate": 9.261593876632148e-05,
"loss": 0.3378,
"step": 170
},
{
"epoch": 0.5376301827471016,
"grad_norm": 0.18976901471614838,
"learning_rate": 9.257091400270149e-05,
"loss": 0.3329,
"step": 171
},
{
"epoch": 0.5407742189035174,
"grad_norm": 0.18210701644420624,
"learning_rate": 9.25258892390815e-05,
"loss": 0.3566,
"step": 172
},
{
"epoch": 0.5439182550599332,
"grad_norm": 0.1779012680053711,
"learning_rate": 9.24808644754615e-05,
"loss": 0.3063,
"step": 173
},
{
"epoch": 0.547062291216349,
"grad_norm": 0.16529639065265656,
"learning_rate": 9.243583971184152e-05,
"loss": 0.3234,
"step": 174
},
{
"epoch": 0.5502063273727648,
"grad_norm": 0.24405060708522797,
"learning_rate": 9.239081494822152e-05,
"loss": 0.3321,
"step": 175
},
{
"epoch": 0.5533503635291805,
"grad_norm": 0.16497737169265747,
"learning_rate": 9.234579018460153e-05,
"loss": 0.3337,
"step": 176
},
{
"epoch": 0.5564943996855963,
"grad_norm": 0.47097891569137573,
"learning_rate": 9.230076542098155e-05,
"loss": 0.3378,
"step": 177
},
{
"epoch": 0.5596384358420122,
"grad_norm": 0.18612946569919586,
"learning_rate": 9.225574065736155e-05,
"loss": 0.3134,
"step": 178
},
{
"epoch": 0.562782471998428,
"grad_norm": 0.2161218822002411,
"learning_rate": 9.221071589374156e-05,
"loss": 0.3345,
"step": 179
},
{
"epoch": 0.5659265081548438,
"grad_norm": 0.19805237650871277,
"learning_rate": 9.216569113012157e-05,
"loss": 0.3342,
"step": 180
},
{
"epoch": 0.5690705443112596,
"grad_norm": 0.17592518031597137,
"learning_rate": 9.212066636650159e-05,
"loss": 0.3454,
"step": 181
},
{
"epoch": 0.5722145804676754,
"grad_norm": 0.18876737356185913,
"learning_rate": 9.207564160288158e-05,
"loss": 0.344,
"step": 182
},
{
"epoch": 0.5753586166240912,
"grad_norm": 0.18281705677509308,
"learning_rate": 9.203061683926159e-05,
"loss": 0.3277,
"step": 183
},
{
"epoch": 0.5785026527805069,
"grad_norm": 0.18671815097332,
"learning_rate": 9.19855920756416e-05,
"loss": 0.319,
"step": 184
},
{
"epoch": 0.5816466889369227,
"grad_norm": 0.1737174689769745,
"learning_rate": 9.194056731202162e-05,
"loss": 0.3554,
"step": 185
},
{
"epoch": 0.5847907250933386,
"grad_norm": 0.16264449059963226,
"learning_rate": 9.189554254840163e-05,
"loss": 0.3404,
"step": 186
},
{
"epoch": 0.5879347612497544,
"grad_norm": 0.16205957531929016,
"learning_rate": 9.185051778478163e-05,
"loss": 0.3345,
"step": 187
},
{
"epoch": 0.5910787974061702,
"grad_norm": 0.16299399733543396,
"learning_rate": 9.180549302116165e-05,
"loss": 0.3406,
"step": 188
},
{
"epoch": 0.594222833562586,
"grad_norm": 0.16929860413074493,
"learning_rate": 9.176046825754166e-05,
"loss": 0.3351,
"step": 189
},
{
"epoch": 0.5973668697190018,
"grad_norm": 0.18242709338665009,
"learning_rate": 9.171544349392167e-05,
"loss": 0.3277,
"step": 190
},
{
"epoch": 0.6005109058754176,
"grad_norm": 0.1715114861726761,
"learning_rate": 9.167041873030166e-05,
"loss": 0.3341,
"step": 191
},
{
"epoch": 0.6036549420318333,
"grad_norm": 0.1673378199338913,
"learning_rate": 9.162539396668168e-05,
"loss": 0.316,
"step": 192
},
{
"epoch": 0.6067989781882491,
"grad_norm": 0.1861652284860611,
"learning_rate": 9.158036920306169e-05,
"loss": 0.3425,
"step": 193
},
{
"epoch": 0.6099430143446649,
"grad_norm": 0.170218825340271,
"learning_rate": 9.15353444394417e-05,
"loss": 0.3495,
"step": 194
},
{
"epoch": 0.6130870505010808,
"grad_norm": 0.16409920156002045,
"learning_rate": 9.149031967582171e-05,
"loss": 0.3216,
"step": 195
},
{
"epoch": 0.6162310866574966,
"grad_norm": 0.1930875927209854,
"learning_rate": 9.144529491220172e-05,
"loss": 0.3221,
"step": 196
},
{
"epoch": 0.6193751228139124,
"grad_norm": 0.168474480509758,
"learning_rate": 9.140027014858173e-05,
"loss": 0.3483,
"step": 197
},
{
"epoch": 0.6225191589703282,
"grad_norm": 0.1649659276008606,
"learning_rate": 9.135524538496173e-05,
"loss": 0.3281,
"step": 198
},
{
"epoch": 0.625663195126744,
"grad_norm": 0.16725848615169525,
"learning_rate": 9.131022062134174e-05,
"loss": 0.349,
"step": 199
},
{
"epoch": 0.6288072312831597,
"grad_norm": 0.16848574578762054,
"learning_rate": 9.126519585772175e-05,
"loss": 0.3467,
"step": 200
},
{
"epoch": 0.6319512674395755,
"grad_norm": 0.17817632853984833,
"learning_rate": 9.122017109410176e-05,
"loss": 0.3468,
"step": 201
},
{
"epoch": 0.6350953035959913,
"grad_norm": 0.16884905099868774,
"learning_rate": 9.117514633048177e-05,
"loss": 0.3197,
"step": 202
},
{
"epoch": 0.6382393397524071,
"grad_norm": 0.16829445958137512,
"learning_rate": 9.113012156686178e-05,
"loss": 0.3495,
"step": 203
},
{
"epoch": 0.641383375908823,
"grad_norm": 0.1753387451171875,
"learning_rate": 9.108509680324179e-05,
"loss": 0.3549,
"step": 204
},
{
"epoch": 0.6445274120652388,
"grad_norm": 0.17498289048671722,
"learning_rate": 9.10400720396218e-05,
"loss": 0.3169,
"step": 205
},
{
"epoch": 0.6476714482216546,
"grad_norm": 0.17499548196792603,
"learning_rate": 9.09950472760018e-05,
"loss": 0.3226,
"step": 206
},
{
"epoch": 0.6508154843780704,
"grad_norm": 0.17783628404140472,
"learning_rate": 9.095002251238181e-05,
"loss": 0.3355,
"step": 207
},
{
"epoch": 0.6539595205344861,
"grad_norm": 0.16701580584049225,
"learning_rate": 9.090499774876182e-05,
"loss": 0.3348,
"step": 208
},
{
"epoch": 0.6571035566909019,
"grad_norm": 0.1692950576543808,
"learning_rate": 9.085997298514183e-05,
"loss": 0.3117,
"step": 209
},
{
"epoch": 0.6602475928473177,
"grad_norm": 0.17733407020568848,
"learning_rate": 9.081494822152185e-05,
"loss": 0.3254,
"step": 210
},
{
"epoch": 0.6633916290037335,
"grad_norm": 0.18444949388504028,
"learning_rate": 9.076992345790185e-05,
"loss": 0.3243,
"step": 211
},
{
"epoch": 0.6665356651601494,
"grad_norm": 0.1709858626127243,
"learning_rate": 9.072489869428186e-05,
"loss": 0.3437,
"step": 212
},
{
"epoch": 0.6696797013165652,
"grad_norm": 0.16070497035980225,
"learning_rate": 9.067987393066188e-05,
"loss": 0.3205,
"step": 213
},
{
"epoch": 0.672823737472981,
"grad_norm": 0.16293945908546448,
"learning_rate": 9.063484916704189e-05,
"loss": 0.3178,
"step": 214
},
{
"epoch": 0.6759677736293968,
"grad_norm": 0.17348802089691162,
"learning_rate": 9.058982440342188e-05,
"loss": 0.3434,
"step": 215
},
{
"epoch": 0.6791118097858125,
"grad_norm": 0.16067078709602356,
"learning_rate": 9.054479963980189e-05,
"loss": 0.3051,
"step": 216
},
{
"epoch": 0.6822558459422283,
"grad_norm": 0.1788797527551651,
"learning_rate": 9.049977487618191e-05,
"loss": 0.3311,
"step": 217
},
{
"epoch": 0.6853998820986441,
"grad_norm": 0.17016440629959106,
"learning_rate": 9.045475011256192e-05,
"loss": 0.3248,
"step": 218
},
{
"epoch": 0.6885439182550599,
"grad_norm": 0.18454566597938538,
"learning_rate": 9.040972534894192e-05,
"loss": 0.3144,
"step": 219
},
{
"epoch": 0.6916879544114757,
"grad_norm": 0.1694164127111435,
"learning_rate": 9.036470058532192e-05,
"loss": 0.3263,
"step": 220
},
{
"epoch": 0.6948319905678916,
"grad_norm": 0.1772613525390625,
"learning_rate": 9.031967582170195e-05,
"loss": 0.3468,
"step": 221
},
{
"epoch": 0.6979760267243074,
"grad_norm": 0.16401882469654083,
"learning_rate": 9.027465105808195e-05,
"loss": 0.342,
"step": 222
},
{
"epoch": 0.7011200628807231,
"grad_norm": 0.16261254251003265,
"learning_rate": 9.022962629446195e-05,
"loss": 0.3249,
"step": 223
},
{
"epoch": 0.7042640990371389,
"grad_norm": 0.17154066264629364,
"learning_rate": 9.018460153084197e-05,
"loss": 0.3631,
"step": 224
},
{
"epoch": 0.7074081351935547,
"grad_norm": 0.18076153099536896,
"learning_rate": 9.013957676722198e-05,
"loss": 0.3282,
"step": 225
},
{
"epoch": 0.7105521713499705,
"grad_norm": 0.15930242836475372,
"learning_rate": 9.009455200360199e-05,
"loss": 0.3233,
"step": 226
},
{
"epoch": 0.7136962075063863,
"grad_norm": 0.16669179499149323,
"learning_rate": 9.004952723998198e-05,
"loss": 0.307,
"step": 227
},
{
"epoch": 0.7168402436628021,
"grad_norm": 0.18358565866947174,
"learning_rate": 9.0004502476362e-05,
"loss": 0.3719,
"step": 228
},
{
"epoch": 0.719984279819218,
"grad_norm": 0.16769863665103912,
"learning_rate": 8.995947771274201e-05,
"loss": 0.3081,
"step": 229
},
{
"epoch": 0.7231283159756338,
"grad_norm": 0.1651238203048706,
"learning_rate": 8.991445294912202e-05,
"loss": 0.3229,
"step": 230
},
{
"epoch": 0.7262723521320495,
"grad_norm": 0.18452374637126923,
"learning_rate": 8.986942818550203e-05,
"loss": 0.3249,
"step": 231
},
{
"epoch": 0.7294163882884653,
"grad_norm": 0.17209681868553162,
"learning_rate": 8.982440342188204e-05,
"loss": 0.3444,
"step": 232
},
{
"epoch": 0.7325604244448811,
"grad_norm": 0.17528848350048065,
"learning_rate": 8.977937865826205e-05,
"loss": 0.3217,
"step": 233
},
{
"epoch": 0.7357044606012969,
"grad_norm": 0.16508957743644714,
"learning_rate": 8.973435389464206e-05,
"loss": 0.3098,
"step": 234
},
{
"epoch": 0.7388484967577127,
"grad_norm": 0.171140655875206,
"learning_rate": 8.968932913102207e-05,
"loss": 0.367,
"step": 235
},
{
"epoch": 0.7419925329141285,
"grad_norm": 0.16529837250709534,
"learning_rate": 8.964430436740207e-05,
"loss": 0.2911,
"step": 236
},
{
"epoch": 0.7451365690705443,
"grad_norm": 0.1798229068517685,
"learning_rate": 8.959927960378208e-05,
"loss": 0.3264,
"step": 237
},
{
"epoch": 0.7482806052269602,
"grad_norm": 0.17085868120193481,
"learning_rate": 8.955425484016209e-05,
"loss": 0.3109,
"step": 238
},
{
"epoch": 0.751424641383376,
"grad_norm": 0.17515264451503754,
"learning_rate": 8.95092300765421e-05,
"loss": 0.317,
"step": 239
},
{
"epoch": 0.7545686775397917,
"grad_norm": 0.18475565314292908,
"learning_rate": 8.946420531292211e-05,
"loss": 0.3683,
"step": 240
},
{
"epoch": 0.7577127136962075,
"grad_norm": 0.16714327037334442,
"learning_rate": 8.941918054930212e-05,
"loss": 0.3429,
"step": 241
},
{
"epoch": 0.7608567498526233,
"grad_norm": 0.15969350934028625,
"learning_rate": 8.937415578568214e-05,
"loss": 0.3331,
"step": 242
},
{
"epoch": 0.7640007860090391,
"grad_norm": 0.16738007962703705,
"learning_rate": 8.932913102206213e-05,
"loss": 0.3332,
"step": 243
},
{
"epoch": 0.7671448221654549,
"grad_norm": 0.15596827864646912,
"learning_rate": 8.928410625844214e-05,
"loss": 0.2864,
"step": 244
},
{
"epoch": 0.7702888583218707,
"grad_norm": 0.16769914329051971,
"learning_rate": 8.923908149482215e-05,
"loss": 0.3411,
"step": 245
},
{
"epoch": 0.7734328944782864,
"grad_norm": 0.1581619828939438,
"learning_rate": 8.919405673120217e-05,
"loss": 0.3271,
"step": 246
},
{
"epoch": 0.7765769306347023,
"grad_norm": 0.18669439852237701,
"learning_rate": 8.914903196758217e-05,
"loss": 0.3363,
"step": 247
},
{
"epoch": 0.7797209667911181,
"grad_norm": 0.1833750307559967,
"learning_rate": 8.910400720396218e-05,
"loss": 0.3425,
"step": 248
},
{
"epoch": 0.7828650029475339,
"grad_norm": 0.16842873394489288,
"learning_rate": 8.90589824403422e-05,
"loss": 0.3019,
"step": 249
},
{
"epoch": 0.7860090391039497,
"grad_norm": 0.1643659919500351,
"learning_rate": 8.901395767672221e-05,
"loss": 0.3301,
"step": 250
},
{
"epoch": 0.7891530752603655,
"grad_norm": 0.17208907008171082,
"learning_rate": 8.89689329131022e-05,
"loss": 0.3469,
"step": 251
},
{
"epoch": 0.7922971114167813,
"grad_norm": 0.16336563229560852,
"learning_rate": 8.892390814948221e-05,
"loss": 0.325,
"step": 252
},
{
"epoch": 0.7954411475731971,
"grad_norm": 0.17350764572620392,
"learning_rate": 8.887888338586223e-05,
"loss": 0.3486,
"step": 253
},
{
"epoch": 0.7985851837296128,
"grad_norm": 0.15856927633285522,
"learning_rate": 8.883385862224224e-05,
"loss": 0.3223,
"step": 254
},
{
"epoch": 0.8017292198860287,
"grad_norm": 0.16306869685649872,
"learning_rate": 8.878883385862224e-05,
"loss": 0.3164,
"step": 255
},
{
"epoch": 0.8048732560424445,
"grad_norm": 0.1610950231552124,
"learning_rate": 8.874380909500225e-05,
"loss": 0.3146,
"step": 256
},
{
"epoch": 0.8080172921988603,
"grad_norm": 0.18235592544078827,
"learning_rate": 8.869878433138227e-05,
"loss": 0.3259,
"step": 257
},
{
"epoch": 0.8111613283552761,
"grad_norm": 0.1566954404115677,
"learning_rate": 8.865375956776228e-05,
"loss": 0.29,
"step": 258
},
{
"epoch": 0.8143053645116919,
"grad_norm": 0.17046710848808289,
"learning_rate": 8.860873480414229e-05,
"loss": 0.3419,
"step": 259
},
{
"epoch": 0.8174494006681077,
"grad_norm": 0.1749659776687622,
"learning_rate": 8.85637100405223e-05,
"loss": 0.3038,
"step": 260
},
{
"epoch": 0.8205934368245235,
"grad_norm": 0.1782928705215454,
"learning_rate": 8.85186852769023e-05,
"loss": 0.3123,
"step": 261
},
{
"epoch": 0.8237374729809392,
"grad_norm": 0.16543257236480713,
"learning_rate": 8.847366051328231e-05,
"loss": 0.2995,
"step": 262
},
{
"epoch": 0.826881509137355,
"grad_norm": 0.17038169503211975,
"learning_rate": 8.842863574966232e-05,
"loss": 0.3437,
"step": 263
},
{
"epoch": 0.8300255452937709,
"grad_norm": 0.16956864297389984,
"learning_rate": 8.838361098604233e-05,
"loss": 0.3208,
"step": 264
},
{
"epoch": 0.8331695814501867,
"grad_norm": 0.16261757910251617,
"learning_rate": 8.833858622242234e-05,
"loss": 0.323,
"step": 265
},
{
"epoch": 0.8363136176066025,
"grad_norm": 0.1713312268257141,
"learning_rate": 8.829356145880235e-05,
"loss": 0.3261,
"step": 266
},
{
"epoch": 0.8394576537630183,
"grad_norm": 0.16358059644699097,
"learning_rate": 8.824853669518235e-05,
"loss": 0.3207,
"step": 267
},
{
"epoch": 0.8426016899194341,
"grad_norm": 0.1711219847202301,
"learning_rate": 8.820351193156236e-05,
"loss": 0.3343,
"step": 268
},
{
"epoch": 0.8457457260758499,
"grad_norm": 0.15430651605129242,
"learning_rate": 8.815848716794237e-05,
"loss": 0.3004,
"step": 269
},
{
"epoch": 0.8488897622322656,
"grad_norm": 0.1770448386669159,
"learning_rate": 8.811346240432238e-05,
"loss": 0.3483,
"step": 270
},
{
"epoch": 0.8520337983886814,
"grad_norm": 0.17468826472759247,
"learning_rate": 8.806843764070239e-05,
"loss": 0.3382,
"step": 271
},
{
"epoch": 0.8551778345450972,
"grad_norm": 0.16027683019638062,
"learning_rate": 8.80234128770824e-05,
"loss": 0.3178,
"step": 272
},
{
"epoch": 0.8583218707015131,
"grad_norm": 0.1797255426645279,
"learning_rate": 8.79783881134624e-05,
"loss": 0.339,
"step": 273
},
{
"epoch": 0.8614659068579289,
"grad_norm": 0.17427705228328705,
"learning_rate": 8.793336334984241e-05,
"loss": 0.334,
"step": 274
},
{
"epoch": 0.8646099430143447,
"grad_norm": 0.16854874789714813,
"learning_rate": 8.788833858622242e-05,
"loss": 0.3512,
"step": 275
},
{
"epoch": 0.8677539791707605,
"grad_norm": 0.1548936814069748,
"learning_rate": 8.784331382260243e-05,
"loss": 0.2863,
"step": 276
},
{
"epoch": 0.8708980153271763,
"grad_norm": 0.17063087224960327,
"learning_rate": 8.779828905898244e-05,
"loss": 0.3299,
"step": 277
},
{
"epoch": 0.874042051483592,
"grad_norm": 0.17423272132873535,
"learning_rate": 8.775326429536246e-05,
"loss": 0.3161,
"step": 278
},
{
"epoch": 0.8771860876400078,
"grad_norm": 0.16254863142967224,
"learning_rate": 8.770823953174246e-05,
"loss": 0.3236,
"step": 279
},
{
"epoch": 0.8803301237964236,
"grad_norm": 0.16803030669689178,
"learning_rate": 8.766321476812247e-05,
"loss": 0.3288,
"step": 280
},
{
"epoch": 0.8834741599528395,
"grad_norm": 0.1748911589384079,
"learning_rate": 8.761819000450247e-05,
"loss": 0.2984,
"step": 281
},
{
"epoch": 0.8866181961092553,
"grad_norm": 0.16277071833610535,
"learning_rate": 8.75731652408825e-05,
"loss": 0.2983,
"step": 282
},
{
"epoch": 0.8897622322656711,
"grad_norm": 0.16375455260276794,
"learning_rate": 8.75281404772625e-05,
"loss": 0.3339,
"step": 283
},
{
"epoch": 0.8929062684220869,
"grad_norm": 0.15943491458892822,
"learning_rate": 8.74831157136425e-05,
"loss": 0.3414,
"step": 284
},
{
"epoch": 0.8960503045785027,
"grad_norm": 0.16314157843589783,
"learning_rate": 8.743809095002252e-05,
"loss": 0.3207,
"step": 285
},
{
"epoch": 0.8991943407349184,
"grad_norm": 0.16236723959445953,
"learning_rate": 8.739306618640253e-05,
"loss": 0.3349,
"step": 286
},
{
"epoch": 0.9023383768913342,
"grad_norm": 0.17521819472312927,
"learning_rate": 8.734804142278254e-05,
"loss": 0.3275,
"step": 287
},
{
"epoch": 0.90548241304775,
"grad_norm": 0.2164747565984726,
"learning_rate": 8.730301665916253e-05,
"loss": 0.3288,
"step": 288
},
{
"epoch": 0.9086264492041658,
"grad_norm": 0.16042940318584442,
"learning_rate": 8.725799189554256e-05,
"loss": 0.3214,
"step": 289
},
{
"epoch": 0.9117704853605817,
"grad_norm": 0.16976606845855713,
"learning_rate": 8.721296713192257e-05,
"loss": 0.3112,
"step": 290
},
{
"epoch": 0.9149145215169975,
"grad_norm": 0.17349150776863098,
"learning_rate": 8.716794236830257e-05,
"loss": 0.2979,
"step": 291
},
{
"epoch": 0.9180585576734133,
"grad_norm": 0.15189234912395477,
"learning_rate": 8.712291760468257e-05,
"loss": 0.3083,
"step": 292
},
{
"epoch": 0.921202593829829,
"grad_norm": 0.15766362845897675,
"learning_rate": 8.707789284106259e-05,
"loss": 0.3337,
"step": 293
},
{
"epoch": 0.9243466299862448,
"grad_norm": 0.15773652493953705,
"learning_rate": 8.70328680774426e-05,
"loss": 0.3161,
"step": 294
},
{
"epoch": 0.9274906661426606,
"grad_norm": 0.15952229499816895,
"learning_rate": 8.698784331382261e-05,
"loss": 0.3131,
"step": 295
},
{
"epoch": 0.9306347022990764,
"grad_norm": 0.16705040633678436,
"learning_rate": 8.694281855020262e-05,
"loss": 0.321,
"step": 296
},
{
"epoch": 0.9337787384554922,
"grad_norm": 0.16729433834552765,
"learning_rate": 8.689779378658263e-05,
"loss": 0.2895,
"step": 297
},
{
"epoch": 0.936922774611908,
"grad_norm": 0.17739711701869965,
"learning_rate": 8.685276902296263e-05,
"loss": 0.3236,
"step": 298
},
{
"epoch": 0.9400668107683239,
"grad_norm": 0.16125445067882538,
"learning_rate": 8.680774425934264e-05,
"loss": 0.3227,
"step": 299
},
{
"epoch": 0.9432108469247397,
"grad_norm": 0.19061018526554108,
"learning_rate": 8.676271949572265e-05,
"loss": 0.3327,
"step": 300
},
{
"epoch": 0.9463548830811555,
"grad_norm": 0.17478956282138824,
"learning_rate": 8.671769473210266e-05,
"loss": 0.3325,
"step": 301
},
{
"epoch": 0.9494989192375712,
"grad_norm": 0.1599021852016449,
"learning_rate": 8.667266996848267e-05,
"loss": 0.3091,
"step": 302
},
{
"epoch": 0.952642955393987,
"grad_norm": 0.16696953773498535,
"learning_rate": 8.662764520486268e-05,
"loss": 0.3211,
"step": 303
},
{
"epoch": 0.9557869915504028,
"grad_norm": 0.16814808547496796,
"learning_rate": 8.658262044124269e-05,
"loss": 0.3284,
"step": 304
},
{
"epoch": 0.9589310277068186,
"grad_norm": 0.15857313573360443,
"learning_rate": 8.65375956776227e-05,
"loss": 0.3224,
"step": 305
},
{
"epoch": 0.9620750638632344,
"grad_norm": 0.15295511484146118,
"learning_rate": 8.64925709140027e-05,
"loss": 0.3315,
"step": 306
},
{
"epoch": 0.9652191000196503,
"grad_norm": 0.21399492025375366,
"learning_rate": 8.644754615038273e-05,
"loss": 0.2922,
"step": 307
},
{
"epoch": 0.9683631361760661,
"grad_norm": 0.17268632352352142,
"learning_rate": 8.640252138676272e-05,
"loss": 0.3564,
"step": 308
},
{
"epoch": 0.9715071723324818,
"grad_norm": 0.17499002814292908,
"learning_rate": 8.635749662314273e-05,
"loss": 0.3385,
"step": 309
},
{
"epoch": 0.9746512084888976,
"grad_norm": 0.170021191239357,
"learning_rate": 8.631247185952274e-05,
"loss": 0.3305,
"step": 310
},
{
"epoch": 0.9777952446453134,
"grad_norm": 0.17455638945102692,
"learning_rate": 8.626744709590276e-05,
"loss": 0.3107,
"step": 311
},
{
"epoch": 0.9809392808017292,
"grad_norm": 0.16129587590694427,
"learning_rate": 8.622242233228275e-05,
"loss": 0.3182,
"step": 312
},
{
"epoch": 0.984083316958145,
"grad_norm": 0.161848783493042,
"learning_rate": 8.617739756866276e-05,
"loss": 0.3368,
"step": 313
},
{
"epoch": 0.9872273531145608,
"grad_norm": 0.14891745150089264,
"learning_rate": 8.613237280504279e-05,
"loss": 0.2923,
"step": 314
},
{
"epoch": 0.9903713892709766,
"grad_norm": 0.1604168862104416,
"learning_rate": 8.60873480414228e-05,
"loss": 0.3197,
"step": 315
},
{
"epoch": 0.9935154254273925,
"grad_norm": 0.15211592614650726,
"learning_rate": 8.604232327780279e-05,
"loss": 0.3197,
"step": 316
},
{
"epoch": 0.9966594615838082,
"grad_norm": 0.1654754877090454,
"learning_rate": 8.59972985141828e-05,
"loss": 0.3108,
"step": 317
},
{
"epoch": 0.999803497740224,
"grad_norm": 0.1642957627773285,
"learning_rate": 8.595227375056282e-05,
"loss": 0.3337,
"step": 318
},
{
"epoch": 1.0,
"grad_norm": 0.6600321531295776,
"learning_rate": 8.590724898694283e-05,
"loss": 0.3815,
"step": 319
},
{
"epoch": 1.0,
"eval_loss": 0.3256175220012665,
"eval_runtime": 102.4846,
"eval_samples_per_second": 12.412,
"eval_steps_per_second": 12.412,
"step": 319
},
{
"epoch": 1.003144036156416,
"grad_norm": 0.15727423131465912,
"learning_rate": 8.586222422332282e-05,
"loss": 0.2867,
"step": 320
},
{
"epoch": 1.0062880723128316,
"grad_norm": 0.17558008432388306,
"learning_rate": 8.581719945970285e-05,
"loss": 0.3426,
"step": 321
},
{
"epoch": 1.0094321084692475,
"grad_norm": 0.1863006204366684,
"learning_rate": 8.577217469608285e-05,
"loss": 0.2946,
"step": 322
},
{
"epoch": 1.0125761446256631,
"grad_norm": 0.20207758247852325,
"learning_rate": 8.572714993246286e-05,
"loss": 0.2872,
"step": 323
},
{
"epoch": 1.015720180782079,
"grad_norm": 0.17089968919754028,
"learning_rate": 8.568212516884286e-05,
"loss": 0.3098,
"step": 324
},
{
"epoch": 1.0188642169384947,
"grad_norm": 0.18078500032424927,
"learning_rate": 8.563710040522288e-05,
"loss": 0.2901,
"step": 325
},
{
"epoch": 1.0220082530949106,
"grad_norm": 0.18292267620563507,
"learning_rate": 8.559207564160289e-05,
"loss": 0.332,
"step": 326
},
{
"epoch": 1.0251522892513263,
"grad_norm": 0.16480115056037903,
"learning_rate": 8.55470508779829e-05,
"loss": 0.3042,
"step": 327
},
{
"epoch": 1.0282963254077422,
"grad_norm": 0.16814446449279785,
"learning_rate": 8.550202611436289e-05,
"loss": 0.319,
"step": 328
},
{
"epoch": 1.031440361564158,
"grad_norm": 0.16136884689331055,
"learning_rate": 8.545700135074291e-05,
"loss": 0.291,
"step": 329
},
{
"epoch": 1.0345843977205738,
"grad_norm": 0.17144669592380524,
"learning_rate": 8.541197658712292e-05,
"loss": 0.2852,
"step": 330
},
{
"epoch": 1.0377284338769897,
"grad_norm": 0.1635693609714508,
"learning_rate": 8.536695182350293e-05,
"loss": 0.2684,
"step": 331
},
{
"epoch": 1.0408724700334053,
"grad_norm": 0.1604490727186203,
"learning_rate": 8.532192705988294e-05,
"loss": 0.3093,
"step": 332
},
{
"epoch": 1.0440165061898212,
"grad_norm": 0.15917396545410156,
"learning_rate": 8.527690229626295e-05,
"loss": 0.2697,
"step": 333
},
{
"epoch": 1.047160542346237,
"grad_norm": 0.17525093257427216,
"learning_rate": 8.523187753264296e-05,
"loss": 0.3084,
"step": 334
},
{
"epoch": 1.0503045785026528,
"grad_norm": 0.1677919328212738,
"learning_rate": 8.518685276902297e-05,
"loss": 0.2974,
"step": 335
},
{
"epoch": 1.0534486146590685,
"grad_norm": 0.17789426445960999,
"learning_rate": 8.514182800540297e-05,
"loss": 0.3062,
"step": 336
},
{
"epoch": 1.0565926508154844,
"grad_norm": 0.16536547243595123,
"learning_rate": 8.509680324178298e-05,
"loss": 0.2872,
"step": 337
},
{
"epoch": 1.0597366869719003,
"grad_norm": 0.17476080358028412,
"learning_rate": 8.505177847816299e-05,
"loss": 0.306,
"step": 338
},
{
"epoch": 1.062880723128316,
"grad_norm": 0.1762908548116684,
| "learning_rate": 8.500675371454301e-05, | |
| "loss": 0.2878, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.0660247592847318, | |
| "grad_norm": 0.17144866287708282, | |
| "learning_rate": 8.496172895092301e-05, | |
| "loss": 0.2915, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.0691687954411475, | |
| "grad_norm": 0.16622525453567505, | |
| "learning_rate": 8.491670418730302e-05, | |
| "loss": 0.3169, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.0723128315975634, | |
| "grad_norm": 0.17786164581775665, | |
| "learning_rate": 8.487167942368303e-05, | |
| "loss": 0.2731, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.075456867753979, | |
| "grad_norm": 0.17761558294296265, | |
| "learning_rate": 8.482665466006305e-05, | |
| "loss": 0.2967, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.078600903910395, | |
| "grad_norm": 0.16161416471004486, | |
| "learning_rate": 8.478162989644304e-05, | |
| "loss": 0.2887, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.0817449400668107, | |
| "grad_norm": 0.1788141131401062, | |
| "learning_rate": 8.473660513282305e-05, | |
| "loss": 0.3456, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.0848889762232266, | |
| "grad_norm": 0.17762236297130585, | |
| "learning_rate": 8.469158036920306e-05, | |
| "loss": 0.3078, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.0880330123796425, | |
| "grad_norm": 0.16463209688663483, | |
| "learning_rate": 8.464655560558308e-05, | |
| "loss": 0.285, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.0911770485360581, | |
| "grad_norm": 0.17661692202091217, | |
| "learning_rate": 8.460153084196308e-05, | |
| "loss": 0.3139, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.094321084692474, | |
| "grad_norm": 0.170676589012146, | |
| "learning_rate": 8.455650607834309e-05, | |
| "loss": 0.3009, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.0974651208488897, | |
| "grad_norm": 0.17777417600154877, | |
| "learning_rate": 8.451148131472311e-05, | |
| "loss": 0.2889, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1006091570053056, | |
| "grad_norm": 0.17952531576156616, | |
| "learning_rate": 8.446645655110312e-05, | |
| "loss": 0.3181, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.1037531931617213, | |
| "grad_norm": 0.17377127707004547, | |
| "learning_rate": 8.442143178748311e-05, | |
| "loss": 0.3003, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.1068972293181372, | |
| "grad_norm": 0.17013375461101532, | |
| "learning_rate": 8.437640702386312e-05, | |
| "loss": 0.3086, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.1100412654745528, | |
| "grad_norm": 0.17330169677734375, | |
| "learning_rate": 8.433138226024314e-05, | |
| "loss": 0.314, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.1131853016309687, | |
| "grad_norm": 0.17327344417572021, | |
| "learning_rate": 8.428635749662315e-05, | |
| "loss": 0.3227, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.1163293377873846, | |
| "grad_norm": 0.17580825090408325, | |
| "learning_rate": 8.424133273300316e-05, | |
| "loss": 0.3104, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.1194733739438003, | |
| "grad_norm": 0.17389941215515137, | |
| "learning_rate": 8.419630796938317e-05, | |
| "loss": 0.285, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.1226174101002162, | |
| "grad_norm": 0.1775561273097992, | |
| "learning_rate": 8.415128320576318e-05, | |
| "loss": 0.2969, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.1257614462566319, | |
| "grad_norm": 0.18656259775161743, | |
| "learning_rate": 8.410625844214319e-05, | |
| "loss": 0.3193, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.1289054824130478, | |
| "grad_norm": 0.1777326762676239, | |
| "learning_rate": 8.40612336785232e-05, | |
| "loss": 0.3024, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.1320495185694635, | |
| "grad_norm": 0.17319585382938385, | |
| "learning_rate": 8.40162089149032e-05, | |
| "loss": 0.2978, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.1351935547258794, | |
| "grad_norm": 0.17653490602970123, | |
| "learning_rate": 8.397118415128321e-05, | |
| "loss": 0.308, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.138337590882295, | |
| "grad_norm": 0.160200297832489, | |
| "learning_rate": 8.392615938766322e-05, | |
| "loss": 0.2694, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.141481627038711, | |
| "grad_norm": 0.16492871940135956, | |
| "learning_rate": 8.388113462404323e-05, | |
| "loss": 0.2824, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.1446256631951268, | |
| "grad_norm": 0.17109255492687225, | |
| "learning_rate": 8.383610986042324e-05, | |
| "loss": 0.2841, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.1477696993515425, | |
| "grad_norm": 0.23076315224170685, | |
| "learning_rate": 8.379108509680325e-05, | |
| "loss": 0.314, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.1509137355079584, | |
| "grad_norm": 0.1704353392124176, | |
| "learning_rate": 8.374606033318325e-05, | |
| "loss": 0.2962, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.154057771664374, | |
| "grad_norm": 0.17220115661621094, | |
| "learning_rate": 8.370103556956326e-05, | |
| "loss": 0.2904, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.15720180782079, | |
| "grad_norm": 0.17528584599494934, | |
| "learning_rate": 8.365601080594327e-05, | |
| "loss": 0.3263, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.1603458439772059, | |
| "grad_norm": 0.18602944910526276, | |
| "learning_rate": 8.361098604232328e-05, | |
| "loss": 0.2989, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.1634898801336215, | |
| "grad_norm": 0.18739493191242218, | |
| "learning_rate": 8.356596127870329e-05, | |
| "loss": 0.3148, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.1666339162900374, | |
| "grad_norm": 0.1813725382089615, | |
| "learning_rate": 8.35209365150833e-05, | |
| "loss": 0.2989, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.169777952446453, | |
| "grad_norm": 0.1674114614725113, | |
| "learning_rate": 8.34759117514633e-05, | |
| "loss": 0.2668, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.172921988602869, | |
| "grad_norm": 0.1844543069601059, | |
| "learning_rate": 8.343088698784331e-05, | |
| "loss": 0.3003, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.1760660247592847, | |
| "grad_norm": 0.17155998945236206, | |
| "learning_rate": 8.338586222422334e-05, | |
| "loss": 0.2931, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.1792100609157006, | |
| "grad_norm": 0.1664140224456787, | |
| "learning_rate": 8.334083746060334e-05, | |
| "loss": 0.2896, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.1823540970721163, | |
| "grad_norm": 0.18443046510219574, | |
| "learning_rate": 8.329581269698334e-05, | |
| "loss": 0.3098, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.1854981332285321, | |
| "grad_norm": 0.16364677250385284, | |
| "learning_rate": 8.325078793336335e-05, | |
| "loss": 0.2849, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.188642169384948, | |
| "grad_norm": 0.1778181493282318, | |
| "learning_rate": 8.320576316974337e-05, | |
| "loss": 0.2951, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.1917862055413637, | |
| "grad_norm": 0.17129847407341003, | |
| "learning_rate": 8.316073840612338e-05, | |
| "loss": 0.2917, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.1949302416977796, | |
| "grad_norm": 0.17360500991344452, | |
| "learning_rate": 8.311571364250337e-05, | |
| "loss": 0.3078, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.1980742778541953, | |
| "grad_norm": 0.17020374536514282, | |
| "learning_rate": 8.307068887888338e-05, | |
| "loss": 0.2966, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.2012183140106112, | |
| "grad_norm": 0.1838023066520691, | |
| "learning_rate": 8.30256641152634e-05, | |
| "loss": 0.2991, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.2043623501670269, | |
| "grad_norm": 0.18513008952140808, | |
| "learning_rate": 8.298063935164341e-05, | |
| "loss": 0.3455, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.2075063863234428, | |
| "grad_norm": 0.17663338780403137, | |
| "learning_rate": 8.293561458802341e-05, | |
| "loss": 0.3059, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.2106504224798584, | |
| "grad_norm": 0.17648449540138245, | |
| "learning_rate": 8.289058982440343e-05, | |
| "loss": 0.2991, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.2137944586362743, | |
| "grad_norm": 0.18601331114768982, | |
| "learning_rate": 8.284556506078344e-05, | |
| "loss": 0.2936, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.2169384947926902, | |
| "grad_norm": 0.18048390746116638, | |
| "learning_rate": 8.280054029716345e-05, | |
| "loss": 0.2837, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.220082530949106, | |
| "grad_norm": 0.17065560817718506, | |
| "learning_rate": 8.275551553354344e-05, | |
| "loss": 0.283, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.2232265671055218, | |
| "grad_norm": 0.1708894670009613, | |
| "learning_rate": 8.271049076992347e-05, | |
| "loss": 0.2718, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.2263706032619375, | |
| "grad_norm": 0.17479634284973145, | |
| "learning_rate": 8.266546600630347e-05, | |
| "loss": 0.3003, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.2295146394183534, | |
| "grad_norm": 0.22091898322105408, | |
| "learning_rate": 8.262044124268348e-05, | |
| "loss": 0.3324, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.232658675574769, | |
| "grad_norm": 0.17981559038162231, | |
| "learning_rate": 8.257541647906349e-05, | |
| "loss": 0.3042, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.235802711731185, | |
| "grad_norm": 0.16127324104309082, | |
| "learning_rate": 8.25303917154435e-05, | |
| "loss": 0.2662, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.2389467478876006, | |
| "grad_norm": 0.18422247469425201, | |
| "learning_rate": 8.248536695182351e-05, | |
| "loss": 0.31, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.2420907840440165, | |
| "grad_norm": 0.18198904395103455, | |
| "learning_rate": 8.244034218820352e-05, | |
| "loss": 0.3242, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.2452348202004324, | |
| "grad_norm": 0.17157980799674988, | |
| "learning_rate": 8.239531742458353e-05, | |
| "loss": 0.3025, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.248378856356848, | |
| "grad_norm": 0.17674268782138824, | |
| "learning_rate": 8.235029266096353e-05, | |
| "loss": 0.3072, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.251522892513264, | |
| "grad_norm": 0.18540705740451813, | |
| "learning_rate": 8.230526789734354e-05, | |
| "loss": 0.3159, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.2546669286696797, | |
| "grad_norm": 0.18816250562667847, | |
| "learning_rate": 8.226024313372355e-05, | |
| "loss": 0.328, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.2578109648260956, | |
| "grad_norm": 0.1927611082792282, | |
| "learning_rate": 8.221521837010356e-05, | |
| "loss": 0.3032, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.2609550009825112, | |
| "grad_norm": 0.18845967948436737, | |
| "learning_rate": 8.217019360648357e-05, | |
| "loss": 0.2793, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.2640990371389271, | |
| "grad_norm": 0.19096308946609497, | |
| "learning_rate": 8.212516884286358e-05, | |
| "loss": 0.3369, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.2672430732953428, | |
| "grad_norm": 0.16917437314987183, | |
| "learning_rate": 8.20801440792436e-05, | |
| "loss": 0.2785, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.2703871094517587, | |
| "grad_norm": 0.16734306514263153, | |
| "learning_rate": 8.20351193156236e-05, | |
| "loss": 0.2914, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.2735311456081746, | |
| "grad_norm": 0.17491504549980164, | |
| "learning_rate": 8.19900945520036e-05, | |
| "loss": 0.2905, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.2766751817645903, | |
| "grad_norm": 0.18531963229179382, | |
| "learning_rate": 8.194506978838361e-05, | |
| "loss": 0.3365, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.2798192179210062, | |
| "grad_norm": 0.1812233179807663, | |
| "learning_rate": 8.190004502476363e-05, | |
| "loss": 0.3051, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.2829632540774218, | |
| "grad_norm": 0.17402727901935577, | |
| "learning_rate": 8.185502026114363e-05, | |
| "loss": 0.2906, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.2861072902338377, | |
| "grad_norm": 0.18160969018936157, | |
| "learning_rate": 8.180999549752364e-05, | |
| "loss": 0.295, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.2892513263902534, | |
| "grad_norm": 0.17364852130413055, | |
| "learning_rate": 8.176497073390366e-05, | |
| "loss": 0.2789, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.2923953625466693, | |
| "grad_norm": 0.17983028292655945, | |
| "learning_rate": 8.171994597028367e-05, | |
| "loss": 0.2976, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.295539398703085, | |
| "grad_norm": 0.18376639485359192, | |
| "learning_rate": 8.167492120666366e-05, | |
| "loss": 0.2984, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.2986834348595009, | |
| "grad_norm": 0.16966019570827484, | |
| "learning_rate": 8.162989644304367e-05, | |
| "loss": 0.2825, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.3018274710159168, | |
| "grad_norm": 0.18048398196697235, | |
| "learning_rate": 8.15848716794237e-05, | |
| "loss": 0.3105, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.3049715071723325, | |
| "grad_norm": 0.1738966405391693, | |
| "learning_rate": 8.15398469158037e-05, | |
| "loss": 0.27, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.3081155433287484, | |
| "grad_norm": 0.22127372026443481, | |
| "learning_rate": 8.14948221521837e-05, | |
| "loss": 0.3156, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.311259579485164, | |
| "grad_norm": 0.17313317954540253, | |
| "learning_rate": 8.14497973885637e-05, | |
| "loss": 0.2921, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.31440361564158, | |
| "grad_norm": 0.17622841894626617, | |
| "learning_rate": 8.140477262494373e-05, | |
| "loss": 0.3026, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.3175476517979956, | |
| "grad_norm": 0.17847168445587158, | |
| "learning_rate": 8.135974786132374e-05, | |
| "loss": 0.3021, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.3206916879544115, | |
| "grad_norm": 0.18637776374816895, | |
| "learning_rate": 8.131472309770373e-05, | |
| "loss": 0.3186, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.3238357241108272, | |
| "grad_norm": 0.16532807052135468, | |
| "learning_rate": 8.126969833408375e-05, | |
| "loss": 0.2878, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.326979760267243, | |
| "grad_norm": 0.16804370284080505, | |
| "learning_rate": 8.122467357046376e-05, | |
| "loss": 0.2868, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.330123796423659, | |
| "grad_norm": 0.1693575531244278, | |
| "learning_rate": 8.117964880684377e-05, | |
| "loss": 0.2898, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.3332678325800746, | |
| "grad_norm": 0.17773057520389557, | |
| "learning_rate": 8.113462404322378e-05, | |
| "loss": 0.2741, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.3364118687364905, | |
| "grad_norm": 0.1866486817598343, | |
| "learning_rate": 8.108959927960379e-05, | |
| "loss": 0.298, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.3395559048929062, | |
| "grad_norm": 0.18073201179504395, | |
| "learning_rate": 8.10445745159838e-05, | |
| "loss": 0.2933, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.342699941049322, | |
| "grad_norm": 0.17505986988544464, | |
| "learning_rate": 8.09995497523638e-05, | |
| "loss": 0.2936, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.3458439772057378, | |
| "grad_norm": 0.17242185771465302, | |
| "learning_rate": 8.095452498874381e-05, | |
| "loss": 0.2827, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.3489880133621537, | |
| "grad_norm": 0.16698665916919708, | |
| "learning_rate": 8.090950022512382e-05, | |
| "loss": 0.2742, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.3521320495185694, | |
| "grad_norm": 0.18763364851474762, | |
| "learning_rate": 8.086447546150383e-05, | |
| "loss": 0.3192, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.3552760856749853, | |
| "grad_norm": 0.18754689395427704, | |
| "learning_rate": 8.081945069788384e-05, | |
| "loss": 0.2932, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.3584201218314012, | |
| "grad_norm": 0.18708984553813934, | |
| "learning_rate": 8.077442593426385e-05, | |
| "loss": 0.328, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.3615641579878168, | |
| "grad_norm": 0.18035311996936798, | |
| "learning_rate": 8.072940117064386e-05, | |
| "loss": 0.2699, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.3647081941442327, | |
| "grad_norm": 0.17291460931301117, | |
| "learning_rate": 8.068437640702387e-05, | |
| "loss": 0.2729, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.3678522303006484, | |
| "grad_norm": 0.1894587129354477, | |
| "learning_rate": 8.063935164340387e-05, | |
| "loss": 0.2792, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.3709962664570643, | |
| "grad_norm": 0.17740470170974731, | |
| "learning_rate": 8.059432687978388e-05, | |
| "loss": 0.3022, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.3741403026134802, | |
| "grad_norm": 0.17968855798244476, | |
| "learning_rate": 8.054930211616389e-05, | |
| "loss": 0.3095, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.3772843387698959, | |
| "grad_norm": 0.1771247237920761, | |
| "learning_rate": 8.05042773525439e-05, | |
| "loss": 0.3052, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.3804283749263115, | |
| "grad_norm": 0.164938822388649, | |
| "learning_rate": 8.045925258892392e-05, | |
| "loss": 0.2912, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.3835724110827274, | |
| "grad_norm": 0.17572474479675293, | |
| "learning_rate": 8.041422782530392e-05, | |
| "loss": 0.2942, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.3867164472391433, | |
| "grad_norm": 0.16663512587547302, | |
| "learning_rate": 8.036920306168393e-05, | |
| "loss": 0.2838, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.389860483395559, | |
| "grad_norm": 0.1684209108352661, | |
| "learning_rate": 8.032417829806393e-05, | |
| "loss": 0.3024, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.393004519551975, | |
| "grad_norm": 0.1704261749982834, | |
| "learning_rate": 8.027915353444396e-05, | |
| "loss": 0.3055, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.3961485557083906, | |
| "grad_norm": 0.17855525016784668, | |
| "learning_rate": 8.023412877082395e-05, | |
| "loss": 0.3047, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.3992925918648065, | |
| "grad_norm": 0.16438795626163483, | |
| "learning_rate": 8.018910400720396e-05, | |
| "loss": 0.2833, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.4024366280212224, | |
| "grad_norm": 0.1803821176290512, | |
| "learning_rate": 8.014407924358398e-05, | |
| "loss": 0.2693, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.405580664177638, | |
| "grad_norm": 0.17037837207317352, | |
| "learning_rate": 8.009905447996399e-05, | |
| "loss": 0.2931, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.4087247003340537, | |
| "grad_norm": 0.17838133871555328, | |
| "learning_rate": 8.0054029716344e-05, | |
| "loss": 0.3126, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.4118687364904696, | |
| "grad_norm": 0.17596563696861267, | |
| "learning_rate": 8.0009004952724e-05, | |
| "loss": 0.3124, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4150127726468855, | |
| "grad_norm": 0.17477372288703918, | |
| "learning_rate": 7.996398018910402e-05, | |
| "loss": 0.2846, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.4181568088033012, | |
| "grad_norm": 0.16831114888191223, | |
| "learning_rate": 7.991895542548402e-05, | |
| "loss": 0.2841, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.421300844959717, | |
| "grad_norm": 0.16885237395763397, | |
| "learning_rate": 7.987393066186403e-05, | |
| "loss": 0.2882, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.4244448811161328, | |
| "grad_norm": 0.1732211410999298, | |
| "learning_rate": 7.982890589824403e-05, | |
| "loss": 0.3065, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.4275889172725487, | |
| "grad_norm": 0.17489729821681976, | |
| "learning_rate": 7.978388113462405e-05, | |
| "loss": 0.2937, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.4307329534289646, | |
| "grad_norm": 0.1771242618560791, | |
| "learning_rate": 7.973885637100406e-05, | |
| "loss": 0.2963, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.4338769895853802, | |
| "grad_norm": 0.19036780297756195, | |
| "learning_rate": 7.969383160738407e-05, | |
| "loss": 0.3104, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.437021025741796, | |
| "grad_norm": 0.1863013058900833, | |
| "learning_rate": 7.964880684376408e-05, | |
| "loss": 0.3047, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.4401650618982118, | |
| "grad_norm": 0.1722109168767929, | |
| "learning_rate": 7.960378208014408e-05, | |
| "loss": 0.2828, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.4433090980546277, | |
| "grad_norm": 0.1802283078432083, | |
| "learning_rate": 7.95587573165241e-05, | |
| "loss": 0.2978, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.4464531342110434, | |
| "grad_norm": 0.17628727853298187, | |
| "learning_rate": 7.95137325529041e-05, | |
| "loss": 0.2963, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.4495971703674593, | |
| "grad_norm": 0.17598123848438263, | |
| "learning_rate": 7.946870778928411e-05, | |
| "loss": 0.3106, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.452741206523875, | |
| "grad_norm": 0.17388591170310974, | |
| "learning_rate": 7.942368302566412e-05, | |
| "loss": 0.3067, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.4558852426802908, | |
| "grad_norm": 0.17893949151039124, | |
| "learning_rate": 7.937865826204413e-05, | |
| "loss": 0.2756, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.4590292788367067, | |
| "grad_norm": 0.16779755055904388, | |
| "learning_rate": 7.933363349842414e-05, | |
| "loss": 0.2911, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.4621733149931224, | |
| "grad_norm": 0.19151651859283447, | |
| "learning_rate": 7.928860873480414e-05, | |
| "loss": 0.2919, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.465317351149538, | |
| "grad_norm": 0.17654001712799072, | |
| "learning_rate": 7.924358397118415e-05, | |
| "loss": 0.3021, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.468461387305954, | |
| "grad_norm": 0.17647038400173187, | |
| "learning_rate": 7.919855920756416e-05, | |
| "loss": 0.271, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.4716054234623699, | |
| "grad_norm": 0.17813007533550262, | |
| "learning_rate": 7.915353444394417e-05, | |
| "loss": 0.3031, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.4747494596187856, | |
| "grad_norm": 0.19432079792022705, | |
| "learning_rate": 7.910850968032418e-05, | |
| "loss": 0.3529, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.4778934957752015, | |
| "grad_norm": 0.18345120549201965, | |
| "learning_rate": 7.906348491670419e-05, | |
| "loss": 0.2832, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.4810375319316171, | |
| "grad_norm": 0.1722515970468521, | |
| "learning_rate": 7.90184601530842e-05, | |
| "loss": 0.3294, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.484181568088033, | |
| "grad_norm": 0.1815156191587448, | |
| "learning_rate": 7.897343538946422e-05, | |
| "loss": 0.3025, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.487325604244449, | |
| "grad_norm": 0.17528071999549866, | |
| "learning_rate": 7.892841062584421e-05, | |
| "loss": 0.3167, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.4904696404008646, | |
| "grad_norm": 0.1877971738576889, | |
| "learning_rate": 7.888338586222422e-05, | |
| "loss": 0.3088, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.4936136765572803, | |
| "grad_norm": 0.16935402154922485, | |
| "learning_rate": 7.883836109860424e-05, | |
| "loss": 0.3003, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.4967577127136962, | |
| "grad_norm": 0.1625109314918518, | |
| "learning_rate": 7.879333633498425e-05, | |
| "loss": 0.2853, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.499901748870112, | |
| "grad_norm": 0.17674805223941803, | |
| "learning_rate": 7.874831157136425e-05, | |
| "loss": 0.3047, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.503045785026528, | |
| "grad_norm": 0.168808251619339, | |
| "learning_rate": 7.870328680774426e-05, | |
| "loss": 0.301, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.5061898211829436, | |
| "grad_norm": 0.1753881871700287, | |
| "learning_rate": 7.865826204412428e-05, | |
| "loss": 0.2939, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.5093338573393593, | |
| "grad_norm": 0.16852906346321106, | |
| "learning_rate": 7.861323728050429e-05, | |
| "loss": 0.2989, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.5124778934957752, | |
| "grad_norm": 0.16612806916236877, | |
| "learning_rate": 7.856821251688428e-05, | |
| "loss": 0.2731, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.5156219296521911, | |
| "grad_norm": 0.17498096823692322, | |
| "learning_rate": 7.85231877532643e-05, | |
| "loss": 0.2903, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.5187659658086068, | |
| "grad_norm": 0.1843009740114212, | |
| "learning_rate": 7.847816298964431e-05, | |
| "loss": 0.3168, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.5219100019650225, | |
| "grad_norm": 0.17858386039733887, | |
| "learning_rate": 7.843313822602432e-05, | |
| "loss": 0.2954, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.5250540381214384, | |
| "grad_norm": 0.18993936479091644, | |
| "learning_rate": 7.838811346240432e-05, | |
| "loss": 0.3264, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.5281980742778543, | |
| "grad_norm": 0.1731633096933365, | |
| "learning_rate": 7.834308869878434e-05, | |
| "loss": 0.2982, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.5313421104342702, | |
| "grad_norm": 0.17727167904376984, | |
| "learning_rate": 7.829806393516435e-05, | |
| "loss": 0.2854, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.5344861465906858, | |
| "grad_norm": 0.17536379396915436, | |
| "learning_rate": 7.825303917154436e-05, | |
| "loss": 0.2788, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.5376301827471015, | |
| "grad_norm": 0.1785167157649994, | |
| "learning_rate": 7.820801440792435e-05, | |
| "loss": 0.3007, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.5407742189035174, | |
| "grad_norm": 0.1738578975200653, | |
| "learning_rate": 7.816298964430437e-05, | |
| "loss": 0.3015, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.5439182550599333, | |
| "grad_norm": 0.1737809181213379, | |
| "learning_rate": 7.811796488068438e-05, | |
| "loss": 0.3031, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.547062291216349, | |
| "grad_norm": 0.17526312172412872, | |
| "learning_rate": 7.807294011706439e-05, | |
| "loss": 0.3072, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.5502063273727646, | |
| "grad_norm": 0.17959162592887878, | |
| "learning_rate": 7.80279153534444e-05, | |
| "loss": 0.2699, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.5533503635291805, | |
| "grad_norm": 0.17218153178691864, | |
| "learning_rate": 7.798289058982441e-05, | |
| "loss": 0.2734, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.5564943996855964, | |
| "grad_norm": 0.17062252759933472, | |
| "learning_rate": 7.793786582620442e-05, | |
| "loss": 0.2426, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.5596384358420123, | |
| "grad_norm": 0.19795489311218262, | |
| "learning_rate": 7.789284106258442e-05, | |
| "loss": 0.3396, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.562782471998428, | |
| "grad_norm": 0.18899548053741455, | |
| "learning_rate": 7.784781629896443e-05, | |
| "loss": 0.295, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.5659265081548437, | |
| "grad_norm": 0.18889367580413818, | |
| "learning_rate": 7.780279153534444e-05, | |
| "loss": 0.2996, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.5690705443112596, | |
| "grad_norm": 0.184955894947052, | |
| "learning_rate": 7.775776677172445e-05, | |
| "loss": 0.2833, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.5722145804676755, | |
| "grad_norm": 0.16244037449359894, | |
| "learning_rate": 7.771274200810447e-05, | |
| "loss": 0.2677, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.5753586166240912, | |
| "grad_norm": 0.19440148770809174, | |
| "learning_rate": 7.766771724448447e-05, | |
| "loss": 0.3052, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.5785026527805068, | |
| "grad_norm": 0.1759510636329651, | |
| "learning_rate": 7.762269248086448e-05, | |
| "loss": 0.2824, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.5816466889369227, | |
| "grad_norm": 0.17166948318481445, | |
| "learning_rate": 7.757766771724448e-05, | |
| "loss": 0.2911, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.5847907250933386, | |
| "grad_norm": 0.17509418725967407, | |
| "learning_rate": 7.753264295362451e-05, | |
| "loss": 0.2845, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.5879347612497545, | |
| "grad_norm": 0.16338001191616058, | |
| "learning_rate": 7.74876181900045e-05, | |
| "loss": 0.2759, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.5910787974061702, | |
| "grad_norm": 0.1770390421152115, | |
| "learning_rate": 7.744259342638451e-05, | |
| "loss": 0.3137, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.5942228335625859, | |
| "grad_norm": 0.17159558832645416, | |
| "learning_rate": 7.739756866276452e-05, | |
| "loss": 0.2736, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.5973668697190018, | |
| "grad_norm": 0.18849338591098785, | |
| "learning_rate": 7.735254389914454e-05, | |
| "loss": 0.2775, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.6005109058754177, | |
| "grad_norm": 0.18084058165550232, | |
| "learning_rate": 7.730751913552454e-05, | |
| "loss": 0.2883, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.6036549420318333, | |
| "grad_norm": 0.1859467774629593, | |
| "learning_rate": 7.726249437190454e-05, | |
| "loss": 0.3053, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.606798978188249, | |
| "grad_norm": 0.18158085644245148, | |
| "learning_rate": 7.721746960828457e-05, | |
| "loss": 0.2923, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.609943014344665, | |
| "grad_norm": 0.18600253760814667, | |
| "learning_rate": 7.717244484466458e-05, | |
| "loss": 0.3107, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.6130870505010808, | |
| "grad_norm": 0.1869710236787796, | |
| "learning_rate": 7.712742008104457e-05, | |
| "loss": 0.2821, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.6162310866574967, | |
| "grad_norm": 0.1755673587322235, | |
| "learning_rate": 7.708239531742458e-05, | |
| "loss": 0.2621, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.6193751228139124, | |
| "grad_norm": 0.17789125442504883, | |
| "learning_rate": 7.70373705538046e-05, | |
| "loss": 0.3073, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.622519158970328, | |
| "grad_norm": 0.16756756603717804, | |
| "learning_rate": 7.699234579018461e-05, | |
| "loss": 0.3078, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.625663195126744, | |
| "grad_norm": 0.17822512984275818, | |
| "learning_rate": 7.69473210265646e-05, | |
| "loss": 0.2998, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.6288072312831599, | |
| "grad_norm": 0.16880451142787933, | |
| "learning_rate": 7.690229626294463e-05, | |
| "loss": 0.2918, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.6319512674395755, | |
| "grad_norm": 0.1791965365409851, | |
| "learning_rate": 7.685727149932464e-05, | |
| "loss": 0.2898, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.6350953035959912, | |
| "grad_norm": 0.17452813684940338, | |
| "learning_rate": 7.681224673570464e-05, | |
| "loss": 0.2885, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.638239339752407, | |
| "grad_norm": 0.18743397295475006, | |
| "learning_rate": 7.676722197208465e-05, | |
| "loss": 0.3007, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.641383375908823, | |
| "grad_norm": 0.18785692751407623, | |
| "learning_rate": 7.672219720846466e-05, | |
| "loss": 0.2928, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.644527412065239, | |
| "grad_norm": 0.19505468010902405, | |
| "learning_rate": 7.667717244484467e-05, | |
| "loss": 0.3206, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.6476714482216546, | |
| "grad_norm": 0.1750132143497467, | |
| "learning_rate": 7.663214768122468e-05, | |
| "loss": 0.2764, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.6508154843780702, | |
| "grad_norm": 0.18247836828231812, | |
| "learning_rate": 7.658712291760469e-05, | |
| "loss": 0.314, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.6539595205344861, | |
| "grad_norm": 0.1866837590932846, | |
| "learning_rate": 7.65420981539847e-05, | |
| "loss": 0.3158, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.657103556690902, | |
| "grad_norm": 0.17475096881389618, | |
| "learning_rate": 7.64970733903647e-05, | |
| "loss": 0.2964, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.6602475928473177, | |
| "grad_norm": 0.1679716855287552, | |
| "learning_rate": 7.645204862674471e-05, | |
| "loss": 0.3122, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.6633916290037334, | |
| "grad_norm": 0.16546200215816498, | |
| "learning_rate": 7.640702386312472e-05, | |
| "loss": 0.3125, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.6665356651601493, | |
| "grad_norm": 0.16651305556297302, | |
| "learning_rate": 7.636199909950473e-05, | |
| "loss": 0.2636, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.6696797013165652, | |
| "grad_norm": 0.16956521570682526, | |
| "learning_rate": 7.631697433588474e-05, | |
| "loss": 0.2956, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.672823737472981, | |
| "grad_norm": 0.17262689769268036, | |
| "learning_rate": 7.627194957226475e-05, | |
| "loss": 0.2889, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.6759677736293968, | |
| "grad_norm": 0.17842979729175568, | |
| "learning_rate": 7.622692480864476e-05, | |
| "loss": 0.3175, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.6791118097858124, | |
| "grad_norm": 0.18716371059417725, | |
| "learning_rate": 7.618190004502476e-05, | |
| "loss": 0.294, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.6822558459422283, | |
| "grad_norm": 0.17072086036205292, | |
| "learning_rate": 7.613687528140477e-05, | |
| "loss": 0.3001, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.6853998820986442, | |
| "grad_norm": 0.16700303554534912, | |
| "learning_rate": 7.60918505177848e-05, | |
| "loss": 0.2576, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.68854391825506, | |
| "grad_norm": 0.17436909675598145, | |
| "learning_rate": 7.604682575416479e-05, | |
| "loss": 0.2986, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.6916879544114756, | |
| "grad_norm": 0.1712087094783783, | |
| "learning_rate": 7.60018009905448e-05, | |
| "loss": 0.2824, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.6948319905678915, | |
| "grad_norm": 0.17220038175582886, | |
| "learning_rate": 7.595677622692481e-05, | |
| "loss": 0.2901, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.6979760267243074, | |
| "grad_norm": 0.18637694418430328, | |
| "learning_rate": 7.591175146330483e-05, | |
| "loss": 0.3375, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.7011200628807233, | |
| "grad_norm": 0.1691576987504959, | |
| "learning_rate": 7.586672669968482e-05, | |
| "loss": 0.2638, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.704264099037139, | |
| "grad_norm": 0.17768289148807526, | |
| "learning_rate": 7.582170193606483e-05, | |
| "loss": 0.2879, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.7074081351935546, | |
| "grad_norm": 0.1812208741903305, | |
| "learning_rate": 7.577667717244484e-05, | |
| "loss": 0.3129, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.7105521713499705, | |
| "grad_norm": 0.18346074223518372, | |
| "learning_rate": 7.573165240882486e-05, | |
| "loss": 0.302, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.7136962075063864, | |
| "grad_norm": 0.17309945821762085, | |
| "learning_rate": 7.568662764520487e-05, | |
| "loss": 0.255, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.716840243662802, | |
| "grad_norm": 0.1879347264766693, | |
| "learning_rate": 7.564160288158487e-05, | |
| "loss": 0.3124, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.719984279819218, | |
| "grad_norm": 0.1695443093776703, | |
| "learning_rate": 7.559657811796489e-05, | |
| "loss": 0.2809, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.7231283159756337, | |
| "grad_norm": 0.17476417124271393, | |
| "learning_rate": 7.55515533543449e-05, | |
| "loss": 0.3043, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.7262723521320495, | |
| "grad_norm": 0.1775609701871872, | |
| "learning_rate": 7.550652859072491e-05, | |
| "loss": 0.289, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.7294163882884654, | |
| "grad_norm": 0.17453855276107788, | |
| "learning_rate": 7.54615038271049e-05, | |
| "loss": 0.2914, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.7325604244448811, | |
| "grad_norm": 0.18414853513240814, | |
| "learning_rate": 7.541647906348492e-05, | |
| "loss": 0.3184, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.7357044606012968, | |
| "grad_norm": 0.18060451745986938, | |
| "learning_rate": 7.537145429986493e-05, | |
| "loss": 0.2998, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.7388484967577127, | |
| "grad_norm": 0.1735735535621643, | |
| "learning_rate": 7.532642953624494e-05, | |
| "loss": 0.3192, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.7419925329141286, | |
| "grad_norm": 0.17077748477458954, | |
| "learning_rate": 7.528140477262495e-05, | |
| "loss": 0.2912, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.7451365690705443, | |
| "grad_norm": 0.16513197124004364, | |
| "learning_rate": 7.523638000900496e-05, | |
| "loss": 0.3024, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.7482806052269602, | |
| "grad_norm": 0.1681637018918991, | |
| "learning_rate": 7.519135524538497e-05, | |
| "loss": 0.2764, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.7514246413833758, | |
| "grad_norm": 0.17060600221157074, | |
| "learning_rate": 7.514633048176498e-05, | |
| "loss": 0.272, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.7545686775397917, | |
| "grad_norm": 0.1727294623851776, | |
| "learning_rate": 7.510130571814498e-05, | |
| "loss": 0.2938, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.7577127136962076, | |
| "grad_norm": 0.16411182284355164, | |
| "learning_rate": 7.505628095452499e-05, | |
| "loss": 0.2864, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.7608567498526233, | |
| "grad_norm": 0.16701269149780273, | |
| "learning_rate": 7.5011256190905e-05, | |
| "loss": 0.276, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.764000786009039, | |
| "grad_norm": 0.16412830352783203, | |
| "learning_rate": 7.496623142728501e-05, | |
| "loss": 0.2836, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.7671448221654549, | |
| "grad_norm": 0.17730842530727386, | |
| "learning_rate": 7.492120666366502e-05, | |
| "loss": 0.2812, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.7702888583218708, | |
| "grad_norm": 0.16831046342849731, | |
| "learning_rate": 7.487618190004503e-05, | |
| "loss": 0.2832, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.7734328944782864, | |
| "grad_norm": 0.17002396285533905, | |
| "learning_rate": 7.483115713642504e-05, | |
| "loss": 0.2884, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.7765769306347023, | |
| "grad_norm": 0.181968092918396, | |
| "learning_rate": 7.478613237280504e-05, | |
| "loss": 0.32, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.779720966791118, | |
| "grad_norm": 0.18976394832134247, | |
| "learning_rate": 7.474110760918505e-05, | |
| "loss": 0.2993, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.782865002947534, | |
| "grad_norm": 0.1806926429271698, | |
| "learning_rate": 7.469608284556506e-05, | |
| "loss": 0.2914, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.7860090391039498, | |
| "grad_norm": 0.17822052538394928, | |
| "learning_rate": 7.465105808194507e-05, | |
| "loss": 0.2843, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.7891530752603655, | |
| "grad_norm": 0.18080289661884308, | |
| "learning_rate": 7.460603331832509e-05, | |
| "loss": 0.3121, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.7922971114167812, | |
| "grad_norm": 0.17676854133605957, | |
| "learning_rate": 7.456100855470509e-05, | |
| "loss": 0.2901, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.795441147573197, | |
| "grad_norm": 0.16959191858768463, | |
| "learning_rate": 7.45159837910851e-05, | |
| "loss": 0.3058, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.798585183729613, | |
| "grad_norm": 0.16757243871688843, | |
| "learning_rate": 7.447095902746512e-05, | |
| "loss": 0.2982, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.8017292198860289, | |
| "grad_norm": 0.1798073798418045, | |
| "learning_rate": 7.442593426384513e-05, | |
| "loss": 0.3097, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.8048732560424445, | |
| "grad_norm": 0.16888341307640076, | |
| "learning_rate": 7.438090950022512e-05, | |
| "loss": 0.2763, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.8080172921988602, | |
| "grad_norm": 0.17195682227611542, | |
| "learning_rate": 7.433588473660513e-05, | |
| "loss": 0.2778, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.811161328355276, | |
| "grad_norm": 0.17291922867298126, | |
| "learning_rate": 7.429085997298515e-05, | |
| "loss": 0.2894, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.814305364511692, | |
| "grad_norm": 0.17213907837867737, | |
| "learning_rate": 7.424583520936516e-05, | |
| "loss": 0.2998, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.8174494006681077, | |
| "grad_norm": 0.16969838738441467, | |
| "learning_rate": 7.420081044574516e-05, | |
| "loss": 0.2953, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.8205934368245233, | |
| "grad_norm": 0.16950733959674835, | |
| "learning_rate": 7.415578568212516e-05, | |
| "loss": 0.2774, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.8237374729809392, | |
| "grad_norm": 0.1866762787103653, | |
| "learning_rate": 7.411076091850519e-05, | |
| "loss": 0.3004, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.8268815091373551, | |
| "grad_norm": 0.18050317466259003, | |
| "learning_rate": 7.40657361548852e-05, | |
| "loss": 0.2867, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.830025545293771, | |
| "grad_norm": 0.19073279201984406, | |
| "learning_rate": 7.402071139126519e-05, | |
| "loss": 0.2874, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.8331695814501867, | |
| "grad_norm": 0.18162357807159424, | |
| "learning_rate": 7.397568662764521e-05, | |
| "loss": 0.3211, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.8363136176066024, | |
| "grad_norm": 0.17108604311943054, | |
| "learning_rate": 7.393066186402522e-05, | |
| "loss": 0.3021, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.8394576537630183, | |
| "grad_norm": 0.17849913239479065, | |
| "learning_rate": 7.388563710040523e-05, | |
| "loss": 0.3127, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.8426016899194342, | |
| "grad_norm": 0.16922686994075775, | |
| "learning_rate": 7.384061233678522e-05, | |
| "loss": 0.2732, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.8457457260758499, | |
| "grad_norm": 0.17308250069618225, | |
| "learning_rate": 7.379558757316525e-05, | |
| "loss": 0.2932, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.8488897622322655, | |
| "grad_norm": 0.18480746448040009, | |
| "learning_rate": 7.375056280954526e-05, | |
| "loss": 0.3036, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.8520337983886814, | |
| "grad_norm": 0.16831083595752716, | |
| "learning_rate": 7.370553804592526e-05, | |
| "loss": 0.2913, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.8551778345450973, | |
| "grad_norm": 0.1726708710193634, | |
| "learning_rate": 7.366051328230527e-05, | |
| "loss": 0.2683, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.8583218707015132, | |
| "grad_norm": 0.17040051519870758, | |
| "learning_rate": 7.361548851868528e-05, | |
| "loss": 0.2681, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.861465906857929, | |
| "grad_norm": 0.18175894021987915, | |
| "learning_rate": 7.357046375506529e-05, | |
| "loss": 0.2643, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.8646099430143446, | |
| "grad_norm": 0.18901702761650085, | |
| "learning_rate": 7.35254389914453e-05, | |
| "loss": 0.2859, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.8677539791707605, | |
| "grad_norm": 0.18690907955169678, | |
| "learning_rate": 7.348041422782531e-05, | |
| "loss": 0.3001, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.8708980153271764, | |
| "grad_norm": 0.16587451100349426, | |
| "learning_rate": 7.343538946420532e-05, | |
| "loss": 0.2833, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.874042051483592, | |
| "grad_norm": 0.170462504029274, | |
| "learning_rate": 7.339036470058532e-05, | |
| "loss": 0.2754, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.8771860876400077, | |
| "grad_norm": 0.17350532114505768, | |
| "learning_rate": 7.334533993696533e-05, | |
| "loss": 0.2956, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.8803301237964236, | |
| "grad_norm": 0.1863803118467331, | |
| "learning_rate": 7.330031517334534e-05, | |
| "loss": 0.3071, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.8834741599528395, | |
| "grad_norm": 0.17055153846740723, | |
| "learning_rate": 7.325529040972535e-05, | |
| "loss": 0.3082, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.8866181961092554, | |
| "grad_norm": 0.17581762373447418, | |
| "learning_rate": 7.321026564610536e-05, | |
| "loss": 0.2947, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.889762232265671, | |
| "grad_norm": 0.18630677461624146, | |
| "learning_rate": 7.316524088248538e-05, | |
| "loss": 0.315, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.8929062684220868, | |
| "grad_norm": 0.18152126669883728, | |
| "learning_rate": 7.312021611886538e-05, | |
| "loss": 0.312, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.8960503045785027, | |
| "grad_norm": 0.1875506043434143, | |
| "learning_rate": 7.307519135524538e-05, | |
| "loss": 0.3224, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.8991943407349186, | |
| "grad_norm": 0.18474234640598297, | |
| "learning_rate": 7.303016659162539e-05, | |
| "loss": 0.2944, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.9023383768913342, | |
| "grad_norm": 0.18096047639846802, | |
| "learning_rate": 7.298514182800542e-05, | |
| "loss": 0.3152, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.90548241304775, | |
| "grad_norm": 0.16774339973926544, | |
| "learning_rate": 7.294011706438541e-05, | |
| "loss": 0.2588, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.9086264492041658, | |
| "grad_norm": 0.17628465592861176, | |
| "learning_rate": 7.289509230076542e-05, | |
| "loss": 0.2923, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.9117704853605817, | |
| "grad_norm": 0.17404650151729584, | |
| "learning_rate": 7.285006753714544e-05, | |
| "loss": 0.2852, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.9149145215169976, | |
| "grad_norm": 0.1805901676416397, | |
| "learning_rate": 7.280504277352545e-05, | |
| "loss": 0.2866, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.9180585576734133, | |
| "grad_norm": 0.18428674340248108, | |
| "learning_rate": 7.276001800990544e-05, | |
| "loss": 0.3207, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.921202593829829, | |
| "grad_norm": 0.1807202845811844, | |
| "learning_rate": 7.271499324628545e-05, | |
| "loss": 0.2933, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.9243466299862448, | |
| "grad_norm": 0.17590177059173584, | |
| "learning_rate": 7.266996848266548e-05, | |
| "loss": 0.2732, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.9274906661426607, | |
| "grad_norm": 0.16720589995384216, | |
| "learning_rate": 7.262494371904548e-05, | |
| "loss": 0.3146, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.9306347022990764, | |
| "grad_norm": 0.1786167472600937, | |
| "learning_rate": 7.257991895542548e-05, | |
| "loss": 0.2892, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.933778738455492, | |
| "grad_norm": 0.17377831041812897, | |
| "learning_rate": 7.253489419180549e-05, | |
| "loss": 0.3105, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.936922774611908, | |
| "grad_norm": 0.18173402547836304, | |
| "learning_rate": 7.248986942818551e-05, | |
| "loss": 0.3436, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.9400668107683239, | |
| "grad_norm": 0.17383264005184174, | |
| "learning_rate": 7.244484466456552e-05, | |
| "loss": 0.2823, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.9432108469247398, | |
| "grad_norm": 0.18473853170871735, | |
| "learning_rate": 7.239981990094553e-05, | |
| "loss": 0.2867, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.9463548830811555, | |
| "grad_norm": 0.17817547917366028, | |
| "learning_rate": 7.235479513732554e-05, | |
| "loss": 0.3056, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.9494989192375711, | |
| "grad_norm": 0.17514194548130035, | |
| "learning_rate": 7.230977037370554e-05, | |
| "loss": 0.2951, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.952642955393987, | |
| "grad_norm": 0.17744790017604828, | |
| "learning_rate": 7.226474561008555e-05, | |
| "loss": 0.293, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.955786991550403, | |
| "grad_norm": 0.1766396164894104, | |
| "learning_rate": 7.221972084646556e-05, | |
| "loss": 0.2775, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.9589310277068186, | |
| "grad_norm": 0.17238113284111023, | |
| "learning_rate": 7.217469608284557e-05, | |
| "loss": 0.2873, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.9620750638632343, | |
| "grad_norm": 0.17035745084285736, | |
| "learning_rate": 7.212967131922558e-05, | |
| "loss": 0.2552, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.9652191000196502, | |
| "grad_norm": 0.17209386825561523, | |
| "learning_rate": 7.208464655560559e-05, | |
| "loss": 0.2826, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.968363136176066, | |
| "grad_norm": 0.17958694696426392, | |
| "learning_rate": 7.20396217919856e-05, | |
| "loss": 0.3108, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.971507172332482, | |
| "grad_norm": 0.18314975500106812, | |
| "learning_rate": 7.19945970283656e-05, | |
| "loss": 0.3155, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.9746512084888976, | |
| "grad_norm": 0.17581366002559662, | |
| "learning_rate": 7.194957226474561e-05, | |
| "loss": 0.2829, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.9777952446453133, | |
| "grad_norm": 0.1770240068435669, | |
| "learning_rate": 7.190454750112562e-05, | |
| "loss": 0.2861, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.9809392808017292, | |
| "grad_norm": 0.17571915686130524, | |
| "learning_rate": 7.185952273750563e-05, | |
| "loss": 0.2827, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.984083316958145, | |
| "grad_norm": 0.18270526826381683, | |
| "learning_rate": 7.181449797388564e-05, | |
| "loss": 0.325, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.9872273531145608, | |
| "grad_norm": 0.18204954266548157, | |
| "learning_rate": 7.176947321026565e-05, | |
| "loss": 0.3038, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.9903713892709765, | |
| "grad_norm": 0.16646772623062134, | |
| "learning_rate": 7.172444844664566e-05, | |
| "loss": 0.2859, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.9935154254273924, | |
| "grad_norm": 0.1777997761964798, | |
| "learning_rate": 7.167942368302566e-05, | |
| "loss": 0.2793, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.9966594615838082, | |
| "grad_norm": 0.1707630306482315, | |
| "learning_rate": 7.163439891940567e-05, | |
| "loss": 0.2845, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.9998034977402241, | |
| "grad_norm": 0.17496661841869354, | |
| "learning_rate": 7.158937415578568e-05, | |
| "loss": 0.2853, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.7054563760757446, | |
| "learning_rate": 7.15443493921657e-05, | |
| "loss": 0.3121, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.313894122838974, | |
| "eval_runtime": 102.2414, | |
| "eval_samples_per_second": 12.441, | |
| "eval_steps_per_second": 12.441, | |
| "step": 638 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 2226, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.882536262118605e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |