| { |
| "best_global_step": 72742, |
| "best_metric": 0.4625195264816284, |
| "best_model_checkpoint": "./chess_t5_model_hikaru/checkpoint-72742", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 72742, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005498886475488713, |
| "grad_norm": 1.238595962524414, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.506280403137207, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.010997772950977426, |
| "grad_norm": 1.4552940130233765, |
| "learning_rate": 3.99e-05, |
| "loss": 0.49826000213623045, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01649665942646614, |
| "grad_norm": 1.404646873474121, |
| "learning_rate": 4.9954425345032365e-05, |
| "loss": 0.49065006256103516, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.021995545901954853, |
| "grad_norm": 1.1736574172973633, |
| "learning_rate": 4.9862355334996736e-05, |
| "loss": 0.4841666030883789, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.02749443237744357, |
| "grad_norm": 0.9726278185844421, |
| "learning_rate": 4.97702853249611e-05, |
| "loss": 0.49401542663574216, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03299331885293228, |
| "grad_norm": 1.2402087450027466, |
| "learning_rate": 4.967821531492547e-05, |
| "loss": 0.49149925231933594, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.03849220532842099, |
| "grad_norm": 1.2934446334838867, |
| "learning_rate": 4.958614530488984e-05, |
| "loss": 0.4847659683227539, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.043991091803909706, |
| "grad_norm": 1.5672705173492432, |
| "learning_rate": 4.949407529485421e-05, |
| "loss": 0.4918820571899414, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.049489978279398425, |
| "grad_norm": 1.2068564891815186, |
| "learning_rate": 4.940200528481858e-05, |
| "loss": 0.48373069763183596, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.05498886475488714, |
| "grad_norm": 1.1447815895080566, |
| "learning_rate": 4.9309935274782946e-05, |
| "loss": 0.49397098541259765, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.06048775123037585, |
| "grad_norm": 1.3082820177078247, |
| "learning_rate": 4.9217865264747316e-05, |
| "loss": 0.4879715728759766, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.06598663770586456, |
| "grad_norm": 1.322120189666748, |
| "learning_rate": 4.912579525471169e-05, |
| "loss": 0.4860882568359375, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.07148552418135327, |
| "grad_norm": 1.3626155853271484, |
| "learning_rate": 4.903372524467605e-05, |
| "loss": 0.49077301025390624, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.07698441065684199, |
| "grad_norm": 1.0659453868865967, |
| "learning_rate": 4.894165523464042e-05, |
| "loss": 0.48927955627441405, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.0824832971323307, |
| "grad_norm": 1.1972386837005615, |
| "learning_rate": 4.884958522460479e-05, |
| "loss": 0.4904148101806641, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08798218360781941, |
| "grad_norm": 1.3156094551086426, |
| "learning_rate": 4.875751521456916e-05, |
| "loss": 0.4949393081665039, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.09348107008330812, |
| "grad_norm": 1.1924458742141724, |
| "learning_rate": 4.8665445204533527e-05, |
| "loss": 0.48969757080078125, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.09897995655879685, |
| "grad_norm": 1.4736772775650024, |
| "learning_rate": 4.85733751944979e-05, |
| "loss": 0.4906147384643555, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.10447884303428556, |
| "grad_norm": 1.2425668239593506, |
| "learning_rate": 4.848130518446227e-05, |
| "loss": 0.48961822509765623, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.10997772950977427, |
| "grad_norm": 1.2657986879348755, |
| "learning_rate": 4.838923517442663e-05, |
| "loss": 0.4902804183959961, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.11547661598526299, |
| "grad_norm": 1.2814760208129883, |
| "learning_rate": 4.829716516439101e-05, |
| "loss": 0.4866915130615234, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1209755024607517, |
| "grad_norm": 1.3233275413513184, |
| "learning_rate": 4.820509515435537e-05, |
| "loss": 0.4842318344116211, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.1264743889362404, |
| "grad_norm": 1.0813190937042236, |
| "learning_rate": 4.8113025144319744e-05, |
| "loss": 0.4887635040283203, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.13197327541172912, |
| "grad_norm": 1.4319493770599365, |
| "learning_rate": 4.8020955134284114e-05, |
| "loss": 0.48523338317871095, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.13747216188721784, |
| "grad_norm": 1.1767573356628418, |
| "learning_rate": 4.792888512424848e-05, |
| "loss": 0.4876384735107422, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.14297104836270655, |
| "grad_norm": 1.2524778842926025, |
| "learning_rate": 4.783681511421285e-05, |
| "loss": 0.48621952056884765, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.14846993483819526, |
| "grad_norm": 1.7391471862792969, |
| "learning_rate": 4.774474510417722e-05, |
| "loss": 0.4950310516357422, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.15396882131368397, |
| "grad_norm": 1.3185511827468872, |
| "learning_rate": 4.765267509414158e-05, |
| "loss": 0.4825564193725586, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.15946770778917269, |
| "grad_norm": 1.023362636566162, |
| "learning_rate": 4.756060508410596e-05, |
| "loss": 0.4869500732421875, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.1649665942646614, |
| "grad_norm": 1.4824140071868896, |
| "learning_rate": 4.7468535074070324e-05, |
| "loss": 0.49057952880859373, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.1704654807401501, |
| "grad_norm": 1.1914821863174438, |
| "learning_rate": 4.7376465064034695e-05, |
| "loss": 0.49073287963867185, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.17596436721563882, |
| "grad_norm": 1.0815869569778442, |
| "learning_rate": 4.7284395053999066e-05, |
| "loss": 0.49227970123291015, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.18146325369112754, |
| "grad_norm": 1.644206166267395, |
| "learning_rate": 4.719232504396343e-05, |
| "loss": 0.48635608673095704, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.18696214016661625, |
| "grad_norm": 1.1657360792160034, |
| "learning_rate": 4.710025503392781e-05, |
| "loss": 0.49577178955078127, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.192461026642105, |
| "grad_norm": 1.3343608379364014, |
| "learning_rate": 4.700818502389217e-05, |
| "loss": 0.48896270751953125, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.1979599131175937, |
| "grad_norm": 1.036275863647461, |
| "learning_rate": 4.6916115013856535e-05, |
| "loss": 0.4918210983276367, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.2034587995930824, |
| "grad_norm": 1.1466560363769531, |
| "learning_rate": 4.682404500382091e-05, |
| "loss": 0.4845957946777344, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.20895768606857112, |
| "grad_norm": 1.2194637060165405, |
| "learning_rate": 4.6731974993785276e-05, |
| "loss": 0.48361312866210937, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.21445657254405984, |
| "grad_norm": 1.0549407005310059, |
| "learning_rate": 4.663990498374965e-05, |
| "loss": 0.4873248291015625, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.21995545901954855, |
| "grad_norm": 1.5164191722869873, |
| "learning_rate": 4.654783497371402e-05, |
| "loss": 0.48396194458007813, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.22545434549503726, |
| "grad_norm": 0.9566870927810669, |
| "learning_rate": 4.645576496367838e-05, |
| "loss": 0.49601322174072265, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.23095323197052597, |
| "grad_norm": 1.268650770187378, |
| "learning_rate": 4.636369495364275e-05, |
| "loss": 0.4893684387207031, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.2364521184460147, |
| "grad_norm": 1.626772165298462, |
| "learning_rate": 4.627162494360712e-05, |
| "loss": 0.4904355621337891, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.2419510049215034, |
| "grad_norm": 1.2238197326660156, |
| "learning_rate": 4.6179554933571486e-05, |
| "loss": 0.484462890625, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.2474498913969921, |
| "grad_norm": 1.4520012140274048, |
| "learning_rate": 4.608748492353586e-05, |
| "loss": 0.4788643264770508, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.2529487778724808, |
| "grad_norm": 1.1315358877182007, |
| "learning_rate": 4.599541491350023e-05, |
| "loss": 0.48716392517089846, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.25844766434796956, |
| "grad_norm": 1.0476795434951782, |
| "learning_rate": 4.59033449034646e-05, |
| "loss": 0.4895347213745117, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.26394655082345825, |
| "grad_norm": 1.100468635559082, |
| "learning_rate": 4.581127489342897e-05, |
| "loss": 0.4899191665649414, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.269445437298947, |
| "grad_norm": 1.1204516887664795, |
| "learning_rate": 4.571920488339333e-05, |
| "loss": 0.4837226486206055, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.2749443237744357, |
| "grad_norm": 1.271126627922058, |
| "learning_rate": 4.56271348733577e-05, |
| "loss": 0.48351455688476563, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.2804432102499244, |
| "grad_norm": 1.294801115989685, |
| "learning_rate": 4.5535064863322074e-05, |
| "loss": 0.4861069107055664, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.2859420967254131, |
| "grad_norm": 0.9449974894523621, |
| "learning_rate": 4.5442994853286444e-05, |
| "loss": 0.4828087997436523, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.29144098320090184, |
| "grad_norm": 1.017383337020874, |
| "learning_rate": 4.535092484325081e-05, |
| "loss": 0.4907358551025391, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.2969398696763905, |
| "grad_norm": 1.4358981847763062, |
| "learning_rate": 4.525885483321518e-05, |
| "loss": 0.482423210144043, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.30243875615187926, |
| "grad_norm": 1.3579864501953125, |
| "learning_rate": 4.516678482317955e-05, |
| "loss": 0.4872136688232422, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.30793764262736795, |
| "grad_norm": 1.309594750404358, |
| "learning_rate": 4.5074714813143913e-05, |
| "loss": 0.48710639953613283, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.3134365291028567, |
| "grad_norm": 1.4916502237319946, |
| "learning_rate": 4.4982644803108284e-05, |
| "loss": 0.4856998062133789, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.31893541557834537, |
| "grad_norm": 1.1984270811080933, |
| "learning_rate": 4.4890574793072655e-05, |
| "loss": 0.48433101654052735, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.3244343020538341, |
| "grad_norm": 1.376825213432312, |
| "learning_rate": 4.479850478303702e-05, |
| "loss": 0.4843286514282227, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.3299331885293228, |
| "grad_norm": 1.04801607131958, |
| "learning_rate": 4.4706434773001396e-05, |
| "loss": 0.4760005187988281, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.33543207500481154, |
| "grad_norm": 1.277635097503662, |
| "learning_rate": 4.461436476296576e-05, |
| "loss": 0.4829146194458008, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.3409309614803002, |
| "grad_norm": 1.08747398853302, |
| "learning_rate": 4.452229475293013e-05, |
| "loss": 0.49240009307861327, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.34642984795578896, |
| "grad_norm": 1.1133017539978027, |
| "learning_rate": 4.44302247428945e-05, |
| "loss": 0.4923815536499023, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.35192873443127765, |
| "grad_norm": 1.5661677122116089, |
| "learning_rate": 4.4338154732858865e-05, |
| "loss": 0.4879690933227539, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.3574276209067664, |
| "grad_norm": 1.4570703506469727, |
| "learning_rate": 4.4246084722823236e-05, |
| "loss": 0.4856468963623047, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.36292650738225507, |
| "grad_norm": 1.4638596773147583, |
| "learning_rate": 4.4154014712787606e-05, |
| "loss": 0.48412940979003904, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.3684253938577438, |
| "grad_norm": 1.2463369369506836, |
| "learning_rate": 4.406194470275197e-05, |
| "loss": 0.4839859771728516, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.3739242803332325, |
| "grad_norm": 1.0832504034042358, |
| "learning_rate": 4.396987469271635e-05, |
| "loss": 0.49135875701904297, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.37942316680872124, |
| "grad_norm": 1.1107310056686401, |
| "learning_rate": 4.387780468268071e-05, |
| "loss": 0.47643829345703126, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.38492205328421, |
| "grad_norm": 1.1073737144470215, |
| "learning_rate": 4.378573467264508e-05, |
| "loss": 0.4878357315063477, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.39042093975969866, |
| "grad_norm": 1.4523825645446777, |
| "learning_rate": 4.369366466260945e-05, |
| "loss": 0.4929097747802734, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.3959198262351874, |
| "grad_norm": 1.1978082656860352, |
| "learning_rate": 4.3601594652573816e-05, |
| "loss": 0.47999427795410154, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.4014187127106761, |
| "grad_norm": 1.080812692642212, |
| "learning_rate": 4.3509524642538194e-05, |
| "loss": 0.4825727081298828, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.4069175991861648, |
| "grad_norm": 1.053101897239685, |
| "learning_rate": 4.341745463250256e-05, |
| "loss": 0.4855875778198242, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.4124164856616535, |
| "grad_norm": 1.5434905290603638, |
| "learning_rate": 4.332538462246692e-05, |
| "loss": 0.48418006896972654, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.41791537213714225, |
| "grad_norm": 1.3098441362380981, |
| "learning_rate": 4.32333146124313e-05, |
| "loss": 0.47957420349121094, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.42341425861263093, |
| "grad_norm": 1.1274868249893188, |
| "learning_rate": 4.314124460239566e-05, |
| "loss": 0.48411872863769534, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.4289131450881197, |
| "grad_norm": 1.1913822889328003, |
| "learning_rate": 4.3049174592360033e-05, |
| "loss": 0.48610164642333986, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.43441203156360836, |
| "grad_norm": 1.1664844751358032, |
| "learning_rate": 4.2957104582324404e-05, |
| "loss": 0.48251495361328123, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.4399109180390971, |
| "grad_norm": 0.9833515882492065, |
| "learning_rate": 4.286503457228877e-05, |
| "loss": 0.48185344696044924, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.4454098045145858, |
| "grad_norm": 1.3691802024841309, |
| "learning_rate": 4.277296456225314e-05, |
| "loss": 0.47827003479003904, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.4509086909900745, |
| "grad_norm": 1.4538307189941406, |
| "learning_rate": 4.268089455221751e-05, |
| "loss": 0.48655067443847655, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.4564075774655632, |
| "grad_norm": 1.6174641847610474, |
| "learning_rate": 4.258882454218188e-05, |
| "loss": 0.4811368179321289, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.46190646394105195, |
| "grad_norm": 1.379770278930664, |
| "learning_rate": 4.2496754532146244e-05, |
| "loss": 0.4825275421142578, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.46740535041654063, |
| "grad_norm": 1.1480027437210083, |
| "learning_rate": 4.2404684522110614e-05, |
| "loss": 0.4793708801269531, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.4729042368920294, |
| "grad_norm": 1.2923580408096313, |
| "learning_rate": 4.2312614512074985e-05, |
| "loss": 0.48294658660888673, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.47840312336751806, |
| "grad_norm": 1.1704210042953491, |
| "learning_rate": 4.2220544502039356e-05, |
| "loss": 0.48764766693115236, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.4839020098430068, |
| "grad_norm": 0.9645224213600159, |
| "learning_rate": 4.212847449200372e-05, |
| "loss": 0.48104751586914063, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.4894008963184955, |
| "grad_norm": 1.0854864120483398, |
| "learning_rate": 4.203640448196809e-05, |
| "loss": 0.48372928619384764, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.4948997827939842, |
| "grad_norm": 1.058073878288269, |
| "learning_rate": 4.194433447193246e-05, |
| "loss": 0.481105842590332, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.500398669269473, |
| "grad_norm": 1.1038442850112915, |
| "learning_rate": 4.185226446189683e-05, |
| "loss": 0.48221038818359374, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.5058975557449616, |
| "grad_norm": 1.1211503744125366, |
| "learning_rate": 4.1760194451861195e-05, |
| "loss": 0.48461868286132814, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.5113964422204503, |
| "grad_norm": 1.1851303577423096, |
| "learning_rate": 4.1668124441825566e-05, |
| "loss": 0.48900299072265624, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.5168953286959391, |
| "grad_norm": 1.1773110628128052, |
| "learning_rate": 4.1576054431789936e-05, |
| "loss": 0.4895766067504883, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.5223942151714278, |
| "grad_norm": 1.0236694812774658, |
| "learning_rate": 4.14839844217543e-05, |
| "loss": 0.47842552185058596, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.5278931016469165, |
| "grad_norm": 1.2550437450408936, |
| "learning_rate": 4.139191441171867e-05, |
| "loss": 0.4883332061767578, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.5333919881224052, |
| "grad_norm": 1.6811326742172241, |
| "learning_rate": 4.129984440168304e-05, |
| "loss": 0.48251426696777344, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.538890874597894, |
| "grad_norm": 1.1312133073806763, |
| "learning_rate": 4.1207774391647405e-05, |
| "loss": 0.4811555480957031, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.5443897610733827, |
| "grad_norm": 1.106419563293457, |
| "learning_rate": 4.111570438161178e-05, |
| "loss": 0.4829677963256836, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.5498886475488713, |
| "grad_norm": 1.2335270643234253, |
| "learning_rate": 4.102363437157615e-05, |
| "loss": 0.48413619995117185, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.55538753402436, |
| "grad_norm": 1.195844054222107, |
| "learning_rate": 4.093156436154052e-05, |
| "loss": 0.4821126937866211, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.5608864204998488, |
| "grad_norm": 1.0814074277877808, |
| "learning_rate": 4.083949435150489e-05, |
| "loss": 0.4847369003295898, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.5663853069753375, |
| "grad_norm": 1.4510689973831177, |
| "learning_rate": 4.074742434146925e-05, |
| "loss": 0.4875687789916992, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.5718841934508262, |
| "grad_norm": 1.0444058179855347, |
| "learning_rate": 4.065535433143363e-05, |
| "loss": 0.4803382110595703, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.5773830799263149, |
| "grad_norm": 1.1824759244918823, |
| "learning_rate": 4.056328432139799e-05, |
| "loss": 0.48757186889648435, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.5828819664018037, |
| "grad_norm": 1.1672804355621338, |
| "learning_rate": 4.047121431136236e-05, |
| "loss": 0.47619979858398437, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.5883808528772924, |
| "grad_norm": 1.3952018022537231, |
| "learning_rate": 4.0379144301326734e-05, |
| "loss": 0.4820771026611328, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.593879739352781, |
| "grad_norm": 1.5481926202774048, |
| "learning_rate": 4.02870742912911e-05, |
| "loss": 0.4789703369140625, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.5993786258282697, |
| "grad_norm": 1.1940809488296509, |
| "learning_rate": 4.019500428125547e-05, |
| "loss": 0.4823886871337891, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.6048775123037585, |
| "grad_norm": 1.470038890838623, |
| "learning_rate": 4.010293427121984e-05, |
| "loss": 0.47876800537109376, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.6103763987792472, |
| "grad_norm": 1.372512698173523, |
| "learning_rate": 4.00108642611842e-05, |
| "loss": 0.48137435913085935, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.6158752852547359, |
| "grad_norm": 0.9625583291053772, |
| "learning_rate": 3.991879425114858e-05, |
| "loss": 0.4751309967041016, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.6213741717302246, |
| "grad_norm": 1.0047613382339478, |
| "learning_rate": 3.9826724241112945e-05, |
| "loss": 0.4809339141845703, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.6268730582057134, |
| "grad_norm": 1.8941971063613892, |
| "learning_rate": 3.973465423107731e-05, |
| "loss": 0.47376441955566406, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.6323719446812021, |
| "grad_norm": 1.0294033288955688, |
| "learning_rate": 3.9642584221041686e-05, |
| "loss": 0.4846999740600586, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.6378708311566907, |
| "grad_norm": 1.1899781227111816, |
| "learning_rate": 3.955051421100605e-05, |
| "loss": 0.4818299865722656, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.6433697176321795, |
| "grad_norm": 1.5099271535873413, |
| "learning_rate": 3.945844420097042e-05, |
| "loss": 0.4828767776489258, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.6488686041076682, |
| "grad_norm": 1.3377799987792969, |
| "learning_rate": 3.936637419093479e-05, |
| "loss": 0.47890872955322267, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.6543674905831569, |
| "grad_norm": 1.6240547895431519, |
| "learning_rate": 3.9274304180899155e-05, |
| "loss": 0.4793845748901367, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.6598663770586456, |
| "grad_norm": 1.32374107837677, |
| "learning_rate": 3.9182234170863525e-05, |
| "loss": 0.48126724243164065, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.6653652635341344, |
| "grad_norm": 1.1302155256271362, |
| "learning_rate": 3.9090164160827896e-05, |
| "loss": 0.4794307708740234, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.6708641500096231, |
| "grad_norm": 1.2106575965881348, |
| "learning_rate": 3.899809415079227e-05, |
| "loss": 0.4764822769165039, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.6763630364851118, |
| "grad_norm": 1.1682376861572266, |
| "learning_rate": 3.890602414075663e-05, |
| "loss": 0.48130035400390625, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.6818619229606004, |
| "grad_norm": 1.7385523319244385, |
| "learning_rate": 3.8813954130721e-05, |
| "loss": 0.48233951568603517, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.6873608094360892, |
| "grad_norm": 0.9956115484237671, |
| "learning_rate": 3.872188412068537e-05, |
| "loss": 0.47769607543945314, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.6928596959115779, |
| "grad_norm": 0.9261813759803772, |
| "learning_rate": 3.862981411064974e-05, |
| "loss": 0.4826504898071289, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.6983585823870666, |
| "grad_norm": 1.0754562616348267, |
| "learning_rate": 3.8537744100614106e-05, |
| "loss": 0.48267059326171874, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.7038574688625553, |
| "grad_norm": 1.2435545921325684, |
| "learning_rate": 3.844567409057848e-05, |
| "loss": 0.48062828063964846, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.7093563553380441, |
| "grad_norm": 1.1161478757858276, |
| "learning_rate": 3.835360408054285e-05, |
| "loss": 0.476544189453125, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.7148552418135328, |
| "grad_norm": 1.144326090812683, |
| "learning_rate": 3.826153407050722e-05, |
| "loss": 0.4830437469482422, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.7203541282890215, |
| "grad_norm": 1.2163105010986328, |
| "learning_rate": 3.816946406047158e-05, |
| "loss": 0.48178863525390625, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.7258530147645101, |
| "grad_norm": 1.3089566230773926, |
| "learning_rate": 3.807739405043595e-05, |
| "loss": 0.4754468536376953, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.7313519012399989, |
| "grad_norm": 1.2991975545883179, |
| "learning_rate": 3.798532404040032e-05, |
| "loss": 0.4895411682128906, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.7368507877154876, |
| "grad_norm": 1.6097289323806763, |
| "learning_rate": 3.789325403036469e-05, |
| "loss": 0.47313800811767576, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.7423496741909763, |
| "grad_norm": 1.4237576723098755, |
| "learning_rate": 3.7801184020329065e-05, |
| "loss": 0.47288108825683595, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.747848560666465, |
| "grad_norm": 1.7340173721313477, |
| "learning_rate": 3.770911401029343e-05, |
| "loss": 0.4713779067993164, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.7533474471419538, |
| "grad_norm": 1.3480178117752075, |
| "learning_rate": 3.761704400025779e-05, |
| "loss": 0.4823367309570312, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.7588463336174425, |
| "grad_norm": 0.945102870464325, |
| "learning_rate": 3.752497399022217e-05, |
| "loss": 0.485689697265625, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.7643452200929312, |
| "grad_norm": 1.5504003763198853, |
| "learning_rate": 3.7432903980186534e-05, |
| "loss": 0.4697317886352539, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.76984410656842, |
| "grad_norm": 1.4954441785812378, |
| "learning_rate": 3.7340833970150904e-05, |
| "loss": 0.4746841049194336, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.7753429930439086, |
| "grad_norm": 1.660771131515503, |
| "learning_rate": 3.7248763960115275e-05, |
| "loss": 0.48746414184570314, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.7808418795193973, |
| "grad_norm": 1.216834306716919, |
| "learning_rate": 3.715669395007964e-05, |
| "loss": 0.4784600067138672, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.786340765994886, |
| "grad_norm": 1.3025329113006592, |
| "learning_rate": 3.7064623940044016e-05, |
| "loss": 0.48134098052978513, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.7918396524703748, |
| "grad_norm": 0.8612267374992371, |
| "learning_rate": 3.697255393000838e-05, |
| "loss": 0.48288066864013673, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.7973385389458635, |
| "grad_norm": 1.5112066268920898, |
| "learning_rate": 3.6880483919972744e-05, |
| "loss": 0.48638771057128904, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.8028374254213522, |
| "grad_norm": 1.2981903553009033, |
| "learning_rate": 3.678841390993712e-05, |
| "loss": 0.4764302444458008, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.8083363118968409, |
| "grad_norm": 1.2499499320983887, |
| "learning_rate": 3.6696343899901485e-05, |
| "loss": 0.47807662963867187, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.8138351983723296, |
| "grad_norm": 1.4974340200424194, |
| "learning_rate": 3.6604273889865856e-05, |
| "loss": 0.48103851318359375, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.8193340848478183, |
| "grad_norm": 1.6043846607208252, |
| "learning_rate": 3.6512203879830226e-05, |
| "loss": 0.4745806121826172, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.824832971323307, |
| "grad_norm": 1.0718004703521729, |
| "learning_rate": 3.642013386979459e-05, |
| "loss": 0.4758340835571289, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.8303318577987957, |
| "grad_norm": 1.31827712059021, |
| "learning_rate": 3.632806385975897e-05, |
| "loss": 0.48326473236083983, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.8358307442742845, |
| "grad_norm": 1.214794635772705, |
| "learning_rate": 3.623599384972333e-05, |
| "loss": 0.4670214080810547, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.8413296307497732, |
| "grad_norm": 1.3490458726882935, |
| "learning_rate": 3.61439238396877e-05, |
| "loss": 0.4771783065795898, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.8468285172252619, |
| "grad_norm": 1.7430227994918823, |
| "learning_rate": 3.605185382965207e-05, |
| "loss": 0.47809303283691407, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.8523274037007506, |
| "grad_norm": 1.04710054397583, |
| "learning_rate": 3.5959783819616437e-05, |
| "loss": 0.47361648559570313, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.8578262901762393, |
| "grad_norm": 1.239403247833252, |
| "learning_rate": 3.586771380958081e-05, |
| "loss": 0.47289577484130857, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.863325176651728, |
| "grad_norm": 1.0348613262176514, |
| "learning_rate": 3.577564379954518e-05, |
| "loss": 0.4844191360473633, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.8688240631272167, |
| "grad_norm": 1.2087358236312866, |
| "learning_rate": 3.568357378950954e-05, |
| "loss": 0.4849067687988281, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.8743229496027054, |
| "grad_norm": 1.498613715171814, |
| "learning_rate": 3.559150377947391e-05, |
| "loss": 0.4736307907104492, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.8798218360781942, |
| "grad_norm": 1.2673721313476562, |
| "learning_rate": 3.549943376943828e-05, |
| "loss": 0.4751145553588867, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.8853207225536829, |
| "grad_norm": 1.078145980834961, |
| "learning_rate": 3.5407363759402654e-05, |
| "loss": 0.48442405700683594, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.8908196090291716, |
| "grad_norm": 1.8665213584899902, |
| "learning_rate": 3.531529374936702e-05, |
| "loss": 0.47800296783447266, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.8963184955046603, |
| "grad_norm": 1.093640685081482, |
| "learning_rate": 3.522322373933139e-05, |
| "loss": 0.4768505859375, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.901817381980149, |
| "grad_norm": 1.438798189163208, |
| "learning_rate": 3.513115372929576e-05, |
| "loss": 0.4752470016479492, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.9073162684556377, |
| "grad_norm": 1.156036376953125, |
| "learning_rate": 3.503908371926013e-05, |
| "loss": 0.47669639587402346, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.9128151549311264, |
| "grad_norm": 1.223441481590271, |
| "learning_rate": 3.494701370922449e-05, |
| "loss": 0.4820696258544922, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.9183140414066152, |
| "grad_norm": 1.161592721939087, |
| "learning_rate": 3.4854943699188864e-05, |
| "loss": 0.46704940795898436, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.9238129278821039, |
| "grad_norm": 1.217645287513733, |
| "learning_rate": 3.4762873689153234e-05, |
| "loss": 0.4787548065185547, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.9293118143575926, |
| "grad_norm": 1.2599478960037231, |
| "learning_rate": 3.4670803679117605e-05, |
| "loss": 0.47969642639160154, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.9348107008330813, |
| "grad_norm": 1.1119675636291504, |
| "learning_rate": 3.457873366908197e-05, |
| "loss": 0.48166824340820313, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.9403095873085701, |
| "grad_norm": 1.4451464414596558, |
| "learning_rate": 3.448666365904634e-05, |
| "loss": 0.4774625015258789, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.9458084737840587, |
| "grad_norm": 1.121450662612915, |
| "learning_rate": 3.439459364901071e-05, |
| "loss": 0.4775946426391602, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.9513073602595474, |
| "grad_norm": 1.7251038551330566, |
| "learning_rate": 3.4302523638975074e-05, |
| "loss": 0.46810245513916016, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.9568062467350361, |
| "grad_norm": 1.1376259326934814, |
| "learning_rate": 3.421045362893945e-05, |
| "loss": 0.4734595108032227, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.9623051332105249, |
| "grad_norm": 1.3909783363342285, |
| "learning_rate": 3.4118383618903815e-05, |
| "loss": 0.4745623016357422, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.9678040196860136, |
| "grad_norm": 1.4496464729309082, |
| "learning_rate": 3.4026313608868186e-05, |
| "loss": 0.4793576431274414, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.9733029061615023, |
| "grad_norm": 1.188259482383728, |
| "learning_rate": 3.3934243598832557e-05, |
| "loss": 0.48435794830322265, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.978801792636991, |
| "grad_norm": 0.972775936126709, |
| "learning_rate": 3.384217358879692e-05, |
| "loss": 0.48073070526123046, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.9843006791124798, |
| "grad_norm": 1.3712236881256104, |
| "learning_rate": 3.375010357876129e-05, |
| "loss": 0.47246246337890624, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.9897995655879684, |
| "grad_norm": 1.0553455352783203, |
| "learning_rate": 3.365803356872566e-05, |
| "loss": 0.4749702835083008, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.9952984520634571, |
| "grad_norm": 2.0960538387298584, |
| "learning_rate": 3.3565963558690026e-05, |
| "loss": 0.48137100219726564, |
| "step": 36200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.46924570202827454, |
| "eval_runtime": 158.4469, |
| "eval_samples_per_second": 408.08, |
| "eval_steps_per_second": 25.51, |
| "step": 36371 |
| }, |
| { |
| "epoch": 1.000797338538946, |
| "grad_norm": 1.0945351123809814, |
| "learning_rate": 3.34738935486544e-05, |
| "loss": 0.47277111053466797, |
| "step": 36400 |
| }, |
| { |
| "epoch": 1.0062962250144345, |
| "grad_norm": 1.1377208232879639, |
| "learning_rate": 3.338182353861877e-05, |
| "loss": 0.4704814147949219, |
| "step": 36600 |
| }, |
| { |
| "epoch": 1.0117951114899233, |
| "grad_norm": 1.2042992115020752, |
| "learning_rate": 3.328975352858314e-05, |
| "loss": 0.4778765869140625, |
| "step": 36800 |
| }, |
| { |
| "epoch": 1.017293997965412, |
| "grad_norm": 1.2293647527694702, |
| "learning_rate": 3.319768351854751e-05, |
| "loss": 0.4779492950439453, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.0227928844409007, |
| "grad_norm": 1.0912444591522217, |
| "learning_rate": 3.310561350851187e-05, |
| "loss": 0.47877525329589843, |
| "step": 37200 |
| }, |
| { |
| "epoch": 1.0282917709163895, |
| "grad_norm": 1.2448941469192505, |
| "learning_rate": 3.301354349847624e-05, |
| "loss": 0.4758515930175781, |
| "step": 37400 |
| }, |
| { |
| "epoch": 1.033790657391878, |
| "grad_norm": 1.127113699913025, |
| "learning_rate": 3.292147348844061e-05, |
| "loss": 0.47277240753173827, |
| "step": 37600 |
| }, |
| { |
| "epoch": 1.0392895438673668, |
| "grad_norm": 1.184788703918457, |
| "learning_rate": 3.282940347840498e-05, |
| "loss": 0.48051612854003906, |
| "step": 37800 |
| }, |
| { |
| "epoch": 1.0447884303428556, |
| "grad_norm": 1.3059478998184204, |
| "learning_rate": 3.2737333468369354e-05, |
| "loss": 0.4780512237548828, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.0502873168183442, |
| "grad_norm": 1.035843014717102, |
| "learning_rate": 3.264526345833372e-05, |
| "loss": 0.4778805160522461, |
| "step": 38200 |
| }, |
| { |
| "epoch": 1.055786203293833, |
| "grad_norm": 1.142691731452942, |
| "learning_rate": 3.255319344829809e-05, |
| "loss": 0.4747405242919922, |
| "step": 38400 |
| }, |
| { |
| "epoch": 1.0612850897693218, |
| "grad_norm": 1.2115979194641113, |
| "learning_rate": 3.246112343826246e-05, |
| "loss": 0.4684751510620117, |
| "step": 38600 |
| }, |
| { |
| "epoch": 1.0667839762448104, |
| "grad_norm": 1.0604227781295776, |
| "learning_rate": 3.2369053428226823e-05, |
| "loss": 0.4836904525756836, |
| "step": 38800 |
| }, |
| { |
| "epoch": 1.0722828627202992, |
| "grad_norm": 1.2616559267044067, |
| "learning_rate": 3.2276983418191194e-05, |
| "loss": 0.47024051666259764, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.077781749195788, |
| "grad_norm": 1.1861746311187744, |
| "learning_rate": 3.2184913408155565e-05, |
| "loss": 0.47626224517822263, |
| "step": 39200 |
| }, |
| { |
| "epoch": 1.0832806356712765, |
| "grad_norm": 1.0768451690673828, |
| "learning_rate": 3.209284339811993e-05, |
| "loss": 0.4712419128417969, |
| "step": 39400 |
| }, |
| { |
| "epoch": 1.0887795221467653, |
| "grad_norm": 1.1116639375686646, |
| "learning_rate": 3.20007733880843e-05, |
| "loss": 0.47870445251464844, |
| "step": 39600 |
| }, |
| { |
| "epoch": 1.094278408622254, |
| "grad_norm": 0.9229024648666382, |
| "learning_rate": 3.190870337804867e-05, |
| "loss": 0.47164249420166016, |
| "step": 39800 |
| }, |
| { |
| "epoch": 1.0997772950977427, |
| "grad_norm": 1.2584002017974854, |
| "learning_rate": 3.181663336801304e-05, |
| "loss": 0.46996349334716797, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.1052761815732315, |
| "grad_norm": 1.1987744569778442, |
| "learning_rate": 3.1724563357977404e-05, |
| "loss": 0.47581478118896486, |
| "step": 40200 |
| }, |
| { |
| "epoch": 1.11077506804872, |
| "grad_norm": 1.897595763206482, |
| "learning_rate": 3.1632493347941775e-05, |
| "loss": 0.47223583221435544, |
| "step": 40400 |
| }, |
| { |
| "epoch": 1.1162739545242089, |
| "grad_norm": 1.384735345840454, |
| "learning_rate": 3.1540423337906146e-05, |
| "loss": 0.4742586898803711, |
| "step": 40600 |
| }, |
| { |
| "epoch": 1.1217728409996977, |
| "grad_norm": 1.2924162149429321, |
| "learning_rate": 3.1448353327870516e-05, |
| "loss": 0.4763710403442383, |
| "step": 40800 |
| }, |
| { |
| "epoch": 1.1272717274751862, |
| "grad_norm": 1.2529865503311157, |
| "learning_rate": 3.135628331783489e-05, |
| "loss": 0.4804756546020508, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.132770613950675, |
| "grad_norm": 1.0378504991531372, |
| "learning_rate": 3.126421330779925e-05, |
| "loss": 0.4701519775390625, |
| "step": 41200 |
| }, |
| { |
| "epoch": 1.1382695004261638, |
| "grad_norm": 1.3165602684020996, |
| "learning_rate": 3.117214329776362e-05, |
| "loss": 0.4799094009399414, |
| "step": 41400 |
| }, |
| { |
| "epoch": 1.1437683869016524, |
| "grad_norm": 1.3106869459152222, |
| "learning_rate": 3.108007328772799e-05, |
| "loss": 0.4807415771484375, |
| "step": 41600 |
| }, |
| { |
| "epoch": 1.1492672733771412, |
| "grad_norm": 1.870168685913086, |
| "learning_rate": 3.0988003277692356e-05, |
| "loss": 0.4763855743408203, |
| "step": 41800 |
| }, |
| { |
| "epoch": 1.1547661598526298, |
| "grad_norm": 1.2770658731460571, |
| "learning_rate": 3.0895933267656726e-05, |
| "loss": 0.47005424499511717, |
| "step": 42000 |
| }, |
| { |
| "epoch": 1.1602650463281186, |
| "grad_norm": 1.2080628871917725, |
| "learning_rate": 3.08038632576211e-05, |
| "loss": 0.4715093231201172, |
| "step": 42200 |
| }, |
| { |
| "epoch": 1.1657639328036074, |
| "grad_norm": 1.8036431074142456, |
| "learning_rate": 3.071179324758546e-05, |
| "loss": 0.4677348327636719, |
| "step": 42400 |
| }, |
| { |
| "epoch": 1.171262819279096, |
| "grad_norm": 1.0280815362930298, |
| "learning_rate": 3.061972323754984e-05, |
| "loss": 0.4739281463623047, |
| "step": 42600 |
| }, |
| { |
| "epoch": 1.1767617057545847, |
| "grad_norm": 0.9961258769035339, |
| "learning_rate": 3.05276532275142e-05, |
| "loss": 0.4825423049926758, |
| "step": 42800 |
| }, |
| { |
| "epoch": 1.1822605922300733, |
| "grad_norm": 1.0836036205291748, |
| "learning_rate": 3.0435583217478576e-05, |
| "loss": 0.47616680145263673, |
| "step": 43000 |
| }, |
| { |
| "epoch": 1.187759478705562, |
| "grad_norm": 0.9266841411590576, |
| "learning_rate": 3.0343513207442943e-05, |
| "loss": 0.47153358459472655, |
| "step": 43200 |
| }, |
| { |
| "epoch": 1.1932583651810509, |
| "grad_norm": 1.0143980979919434, |
| "learning_rate": 3.0251443197407307e-05, |
| "loss": 0.4762028503417969, |
| "step": 43400 |
| }, |
| { |
| "epoch": 1.1987572516565395, |
| "grad_norm": 1.160222053527832, |
| "learning_rate": 3.015937318737168e-05, |
| "loss": 0.4718109893798828, |
| "step": 43600 |
| }, |
| { |
| "epoch": 1.2042561381320283, |
| "grad_norm": 1.1540669202804565, |
| "learning_rate": 3.006730317733605e-05, |
| "loss": 0.47153167724609374, |
| "step": 43800 |
| }, |
| { |
| "epoch": 1.209755024607517, |
| "grad_norm": 1.3754700422286987, |
| "learning_rate": 2.9975233167300416e-05, |
| "loss": 0.4751555252075195, |
| "step": 44000 |
| }, |
| { |
| "epoch": 1.2152539110830056, |
| "grad_norm": 1.095689296722412, |
| "learning_rate": 2.9883163157264786e-05, |
| "loss": 0.47820320129394533, |
| "step": 44200 |
| }, |
| { |
| "epoch": 1.2207527975584944, |
| "grad_norm": 1.2152804136276245, |
| "learning_rate": 2.9791093147229154e-05, |
| "loss": 0.4785987091064453, |
| "step": 44400 |
| }, |
| { |
| "epoch": 1.2262516840339832, |
| "grad_norm": 1.3621678352355957, |
| "learning_rate": 2.969902313719352e-05, |
| "loss": 0.4778928375244141, |
| "step": 44600 |
| }, |
| { |
| "epoch": 1.2317505705094718, |
| "grad_norm": 1.3576879501342773, |
| "learning_rate": 2.9606953127157895e-05, |
| "loss": 0.46979766845703125, |
| "step": 44800 |
| }, |
| { |
| "epoch": 1.2372494569849606, |
| "grad_norm": 1.4446898698806763, |
| "learning_rate": 2.9514883117122262e-05, |
| "loss": 0.47956855773925783, |
| "step": 45000 |
| }, |
| { |
| "epoch": 1.2427483434604492, |
| "grad_norm": 1.1428676843643188, |
| "learning_rate": 2.9422813107086626e-05, |
| "loss": 0.46750675201416014, |
| "step": 45200 |
| }, |
| { |
| "epoch": 1.248247229935938, |
| "grad_norm": 1.1125656366348267, |
| "learning_rate": 2.9330743097051e-05, |
| "loss": 0.4821536254882812, |
| "step": 45400 |
| }, |
| { |
| "epoch": 1.2537461164114267, |
| "grad_norm": 0.9081394672393799, |
| "learning_rate": 2.9238673087015367e-05, |
| "loss": 0.48335330963134765, |
| "step": 45600 |
| }, |
| { |
| "epoch": 1.2592450028869153, |
| "grad_norm": 1.3965390920639038, |
| "learning_rate": 2.9146603076979738e-05, |
| "loss": 0.48991138458251954, |
| "step": 45800 |
| }, |
| { |
| "epoch": 1.2647438893624041, |
| "grad_norm": 0.9960418939590454, |
| "learning_rate": 2.9054533066944105e-05, |
| "loss": 0.48175228118896485, |
| "step": 46000 |
| }, |
| { |
| "epoch": 1.2702427758378927, |
| "grad_norm": 0.8425759077072144, |
| "learning_rate": 2.8962463056908472e-05, |
| "loss": 0.48490497589111325, |
| "step": 46200 |
| }, |
| { |
| "epoch": 1.2757416623133815, |
| "grad_norm": 0.8783431053161621, |
| "learning_rate": 2.8870393046872846e-05, |
| "loss": 0.4830588150024414, |
| "step": 46400 |
| }, |
| { |
| "epoch": 1.2812405487888703, |
| "grad_norm": 1.6315195560455322, |
| "learning_rate": 2.8778323036837214e-05, |
| "loss": 0.48057308197021487, |
| "step": 46600 |
| }, |
| { |
| "epoch": 1.286739435264359, |
| "grad_norm": 1.2200597524642944, |
| "learning_rate": 2.868625302680158e-05, |
| "loss": 0.48826507568359373, |
| "step": 46800 |
| }, |
| { |
| "epoch": 1.2922383217398477, |
| "grad_norm": 1.008957028388977, |
| "learning_rate": 2.859418301676595e-05, |
| "loss": 0.4910233306884766, |
| "step": 47000 |
| }, |
| { |
| "epoch": 1.2977372082153364, |
| "grad_norm": 0.9655813574790955, |
| "learning_rate": 2.850211300673032e-05, |
| "loss": 0.48260990142822263, |
| "step": 47200 |
| }, |
| { |
| "epoch": 1.303236094690825, |
| "grad_norm": 1.0368990898132324, |
| "learning_rate": 2.8410042996694686e-05, |
| "loss": 0.4869321060180664, |
| "step": 47400 |
| }, |
| { |
| "epoch": 1.3087349811663138, |
| "grad_norm": 1.0914088487625122, |
| "learning_rate": 2.8317972986659057e-05, |
| "loss": 0.4798837661743164, |
| "step": 47600 |
| }, |
| { |
| "epoch": 1.3142338676418026, |
| "grad_norm": 1.0549296140670776, |
| "learning_rate": 2.8225902976623424e-05, |
| "loss": 0.4868314743041992, |
| "step": 47800 |
| }, |
| { |
| "epoch": 1.3197327541172912, |
| "grad_norm": 0.9864702224731445, |
| "learning_rate": 2.813383296658779e-05, |
| "loss": 0.48143596649169923, |
| "step": 48000 |
| }, |
| { |
| "epoch": 1.32523164059278, |
| "grad_norm": 1.276328444480896, |
| "learning_rate": 2.8041762956552165e-05, |
| "loss": 0.4901668930053711, |
| "step": 48200 |
| }, |
| { |
| "epoch": 1.3307305270682686, |
| "grad_norm": 0.9716532826423645, |
| "learning_rate": 2.7949692946516532e-05, |
| "loss": 0.48207698822021483, |
| "step": 48400 |
| }, |
| { |
| "epoch": 1.3362294135437573, |
| "grad_norm": 1.3309965133666992, |
| "learning_rate": 2.7857622936480903e-05, |
| "loss": 0.4830322265625, |
| "step": 48600 |
| }, |
| { |
| "epoch": 1.3417283000192461, |
| "grad_norm": 0.8904381990432739, |
| "learning_rate": 2.776555292644527e-05, |
| "loss": 0.488801383972168, |
| "step": 48800 |
| }, |
| { |
| "epoch": 1.347227186494735, |
| "grad_norm": 1.4656221866607666, |
| "learning_rate": 2.7673482916409638e-05, |
| "loss": 0.48581710815429685, |
| "step": 49000 |
| }, |
| { |
| "epoch": 1.3527260729702235, |
| "grad_norm": 1.1317617893218994, |
| "learning_rate": 2.758141290637401e-05, |
| "loss": 0.4906336212158203, |
| "step": 49200 |
| }, |
| { |
| "epoch": 1.3582249594457123, |
| "grad_norm": 0.944570779800415, |
| "learning_rate": 2.7489342896338375e-05, |
| "loss": 0.4796075439453125, |
| "step": 49400 |
| }, |
| { |
| "epoch": 1.3637238459212009, |
| "grad_norm": 0.8989654779434204, |
| "learning_rate": 2.7397272886302743e-05, |
| "loss": 0.48385326385498045, |
| "step": 49600 |
| }, |
| { |
| "epoch": 1.3692227323966897, |
| "grad_norm": 1.2828127145767212, |
| "learning_rate": 2.7305202876267117e-05, |
| "loss": 0.4900363540649414, |
| "step": 49800 |
| }, |
| { |
| "epoch": 1.3747216188721785, |
| "grad_norm": 1.3695372343063354, |
| "learning_rate": 2.7213132866231484e-05, |
| "loss": 0.4815263366699219, |
| "step": 50000 |
| }, |
| { |
| "epoch": 1.380220505347667, |
| "grad_norm": 1.1346147060394287, |
| "learning_rate": 2.712106285619585e-05, |
| "loss": 0.48870357513427737, |
| "step": 50200 |
| }, |
| { |
| "epoch": 1.3857193918231558, |
| "grad_norm": 1.2779992818832397, |
| "learning_rate": 2.7028992846160222e-05, |
| "loss": 0.4858957290649414, |
| "step": 50400 |
| }, |
| { |
| "epoch": 1.3912182782986444, |
| "grad_norm": 1.0286052227020264, |
| "learning_rate": 2.693692283612459e-05, |
| "loss": 0.48650901794433593, |
| "step": 50600 |
| }, |
| { |
| "epoch": 1.3967171647741332, |
| "grad_norm": 1.0637270212173462, |
| "learning_rate": 2.6844852826088963e-05, |
| "loss": 0.48736335754394533, |
| "step": 50800 |
| }, |
| { |
| "epoch": 1.402216051249622, |
| "grad_norm": 1.3406178951263428, |
| "learning_rate": 2.675278281605333e-05, |
| "loss": 0.4900504684448242, |
| "step": 51000 |
| }, |
| { |
| "epoch": 1.4077149377251106, |
| "grad_norm": 1.1052333116531372, |
| "learning_rate": 2.6660712806017694e-05, |
| "loss": 0.4855587387084961, |
| "step": 51200 |
| }, |
| { |
| "epoch": 1.4132138242005994, |
| "grad_norm": 0.931908130645752, |
| "learning_rate": 2.6568642795982068e-05, |
| "loss": 0.4813541030883789, |
| "step": 51400 |
| }, |
| { |
| "epoch": 1.4187127106760882, |
| "grad_norm": 0.9499631524085999, |
| "learning_rate": 2.6476572785946435e-05, |
| "loss": 0.4899889373779297, |
| "step": 51600 |
| }, |
| { |
| "epoch": 1.4242115971515767, |
| "grad_norm": 1.1931513547897339, |
| "learning_rate": 2.6384502775910803e-05, |
| "loss": 0.48534503936767576, |
| "step": 51800 |
| }, |
| { |
| "epoch": 1.4297104836270655, |
| "grad_norm": 1.3906440734863281, |
| "learning_rate": 2.6292432765875173e-05, |
| "loss": 0.47944049835205077, |
| "step": 52000 |
| }, |
| { |
| "epoch": 1.4352093701025543, |
| "grad_norm": 1.1049039363861084, |
| "learning_rate": 2.620036275583954e-05, |
| "loss": 0.4796323776245117, |
| "step": 52200 |
| }, |
| { |
| "epoch": 1.440708256578043, |
| "grad_norm": 1.035280704498291, |
| "learning_rate": 2.6108292745803908e-05, |
| "loss": 0.4778638076782227, |
| "step": 52400 |
| }, |
| { |
| "epoch": 1.4462071430535317, |
| "grad_norm": 0.9371760487556458, |
| "learning_rate": 2.6016222735768282e-05, |
| "loss": 0.4937860870361328, |
| "step": 52600 |
| }, |
| { |
| "epoch": 1.4517060295290203, |
| "grad_norm": 0.932565450668335, |
| "learning_rate": 2.592415272573265e-05, |
| "loss": 0.48315887451171874, |
| "step": 52800 |
| }, |
| { |
| "epoch": 1.457204916004509, |
| "grad_norm": 1.1414536237716675, |
| "learning_rate": 2.5832082715697016e-05, |
| "loss": 0.48177513122558596, |
| "step": 53000 |
| }, |
| { |
| "epoch": 1.4627038024799979, |
| "grad_norm": 1.3313400745391846, |
| "learning_rate": 2.5740012705661387e-05, |
| "loss": 0.4810881423950195, |
| "step": 53200 |
| }, |
| { |
| "epoch": 1.4682026889554864, |
| "grad_norm": 0.9843188524246216, |
| "learning_rate": 2.5647942695625754e-05, |
| "loss": 0.48992759704589844, |
| "step": 53400 |
| }, |
| { |
| "epoch": 1.4737015754309752, |
| "grad_norm": 1.0765944719314575, |
| "learning_rate": 2.5555872685590128e-05, |
| "loss": 0.48404861450195313, |
| "step": 53600 |
| }, |
| { |
| "epoch": 1.4792004619064638, |
| "grad_norm": 0.9720175266265869, |
| "learning_rate": 2.5463802675554492e-05, |
| "loss": 0.48842796325683596, |
| "step": 53800 |
| }, |
| { |
| "epoch": 1.4846993483819526, |
| "grad_norm": 0.9759963154792786, |
| "learning_rate": 2.537173266551886e-05, |
| "loss": 0.47752620697021486, |
| "step": 54000 |
| }, |
| { |
| "epoch": 1.4901982348574414, |
| "grad_norm": 0.9573367834091187, |
| "learning_rate": 2.5279662655483233e-05, |
| "loss": 0.48062808990478517, |
| "step": 54200 |
| }, |
| { |
| "epoch": 1.4956971213329302, |
| "grad_norm": 1.292158603668213, |
| "learning_rate": 2.51875926454476e-05, |
| "loss": 0.487774658203125, |
| "step": 54400 |
| }, |
| { |
| "epoch": 1.5011960078084188, |
| "grad_norm": 1.4202347993850708, |
| "learning_rate": 2.5095522635411968e-05, |
| "loss": 0.4807822799682617, |
| "step": 54600 |
| }, |
| { |
| "epoch": 1.5066948942839073, |
| "grad_norm": 1.5612984895706177, |
| "learning_rate": 2.500345262537634e-05, |
| "loss": 0.4789771270751953, |
| "step": 54800 |
| }, |
| { |
| "epoch": 1.5121937807593961, |
| "grad_norm": 0.886279821395874, |
| "learning_rate": 2.4911382615340706e-05, |
| "loss": 0.482733268737793, |
| "step": 55000 |
| }, |
| { |
| "epoch": 1.517692667234885, |
| "grad_norm": 1.2323397397994995, |
| "learning_rate": 2.4819312605305076e-05, |
| "loss": 0.48148895263671876, |
| "step": 55200 |
| }, |
| { |
| "epoch": 1.5231915537103737, |
| "grad_norm": 1.1137135028839111, |
| "learning_rate": 2.4727242595269447e-05, |
| "loss": 0.48247013092041013, |
| "step": 55400 |
| }, |
| { |
| "epoch": 1.5286904401858623, |
| "grad_norm": 1.1854609251022339, |
| "learning_rate": 2.463517258523381e-05, |
| "loss": 0.48267646789550783, |
| "step": 55600 |
| }, |
| { |
| "epoch": 1.534189326661351, |
| "grad_norm": 1.1057685613632202, |
| "learning_rate": 2.454310257519818e-05, |
| "loss": 0.48411903381347654, |
| "step": 55800 |
| }, |
| { |
| "epoch": 1.5396882131368397, |
| "grad_norm": 1.2663975954055786, |
| "learning_rate": 2.4451032565162552e-05, |
| "loss": 0.4761699295043945, |
| "step": 56000 |
| }, |
| { |
| "epoch": 1.5451870996123285, |
| "grad_norm": 1.0173465013504028, |
| "learning_rate": 2.435896255512692e-05, |
| "loss": 0.48153770446777344, |
| "step": 56200 |
| }, |
| { |
| "epoch": 1.5506859860878173, |
| "grad_norm": 1.0407702922821045, |
| "learning_rate": 2.4266892545091287e-05, |
| "loss": 0.4878800201416016, |
| "step": 56400 |
| }, |
| { |
| "epoch": 1.556184872563306, |
| "grad_norm": 1.0399770736694336, |
| "learning_rate": 2.4174822535055657e-05, |
| "loss": 0.4796760177612305, |
| "step": 56600 |
| }, |
| { |
| "epoch": 1.5616837590387946, |
| "grad_norm": 1.2796666622161865, |
| "learning_rate": 2.4082752525020028e-05, |
| "loss": 0.47880504608154295, |
| "step": 56800 |
| }, |
| { |
| "epoch": 1.5671826455142832, |
| "grad_norm": 1.2479208707809448, |
| "learning_rate": 2.3990682514984395e-05, |
| "loss": 0.47731819152832033, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.572681531989772, |
| "grad_norm": 1.1050926446914673, |
| "learning_rate": 2.3898612504948766e-05, |
| "loss": 0.483460693359375, |
| "step": 57200 |
| }, |
| { |
| "epoch": 1.5781804184652608, |
| "grad_norm": 0.9544827342033386, |
| "learning_rate": 2.3806542494913133e-05, |
| "loss": 0.48048728942871094, |
| "step": 57400 |
| }, |
| { |
| "epoch": 1.5836793049407496, |
| "grad_norm": 1.063852071762085, |
| "learning_rate": 2.37144724848775e-05, |
| "loss": 0.485230827331543, |
| "step": 57600 |
| }, |
| { |
| "epoch": 1.5891781914162382, |
| "grad_norm": 1.1819310188293457, |
| "learning_rate": 2.362240247484187e-05, |
| "loss": 0.480164794921875, |
| "step": 57800 |
| }, |
| { |
| "epoch": 1.594677077891727, |
| "grad_norm": 1.021468162536621, |
| "learning_rate": 2.353033246480624e-05, |
| "loss": 0.4904788589477539, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.6001759643672155, |
| "grad_norm": 1.3577057123184204, |
| "learning_rate": 2.343826245477061e-05, |
| "loss": 0.48077606201171874, |
| "step": 58200 |
| }, |
| { |
| "epoch": 1.6056748508427043, |
| "grad_norm": 1.2617197036743164, |
| "learning_rate": 2.3346192444734976e-05, |
| "loss": 0.4806778717041016, |
| "step": 58400 |
| }, |
| { |
| "epoch": 1.6111737373181931, |
| "grad_norm": 1.2320860624313354, |
| "learning_rate": 2.3254122434699347e-05, |
| "loss": 0.4775208282470703, |
| "step": 58600 |
| }, |
| { |
| "epoch": 1.616672623793682, |
| "grad_norm": 0.9680395126342773, |
| "learning_rate": 2.3162052424663717e-05, |
| "loss": 0.48886814117431643, |
| "step": 58800 |
| }, |
| { |
| "epoch": 1.6221715102691705, |
| "grad_norm": 1.3157929182052612, |
| "learning_rate": 2.3069982414628084e-05, |
| "loss": 0.48573501586914064, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.627670396744659, |
| "grad_norm": 0.900864839553833, |
| "learning_rate": 2.297791240459245e-05, |
| "loss": 0.48609561920166017, |
| "step": 59200 |
| }, |
| { |
| "epoch": 1.6331692832201479, |
| "grad_norm": 1.0947906970977783, |
| "learning_rate": 2.2885842394556822e-05, |
| "loss": 0.4897247314453125, |
| "step": 59400 |
| }, |
| { |
| "epoch": 1.6386681696956367, |
| "grad_norm": 0.816973865032196, |
| "learning_rate": 2.2793772384521193e-05, |
| "loss": 0.47951828002929686, |
| "step": 59600 |
| }, |
| { |
| "epoch": 1.6441670561711255, |
| "grad_norm": 1.2236440181732178, |
| "learning_rate": 2.270170237448556e-05, |
| "loss": 0.4842032241821289, |
| "step": 59800 |
| }, |
| { |
| "epoch": 1.649665942646614, |
| "grad_norm": 1.1023343801498413, |
| "learning_rate": 2.2609632364449927e-05, |
| "loss": 0.4781660461425781, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.6551648291221026, |
| "grad_norm": 0.9589300155639648, |
| "learning_rate": 2.2517562354414298e-05, |
| "loss": 0.47841606140136717, |
| "step": 60200 |
| }, |
| { |
| "epoch": 1.6606637155975914, |
| "grad_norm": 1.3003031015396118, |
| "learning_rate": 2.242549234437867e-05, |
| "loss": 0.48363441467285156, |
| "step": 60400 |
| }, |
| { |
| "epoch": 1.6661626020730802, |
| "grad_norm": 0.9985244870185852, |
| "learning_rate": 2.2333422334343036e-05, |
| "loss": 0.48706722259521484, |
| "step": 60600 |
| }, |
| { |
| "epoch": 1.671661488548569, |
| "grad_norm": 1.319917917251587, |
| "learning_rate": 2.2241352324307403e-05, |
| "loss": 0.4843954086303711, |
| "step": 60800 |
| }, |
| { |
| "epoch": 1.6771603750240578, |
| "grad_norm": 1.3378630876541138, |
| "learning_rate": 2.2149282314271774e-05, |
| "loss": 0.48122127532958986, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.6826592614995464, |
| "grad_norm": 1.0471312999725342, |
| "learning_rate": 2.205721230423614e-05, |
| "loss": 0.48413547515869143, |
| "step": 61200 |
| }, |
| { |
| "epoch": 1.688158147975035, |
| "grad_norm": 1.0439791679382324, |
| "learning_rate": 2.196514229420051e-05, |
| "loss": 0.48604167938232423, |
| "step": 61400 |
| }, |
| { |
| "epoch": 1.6936570344505237, |
| "grad_norm": 0.9854567050933838, |
| "learning_rate": 2.187307228416488e-05, |
| "loss": 0.4817595291137695, |
| "step": 61600 |
| }, |
| { |
| "epoch": 1.6991559209260125, |
| "grad_norm": 1.1079517602920532, |
| "learning_rate": 2.178100227412925e-05, |
| "loss": 0.48393955230712893, |
| "step": 61800 |
| }, |
| { |
| "epoch": 1.7046548074015013, |
| "grad_norm": 1.1403529644012451, |
| "learning_rate": 2.1688932264093617e-05, |
| "loss": 0.47360748291015625, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.71015369387699, |
| "grad_norm": 0.8809356689453125, |
| "learning_rate": 2.1596862254057987e-05, |
| "loss": 0.47694496154785154, |
| "step": 62200 |
| }, |
| { |
| "epoch": 1.7156525803524785, |
| "grad_norm": 0.9528295993804932, |
| "learning_rate": 2.1504792244022358e-05, |
| "loss": 0.4844463348388672, |
| "step": 62400 |
| }, |
| { |
| "epoch": 1.7211514668279673, |
| "grad_norm": 1.0902634859085083, |
| "learning_rate": 2.1412722233986722e-05, |
| "loss": 0.47806488037109374, |
| "step": 62600 |
| }, |
| { |
| "epoch": 1.726650353303456, |
| "grad_norm": 1.0174310207366943, |
| "learning_rate": 2.1320652223951093e-05, |
| "loss": 0.48461170196533204, |
| "step": 62800 |
| }, |
| { |
| "epoch": 1.7321492397789449, |
| "grad_norm": 1.1780657768249512, |
| "learning_rate": 2.1228582213915463e-05, |
| "loss": 0.4868865203857422, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.7376481262544334, |
| "grad_norm": 1.257879614830017, |
| "learning_rate": 2.1136512203879834e-05, |
| "loss": 0.4772517776489258, |
| "step": 63200 |
| }, |
| { |
| "epoch": 1.7431470127299222, |
| "grad_norm": 2.5110182762145996, |
| "learning_rate": 2.10444421938442e-05, |
| "loss": 0.48027557373046875, |
| "step": 63400 |
| }, |
| { |
| "epoch": 1.7486458992054108, |
| "grad_norm": 1.061119556427002, |
| "learning_rate": 2.0952372183808568e-05, |
| "loss": 0.4825307846069336, |
| "step": 63600 |
| }, |
| { |
| "epoch": 1.7541447856808996, |
| "grad_norm": 1.3090649843215942, |
| "learning_rate": 2.086030217377294e-05, |
| "loss": 0.4777912902832031, |
| "step": 63800 |
| }, |
| { |
| "epoch": 1.7596436721563884, |
| "grad_norm": 0.8455436825752258, |
| "learning_rate": 2.0768232163737306e-05, |
| "loss": 0.4868216705322266, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.7651425586318772, |
| "grad_norm": 1.1341484785079956, |
| "learning_rate": 2.0676162153701677e-05, |
| "loss": 0.4825804901123047, |
| "step": 64200 |
| }, |
| { |
| "epoch": 1.7706414451073658, |
| "grad_norm": 0.9106566905975342, |
| "learning_rate": 2.0584092143666044e-05, |
| "loss": 0.480031852722168, |
| "step": 64400 |
| }, |
| { |
| "epoch": 1.7761403315828543, |
| "grad_norm": 0.8978875279426575, |
| "learning_rate": 2.0492022133630415e-05, |
| "loss": 0.48035388946533203, |
| "step": 64600 |
| }, |
| { |
| "epoch": 1.7816392180583431, |
| "grad_norm": 1.508074164390564, |
| "learning_rate": 2.0399952123594782e-05, |
| "loss": 0.4823148727416992, |
| "step": 64800 |
| }, |
| { |
| "epoch": 1.787138104533832, |
| "grad_norm": 1.0851056575775146, |
| "learning_rate": 2.0307882113559153e-05, |
| "loss": 0.4738383102416992, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.7926369910093207, |
| "grad_norm": 1.0651288032531738, |
| "learning_rate": 2.021581210352352e-05, |
| "loss": 0.4777484130859375, |
| "step": 65200 |
| }, |
| { |
| "epoch": 1.7981358774848093, |
| "grad_norm": 1.3095803260803223, |
| "learning_rate": 2.0123742093487887e-05, |
| "loss": 0.48325523376464846, |
| "step": 65400 |
| }, |
| { |
| "epoch": 1.803634763960298, |
| "grad_norm": 1.1658202409744263, |
| "learning_rate": 2.0031672083452258e-05, |
| "loss": 0.4814822769165039, |
| "step": 65600 |
| }, |
| { |
| "epoch": 1.8091336504357867, |
| "grad_norm": 0.974337637424469, |
| "learning_rate": 1.9939602073416628e-05, |
| "loss": 0.47399234771728516, |
| "step": 65800 |
| }, |
| { |
| "epoch": 1.8146325369112755, |
| "grad_norm": 0.914979875087738, |
| "learning_rate": 1.9847532063380995e-05, |
| "loss": 0.48681838989257814, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.8201314233867643, |
| "grad_norm": 0.7990674376487732, |
| "learning_rate": 1.9755462053345363e-05, |
| "loss": 0.47843902587890624, |
| "step": 66200 |
| }, |
| { |
| "epoch": 1.825630309862253, |
| "grad_norm": 1.2652182579040527, |
| "learning_rate": 1.9663392043309733e-05, |
| "loss": 0.4840336990356445, |
| "step": 66400 |
| }, |
| { |
| "epoch": 1.8311291963377416, |
| "grad_norm": 0.9367465376853943, |
| "learning_rate": 1.9571322033274104e-05, |
| "loss": 0.48031715393066404, |
| "step": 66600 |
| }, |
| { |
| "epoch": 1.8366280828132302, |
| "grad_norm": 0.9445034861564636, |
| "learning_rate": 1.947925202323847e-05, |
| "loss": 0.48153636932373045, |
| "step": 66800 |
| }, |
| { |
| "epoch": 1.842126969288719, |
| "grad_norm": 1.062595009803772, |
| "learning_rate": 1.938718201320284e-05, |
| "loss": 0.4798342514038086, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.8476258557642078, |
| "grad_norm": 1.0887633562088013, |
| "learning_rate": 1.929511200316721e-05, |
| "loss": 0.4826160430908203, |
| "step": 67200 |
| }, |
| { |
| "epoch": 1.8531247422396966, |
| "grad_norm": 1.4558460712432861, |
| "learning_rate": 1.920304199313158e-05, |
| "loss": 0.48820636749267576, |
| "step": 67400 |
| }, |
| { |
| "epoch": 1.8586236287151852, |
| "grad_norm": 0.9983727931976318, |
| "learning_rate": 1.9110971983095947e-05, |
| "loss": 0.4826961135864258, |
| "step": 67600 |
| }, |
| { |
| "epoch": 1.8641225151906737, |
| "grad_norm": 0.9502201676368713, |
| "learning_rate": 1.9018901973060314e-05, |
| "loss": 0.4772541046142578, |
| "step": 67800 |
| }, |
| { |
| "epoch": 1.8696214016661625, |
| "grad_norm": 0.9462329149246216, |
| "learning_rate": 1.8926831963024685e-05, |
| "loss": 0.4827272415161133, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.8751202881416513, |
| "grad_norm": 1.2585595846176147, |
| "learning_rate": 1.8834761952989056e-05, |
| "loss": 0.48325294494628906, |
| "step": 68200 |
| }, |
| { |
| "epoch": 1.8806191746171401, |
| "grad_norm": 1.0165777206420898, |
| "learning_rate": 1.8742691942953423e-05, |
| "loss": 0.4868499755859375, |
| "step": 68400 |
| }, |
| { |
| "epoch": 1.8861180610926287, |
| "grad_norm": 1.1448917388916016, |
| "learning_rate": 1.8650621932917793e-05, |
| "loss": 0.47457069396972656, |
| "step": 68600 |
| }, |
| { |
| "epoch": 1.8916169475681175, |
| "grad_norm": 0.9723443984985352, |
| "learning_rate": 1.855855192288216e-05, |
| "loss": 0.4808235168457031, |
| "step": 68800 |
| }, |
| { |
| "epoch": 1.897115834043606, |
| "grad_norm": 1.8042104244232178, |
| "learning_rate": 1.8466481912846528e-05, |
| "loss": 0.4818389892578125, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.9026147205190949, |
| "grad_norm": 1.1425598859786987, |
| "learning_rate": 1.83744119028109e-05, |
| "loss": 0.47744728088378907, |
| "step": 69200 |
| }, |
| { |
| "epoch": 1.9081136069945837, |
| "grad_norm": 1.3648266792297363, |
| "learning_rate": 1.828234189277527e-05, |
| "loss": 0.47696762084960936, |
| "step": 69400 |
| }, |
| { |
| "epoch": 1.9136124934700725, |
| "grad_norm": 1.2545722723007202, |
| "learning_rate": 1.8190271882739636e-05, |
| "loss": 0.4733824920654297, |
| "step": 69600 |
| }, |
| { |
| "epoch": 1.919111379945561, |
| "grad_norm": 1.1813223361968994, |
| "learning_rate": 1.8098201872704004e-05, |
| "loss": 0.4728484344482422, |
| "step": 69800 |
| }, |
| { |
| "epoch": 1.9246102664210496, |
| "grad_norm": 1.2796030044555664, |
| "learning_rate": 1.8006131862668374e-05, |
| "loss": 0.4804762649536133, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.9301091528965384, |
| "grad_norm": 1.3735687732696533, |
| "learning_rate": 1.7914061852632745e-05, |
| "loss": 0.4790033721923828, |
| "step": 70200 |
| }, |
| { |
| "epoch": 1.9356080393720272, |
| "grad_norm": 1.2554829120635986, |
| "learning_rate": 1.7821991842597112e-05, |
| "loss": 0.48504138946533204, |
| "step": 70400 |
| }, |
| { |
| "epoch": 1.941106925847516, |
| "grad_norm": 1.08273184299469, |
| "learning_rate": 1.772992183256148e-05, |
| "loss": 0.4772909545898438, |
| "step": 70600 |
| }, |
| { |
| "epoch": 1.9466058123230046, |
| "grad_norm": 0.6954657435417175, |
| "learning_rate": 1.763785182252585e-05, |
| "loss": 0.49507545471191405, |
| "step": 70800 |
| }, |
| { |
| "epoch": 1.9521046987984934, |
| "grad_norm": 1.014246940612793, |
| "learning_rate": 1.754578181249022e-05, |
| "loss": 0.4824806213378906, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.957603585273982, |
| "grad_norm": 1.005923867225647, |
| "learning_rate": 1.7453711802454588e-05, |
| "loss": 0.4811605453491211, |
| "step": 71200 |
| }, |
| { |
| "epoch": 1.9631024717494707, |
| "grad_norm": 1.1930160522460938, |
| "learning_rate": 1.7361641792418955e-05, |
| "loss": 0.4723471450805664, |
| "step": 71400 |
| }, |
| { |
| "epoch": 1.9686013582249595, |
| "grad_norm": 1.132750153541565, |
| "learning_rate": 1.7269571782383326e-05, |
| "loss": 0.4810772323608398, |
| "step": 71600 |
| }, |
| { |
| "epoch": 1.9741002447004483, |
| "grad_norm": 1.2968944311141968, |
| "learning_rate": 1.7177501772347693e-05, |
| "loss": 0.47881488800048827, |
| "step": 71800 |
| }, |
| { |
| "epoch": 1.9795991311759369, |
| "grad_norm": 1.342724084854126, |
| "learning_rate": 1.7085431762312064e-05, |
| "loss": 0.4765338134765625, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.9850980176514255, |
| "grad_norm": 1.0654747486114502, |
| "learning_rate": 1.699336175227643e-05, |
| "loss": 0.4823237228393555, |
| "step": 72200 |
| }, |
| { |
| "epoch": 1.9905969041269143, |
| "grad_norm": 1.0994575023651123, |
| "learning_rate": 1.69012917422408e-05, |
| "loss": 0.48160724639892577, |
| "step": 72400 |
| }, |
| { |
| "epoch": 1.996095790602403, |
| "grad_norm": 1.0896570682525635, |
| "learning_rate": 1.680922173220517e-05, |
| "loss": 0.4756970977783203, |
| "step": 72600 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.4625195264816284, |
| "eval_runtime": 158.6666, |
| "eval_samples_per_second": 407.515, |
| "eval_steps_per_second": 25.475, |
| "step": 72742 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 109113, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.875850713799066e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |