{ "best_global_step": 86262, "best_metric": 0.5954480213245847, "best_model_checkpoint": "outputs/cloudopsbert/hdfs-distilbert-base-uncased/checkpoint-86262", "epoch": 3.0, "eval_steps": 500, "global_step": 86262, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017388885024692217, "grad_norm": 0.21531887352466583, "learning_rate": 2.99829588926758e-05, "loss": 0.6712, "step": 50 }, { "epoch": 0.0034777770049384434, "grad_norm": 2.1512222290039062, "learning_rate": 2.996557000765111e-05, "loss": 0.8574, "step": 100 }, { "epoch": 0.005216665507407665, "grad_norm": 1.2049638032913208, "learning_rate": 2.9948181122626417e-05, "loss": 0.7598, "step": 150 }, { "epoch": 0.006955554009876887, "grad_norm": 11.5806245803833, "learning_rate": 2.9930792237601727e-05, "loss": 0.8228, "step": 200 }, { "epoch": 0.008694442512346108, "grad_norm": 8.717599868774414, "learning_rate": 2.9913403352577033e-05, "loss": 0.686, "step": 250 }, { "epoch": 0.01043333101481533, "grad_norm": 15.572234153747559, "learning_rate": 2.9896014467552342e-05, "loss": 0.8995, "step": 300 }, { "epoch": 0.012172219517284552, "grad_norm": 0.22520902752876282, "learning_rate": 2.987862558252765e-05, "loss": 0.6724, "step": 350 }, { "epoch": 0.013911108019753773, "grad_norm": 13.658159255981445, "learning_rate": 2.9861236697502958e-05, "loss": 0.5711, "step": 400 }, { "epoch": 0.015649996522222993, "grad_norm": 13.239530563354492, "learning_rate": 2.9843847812478264e-05, "loss": 0.6995, "step": 450 }, { "epoch": 0.017388885024692217, "grad_norm": 0.2823483943939209, "learning_rate": 2.9826458927453574e-05, "loss": 0.7025, "step": 500 }, { "epoch": 0.019127773527161437, "grad_norm": 14.249059677124023, "learning_rate": 2.980907004242888e-05, "loss": 0.5872, "step": 550 }, { "epoch": 0.02086666202963066, "grad_norm": 14.599956512451172, "learning_rate": 2.979168115740419e-05, "loss": 0.4677, "step": 600 }, { "epoch": 0.02260555053209988, "grad_norm": 0.1795855164527893, "learning_rate": 2.9774292272379496e-05, "loss": 0.5303, "step": 650 }, { "epoch": 0.024344439034569103, "grad_norm": 0.6893061995506287, "learning_rate": 2.9756903387354802e-05, "loss": 0.9625, "step": 700 }, { "epoch": 0.026083327537038323, "grad_norm": 7.416564464569092, "learning_rate": 2.973951450233011e-05, "loss": 0.6656, "step": 750 }, { "epoch": 0.027822216039507547, "grad_norm": 0.3595174551010132, "learning_rate": 2.9722125617305418e-05, "loss": 0.7361, "step": 800 }, { "epoch": 0.029561104541976767, "grad_norm": 11.918978691101074, "learning_rate": 2.9704736732280727e-05, "loss": 0.8825, "step": 850 }, { "epoch": 0.03129999304444599, "grad_norm": 0.23652283847332, "learning_rate": 2.9687347847256033e-05, "loss": 0.5662, "step": 900 }, { "epoch": 0.033038881546915214, "grad_norm": 13.681031227111816, "learning_rate": 2.9669958962231343e-05, "loss": 0.8632, "step": 950 }, { "epoch": 0.034777770049384434, "grad_norm": 7.932927131652832, "learning_rate": 2.965257007720665e-05, "loss": 0.6999, "step": 1000 }, { "epoch": 0.036516658551853654, "grad_norm": 0.11135145276784897, "learning_rate": 2.963518119218196e-05, "loss": 0.7414, "step": 1050 }, { "epoch": 0.03825554705432287, "grad_norm": 14.187750816345215, "learning_rate": 2.9617792307157265e-05, "loss": 0.5595, "step": 1100 }, { "epoch": 0.0399944355567921, "grad_norm": 0.2938290238380432, "learning_rate": 2.9600403422132575e-05, "loss": 0.7492, "step": 1150 }, { "epoch": 0.04173332405926132, "grad_norm": 0.25605008006095886, "learning_rate": 2.958301453710788e-05, "loss": 0.5284, "step": 1200 }, { "epoch": 0.04347221256173054, "grad_norm": 0.04060988128185272, "learning_rate": 2.956562565208319e-05, "loss": 0.5777, "step": 1250 }, { "epoch": 0.04521110106419976, "grad_norm": 16.17035675048828, "learning_rate": 2.9548236767058496e-05, "loss": 0.6612, "step": 1300 }, { "epoch": 0.04694998956666899, "grad_norm": 0.26229363679885864, "learning_rate": 2.9530847882033806e-05, "loss": 0.6118, "step": 1350 }, { "epoch": 0.04868887806913821, "grad_norm": 0.20632320642471313, "learning_rate": 2.9513458997009112e-05, "loss": 0.5305, "step": 1400 }, { "epoch": 0.05042776657160743, "grad_norm": 0.3919838070869446, "learning_rate": 2.9496070111984422e-05, "loss": 0.8385, "step": 1450 }, { "epoch": 0.05216665507407665, "grad_norm": 0.586615800857544, "learning_rate": 2.9478681226959728e-05, "loss": 0.8748, "step": 1500 }, { "epoch": 0.053905543576545874, "grad_norm": 0.34778815507888794, "learning_rate": 2.9461292341935038e-05, "loss": 0.6702, "step": 1550 }, { "epoch": 0.055644432079015094, "grad_norm": 0.19469404220581055, "learning_rate": 2.9443903456910344e-05, "loss": 0.8877, "step": 1600 }, { "epoch": 0.057383320581484314, "grad_norm": 0.2561992406845093, "learning_rate": 2.9426514571885653e-05, "loss": 0.6033, "step": 1650 }, { "epoch": 0.059122209083953534, "grad_norm": 13.005464553833008, "learning_rate": 2.940912568686096e-05, "loss": 0.9775, "step": 1700 }, { "epoch": 0.06086109758642276, "grad_norm": 0.2699541449546814, "learning_rate": 2.939173680183627e-05, "loss": 0.4454, "step": 1750 }, { "epoch": 0.06259998608889197, "grad_norm": 0.2656519114971161, "learning_rate": 2.9374347916811575e-05, "loss": 0.682, "step": 1800 }, { "epoch": 0.0643388745913612, "grad_norm": 0.0975981280207634, "learning_rate": 2.935695903178688e-05, "loss": 0.5488, "step": 1850 }, { "epoch": 0.06607776309383043, "grad_norm": 0.6183196902275085, "learning_rate": 2.9339570146762188e-05, "loss": 0.9071, "step": 1900 }, { "epoch": 0.06781665159629964, "grad_norm": 0.23420625925064087, "learning_rate": 2.9322181261737497e-05, "loss": 0.606, "step": 1950 }, { "epoch": 0.06955554009876887, "grad_norm": 0.18833336234092712, "learning_rate": 2.9304792376712803e-05, "loss": 0.4169, "step": 2000 }, { "epoch": 0.0712944286012381, "grad_norm": 0.11877614259719849, "learning_rate": 2.9287403491688113e-05, "loss": 0.8598, "step": 2050 }, { "epoch": 0.07303331710370731, "grad_norm": 5.578752517700195, "learning_rate": 2.927001460666342e-05, "loss": 0.8515, "step": 2100 }, { "epoch": 0.07477220560617653, "grad_norm": 0.5145632028579712, "learning_rate": 2.925262572163873e-05, "loss": 0.877, "step": 2150 }, { "epoch": 0.07651109410864575, "grad_norm": 0.4721980392932892, "learning_rate": 2.9235236836614035e-05, "loss": 0.7244, "step": 2200 }, { "epoch": 0.07824998261111497, "grad_norm": 0.26050278544425964, "learning_rate": 2.9217847951589344e-05, "loss": 0.5152, "step": 2250 }, { "epoch": 0.0799888711135842, "grad_norm": 14.50300121307373, "learning_rate": 2.9200459066564654e-05, "loss": 0.6521, "step": 2300 }, { "epoch": 0.08172775961605341, "grad_norm": 0.34740573167800903, "learning_rate": 2.918307018153996e-05, "loss": 0.6718, "step": 2350 }, { "epoch": 0.08346664811852264, "grad_norm": 14.538580894470215, "learning_rate": 2.916568129651527e-05, "loss": 0.7105, "step": 2400 }, { "epoch": 0.08520553662099187, "grad_norm": 0.2549963593482971, "learning_rate": 2.9148292411490576e-05, "loss": 0.6733, "step": 2450 }, { "epoch": 0.08694442512346108, "grad_norm": 0.040352508425712585, "learning_rate": 2.9130903526465885e-05, "loss": 0.7584, "step": 2500 }, { "epoch": 0.08868331362593031, "grad_norm": 13.189627647399902, "learning_rate": 2.911351464144119e-05, "loss": 0.7576, "step": 2550 }, { "epoch": 0.09042220212839952, "grad_norm": 0.3856923580169678, "learning_rate": 2.90961257564165e-05, "loss": 0.6788, "step": 2600 }, { "epoch": 0.09216109063086875, "grad_norm": 0.5096640586853027, "learning_rate": 2.9078736871391807e-05, "loss": 0.8286, "step": 2650 }, { "epoch": 0.09389997913333797, "grad_norm": 0.343321830034256, "learning_rate": 2.9061347986367117e-05, "loss": 0.6074, "step": 2700 }, { "epoch": 0.09563886763580719, "grad_norm": 0.36206918954849243, "learning_rate": 2.9043959101342423e-05, "loss": 0.8074, "step": 2750 }, { "epoch": 0.09737775613827641, "grad_norm": 0.33359429240226746, "learning_rate": 2.9026570216317733e-05, "loss": 0.6441, "step": 2800 }, { "epoch": 0.09911664464074564, "grad_norm": 0.1139458566904068, "learning_rate": 2.900918133129304e-05, "loss": 0.6867, "step": 2850 }, { "epoch": 0.10085553314321485, "grad_norm": 8.371479988098145, "learning_rate": 2.899179244626835e-05, "loss": 0.8066, "step": 2900 }, { "epoch": 0.10259442164568408, "grad_norm": 0.10019742697477341, "learning_rate": 2.8974403561243655e-05, "loss": 0.5955, "step": 2950 }, { "epoch": 0.1043333101481533, "grad_norm": 0.40135741233825684, "learning_rate": 2.8957014676218964e-05, "loss": 0.7127, "step": 3000 }, { "epoch": 0.10607219865062252, "grad_norm": 0.28914302587509155, "learning_rate": 2.8939625791194267e-05, "loss": 0.6954, "step": 3050 }, { "epoch": 0.10781108715309175, "grad_norm": 7.691045761108398, "learning_rate": 2.8922236906169577e-05, "loss": 0.7763, "step": 3100 }, { "epoch": 0.10954997565556096, "grad_norm": 12.460494041442871, "learning_rate": 2.8904848021144883e-05, "loss": 0.542, "step": 3150 }, { "epoch": 0.11128886415803019, "grad_norm": 0.38156870007514954, "learning_rate": 2.8887459136120192e-05, "loss": 0.806, "step": 3200 }, { "epoch": 0.11302775266049941, "grad_norm": 0.39273130893707275, "learning_rate": 2.88700702510955e-05, "loss": 0.6205, "step": 3250 }, { "epoch": 0.11476664116296863, "grad_norm": 0.29124337434768677, "learning_rate": 2.8852681366070808e-05, "loss": 0.7055, "step": 3300 }, { "epoch": 0.11650552966543785, "grad_norm": 0.34728166460990906, "learning_rate": 2.8835292481046114e-05, "loss": 0.6858, "step": 3350 }, { "epoch": 0.11824441816790707, "grad_norm": 8.108372688293457, "learning_rate": 2.8817903596021424e-05, "loss": 0.7907, "step": 3400 }, { "epoch": 0.1199833066703763, "grad_norm": 0.22226455807685852, "learning_rate": 2.880051471099673e-05, "loss": 0.4798, "step": 3450 }, { "epoch": 0.12172219517284552, "grad_norm": 0.27798640727996826, "learning_rate": 2.878312582597204e-05, "loss": 0.821, "step": 3500 }, { "epoch": 0.12346108367531473, "grad_norm": 0.30928412079811096, "learning_rate": 2.8765736940947346e-05, "loss": 0.7677, "step": 3550 }, { "epoch": 0.12519997217778395, "grad_norm": 14.631339073181152, "learning_rate": 2.8748348055922655e-05, "loss": 0.3127, "step": 3600 }, { "epoch": 0.12693886068025317, "grad_norm": 0.05776110291481018, "learning_rate": 2.873095917089796e-05, "loss": 0.6036, "step": 3650 }, { "epoch": 0.1286777491827224, "grad_norm": 0.19491440057754517, "learning_rate": 2.871357028587327e-05, "loss": 0.4779, "step": 3700 }, { "epoch": 0.13041663768519163, "grad_norm": 0.5055209994316101, "learning_rate": 2.8696181400848577e-05, "loss": 1.0301, "step": 3750 }, { "epoch": 0.13215552618766085, "grad_norm": 0.14795105159282684, "learning_rate": 2.8678792515823887e-05, "loss": 0.811, "step": 3800 }, { "epoch": 0.13389441469013008, "grad_norm": 0.22891080379486084, "learning_rate": 2.8661403630799193e-05, "loss": 0.3948, "step": 3850 }, { "epoch": 0.13563330319259928, "grad_norm": 0.41760048270225525, "learning_rate": 2.8644014745774502e-05, "loss": 0.7961, "step": 3900 }, { "epoch": 0.1373721916950685, "grad_norm": 6.169968605041504, "learning_rate": 2.8626625860749812e-05, "loss": 0.7672, "step": 3950 }, { "epoch": 0.13911108019753773, "grad_norm": 14.142441749572754, "learning_rate": 2.8609236975725118e-05, "loss": 0.6765, "step": 4000 }, { "epoch": 0.14084996870000696, "grad_norm": 0.43269848823547363, "learning_rate": 2.8591848090700428e-05, "loss": 0.7438, "step": 4050 }, { "epoch": 0.1425888572024762, "grad_norm": 13.631083488464355, "learning_rate": 2.8574459205675734e-05, "loss": 0.8063, "step": 4100 }, { "epoch": 0.1443277457049454, "grad_norm": 0.36307293176651, "learning_rate": 2.8557070320651044e-05, "loss": 0.6498, "step": 4150 }, { "epoch": 0.14606663420741461, "grad_norm": 0.32387998700141907, "learning_rate": 2.8539681435626346e-05, "loss": 0.7749, "step": 4200 }, { "epoch": 0.14780552270988384, "grad_norm": 0.32630008459091187, "learning_rate": 2.8522292550601656e-05, "loss": 0.6459, "step": 4250 }, { "epoch": 0.14954441121235307, "grad_norm": 0.38263922929763794, "learning_rate": 2.8504903665576962e-05, "loss": 0.7011, "step": 4300 }, { "epoch": 0.1512832997148223, "grad_norm": 13.226205825805664, "learning_rate": 2.848751478055227e-05, "loss": 0.705, "step": 4350 }, { "epoch": 0.1530221882172915, "grad_norm": 0.4142023026943207, "learning_rate": 2.8470125895527578e-05, "loss": 0.7636, "step": 4400 }, { "epoch": 0.15476107671976072, "grad_norm": 0.368549108505249, "learning_rate": 2.8452737010502887e-05, "loss": 0.6311, "step": 4450 }, { "epoch": 0.15649996522222995, "grad_norm": 7.875992774963379, "learning_rate": 2.8435348125478194e-05, "loss": 0.6996, "step": 4500 }, { "epoch": 0.15823885372469917, "grad_norm": 0.09905952960252762, "learning_rate": 2.8417959240453503e-05, "loss": 0.7064, "step": 4550 }, { "epoch": 0.1599777422271684, "grad_norm": 8.343487739562988, "learning_rate": 2.840057035542881e-05, "loss": 0.6609, "step": 4600 }, { "epoch": 0.16171663072963763, "grad_norm": 0.1153111383318901, "learning_rate": 2.838318147040412e-05, "loss": 0.7798, "step": 4650 }, { "epoch": 0.16345551923210683, "grad_norm": 0.314816415309906, "learning_rate": 2.8365792585379425e-05, "loss": 0.5917, "step": 4700 }, { "epoch": 0.16519440773457605, "grad_norm": 13.283761024475098, "learning_rate": 2.8348403700354735e-05, "loss": 0.8163, "step": 4750 }, { "epoch": 0.16693329623704528, "grad_norm": 13.551874160766602, "learning_rate": 2.833101481533004e-05, "loss": 0.7968, "step": 4800 }, { "epoch": 0.1686721847395145, "grad_norm": 13.574830055236816, "learning_rate": 2.831362593030535e-05, "loss": 0.8068, "step": 4850 }, { "epoch": 0.17041107324198373, "grad_norm": 0.3623298704624176, "learning_rate": 2.8296237045280657e-05, "loss": 0.7471, "step": 4900 }, { "epoch": 0.17214996174445293, "grad_norm": 15.73062515258789, "learning_rate": 2.8278848160255966e-05, "loss": 0.5662, "step": 4950 }, { "epoch": 0.17388885024692216, "grad_norm": 0.0957670658826828, "learning_rate": 2.8261459275231272e-05, "loss": 0.7556, "step": 5000 }, { "epoch": 0.1756277387493914, "grad_norm": 0.2196839153766632, "learning_rate": 2.8244070390206582e-05, "loss": 0.4344, "step": 5050 }, { "epoch": 0.17736662725186061, "grad_norm": 0.33694714307785034, "learning_rate": 2.8226681505181888e-05, "loss": 0.9018, "step": 5100 }, { "epoch": 0.17910551575432984, "grad_norm": 15.823934555053711, "learning_rate": 2.8209292620157198e-05, "loss": 0.8504, "step": 5150 }, { "epoch": 0.18084440425679904, "grad_norm": 0.34615108370780945, "learning_rate": 2.8191903735132504e-05, "loss": 0.5093, "step": 5200 }, { "epoch": 0.18258329275926827, "grad_norm": 14.107118606567383, "learning_rate": 2.8174514850107813e-05, "loss": 0.8394, "step": 5250 }, { "epoch": 0.1843221812617375, "grad_norm": 0.08523888885974884, "learning_rate": 2.815712596508312e-05, "loss": 0.3969, "step": 5300 }, { "epoch": 0.18606106976420672, "grad_norm": 13.68999195098877, "learning_rate": 2.813973708005843e-05, "loss": 0.9264, "step": 5350 }, { "epoch": 0.18779995826667595, "grad_norm": 0.12612222135066986, "learning_rate": 2.8122348195033732e-05, "loss": 0.7111, "step": 5400 }, { "epoch": 0.18953884676914518, "grad_norm": 0.29172536730766296, "learning_rate": 2.810495931000904e-05, "loss": 0.5145, "step": 5450 }, { "epoch": 0.19127773527161437, "grad_norm": 0.49600592255592346, "learning_rate": 2.808757042498435e-05, "loss": 0.934, "step": 5500 }, { "epoch": 0.1930166237740836, "grad_norm": 13.723418235778809, "learning_rate": 2.8070181539959657e-05, "loss": 0.5114, "step": 5550 }, { "epoch": 0.19475551227655283, "grad_norm": 0.35133516788482666, "learning_rate": 2.8052792654934967e-05, "loss": 0.827, "step": 5600 }, { "epoch": 0.19649440077902205, "grad_norm": 0.15841996669769287, "learning_rate": 2.8035403769910273e-05, "loss": 0.8583, "step": 5650 }, { "epoch": 0.19823328928149128, "grad_norm": 0.3962007164955139, "learning_rate": 2.8018014884885583e-05, "loss": 0.7394, "step": 5700 }, { "epoch": 0.19997217778396048, "grad_norm": 0.35625603795051575, "learning_rate": 2.800062599986089e-05, "loss": 0.5387, "step": 5750 }, { "epoch": 0.2017110662864297, "grad_norm": 13.912430763244629, "learning_rate": 2.7983237114836198e-05, "loss": 0.7052, "step": 5800 }, { "epoch": 0.20344995478889893, "grad_norm": 0.433989018201828, "learning_rate": 2.7965848229811504e-05, "loss": 0.7454, "step": 5850 }, { "epoch": 0.20518884329136816, "grad_norm": 0.35053467750549316, "learning_rate": 2.7948459344786814e-05, "loss": 0.6074, "step": 5900 }, { "epoch": 0.2069277317938374, "grad_norm": 11.318763732910156, "learning_rate": 2.793107045976212e-05, "loss": 0.6796, "step": 5950 }, { "epoch": 0.2086666202963066, "grad_norm": 14.512580871582031, "learning_rate": 2.791368157473743e-05, "loss": 0.4652, "step": 6000 }, { "epoch": 0.21040550879877581, "grad_norm": 0.13117973506450653, "learning_rate": 2.7896292689712736e-05, "loss": 0.9245, "step": 6050 }, { "epoch": 0.21214439730124504, "grad_norm": 12.345372200012207, "learning_rate": 2.7878903804688046e-05, "loss": 0.9304, "step": 6100 }, { "epoch": 0.21388328580371427, "grad_norm": 0.29051685333251953, "learning_rate": 2.7861514919663352e-05, "loss": 0.5536, "step": 6150 }, { "epoch": 0.2156221743061835, "grad_norm": 15.800280570983887, "learning_rate": 2.784412603463866e-05, "loss": 0.3204, "step": 6200 }, { "epoch": 0.21736106280865272, "grad_norm": 14.32712459564209, "learning_rate": 2.7826737149613967e-05, "loss": 0.9041, "step": 6250 }, { "epoch": 0.21909995131112192, "grad_norm": 0.4081219732761383, "learning_rate": 2.7809348264589277e-05, "loss": 0.6956, "step": 6300 }, { "epoch": 0.22083883981359115, "grad_norm": 0.05356422811746597, "learning_rate": 2.7791959379564583e-05, "loss": 0.6077, "step": 6350 }, { "epoch": 0.22257772831606037, "grad_norm": 0.10085748136043549, "learning_rate": 2.7774570494539893e-05, "loss": 0.7014, "step": 6400 }, { "epoch": 0.2243166168185296, "grad_norm": 0.25164371728897095, "learning_rate": 2.77571816095152e-05, "loss": 0.5981, "step": 6450 }, { "epoch": 0.22605550532099883, "grad_norm": 0.3360247313976288, "learning_rate": 2.773979272449051e-05, "loss": 0.7279, "step": 6500 }, { "epoch": 0.22779439382346803, "grad_norm": 0.38315820693969727, "learning_rate": 2.7722403839465815e-05, "loss": 0.7773, "step": 6550 }, { "epoch": 0.22953328232593725, "grad_norm": 0.530902624130249, "learning_rate": 2.770501495444112e-05, "loss": 1.2086, "step": 6600 }, { "epoch": 0.23127217082840648, "grad_norm": 13.965437889099121, "learning_rate": 2.7687626069416427e-05, "loss": 0.8157, "step": 6650 }, { "epoch": 0.2330110593308757, "grad_norm": 13.598339080810547, "learning_rate": 2.7670237184391737e-05, "loss": 0.6749, "step": 6700 }, { "epoch": 0.23474994783334494, "grad_norm": 0.32207340002059937, "learning_rate": 2.7652848299367043e-05, "loss": 0.6262, "step": 6750 }, { "epoch": 0.23648883633581413, "grad_norm": 16.014665603637695, "learning_rate": 2.7635459414342352e-05, "loss": 0.678, "step": 6800 }, { "epoch": 0.23822772483828336, "grad_norm": 0.1546684205532074, "learning_rate": 2.761807052931766e-05, "loss": 0.9721, "step": 6850 }, { "epoch": 0.2399666133407526, "grad_norm": 0.07373414933681488, "learning_rate": 2.7600681644292968e-05, "loss": 0.4793, "step": 6900 }, { "epoch": 0.24170550184322181, "grad_norm": 14.168567657470703, "learning_rate": 2.7583292759268274e-05, "loss": 0.9056, "step": 6950 }, { "epoch": 0.24344439034569104, "grad_norm": 13.444692611694336, "learning_rate": 2.7565903874243584e-05, "loss": 0.886, "step": 7000 }, { "epoch": 0.24518327884816027, "grad_norm": 0.3934480845928192, "learning_rate": 2.754851498921889e-05, "loss": 0.7522, "step": 7050 }, { "epoch": 0.24692216735062947, "grad_norm": 0.4108699858188629, "learning_rate": 2.75311261041942e-05, "loss": 0.7274, "step": 7100 }, { "epoch": 0.2486610558530987, "grad_norm": 0.14744871854782104, "learning_rate": 2.751373721916951e-05, "loss": 0.8179, "step": 7150 }, { "epoch": 0.2503999443555679, "grad_norm": 0.3611641526222229, "learning_rate": 2.7496348334144815e-05, "loss": 0.6383, "step": 7200 }, { "epoch": 0.2521388328580371, "grad_norm": 0.3606438934803009, "learning_rate": 2.7478959449120125e-05, "loss": 0.7365, "step": 7250 }, { "epoch": 0.25387772136050635, "grad_norm": 0.30051174759864807, "learning_rate": 2.746157056409543e-05, "loss": 0.6339, "step": 7300 }, { "epoch": 0.2556166098629756, "grad_norm": 13.22135066986084, "learning_rate": 2.744418167907074e-05, "loss": 0.6895, "step": 7350 }, { "epoch": 0.2573554983654448, "grad_norm": 13.905820846557617, "learning_rate": 2.7426792794046047e-05, "loss": 0.634, "step": 7400 }, { "epoch": 0.25909438686791403, "grad_norm": 0.2789829671382904, "learning_rate": 2.7409403909021356e-05, "loss": 0.5152, "step": 7450 }, { "epoch": 0.26083327537038326, "grad_norm": 0.36220815777778625, "learning_rate": 2.7392015023996663e-05, "loss": 0.8707, "step": 7500 }, { "epoch": 0.2625721638728525, "grad_norm": 13.259411811828613, "learning_rate": 2.7374626138971972e-05, "loss": 0.7467, "step": 7550 }, { "epoch": 0.2643110523753217, "grad_norm": 0.1467266082763672, "learning_rate": 2.735723725394728e-05, "loss": 0.7358, "step": 7600 }, { "epoch": 0.26604994087779094, "grad_norm": 0.39075198769569397, "learning_rate": 2.7339848368922588e-05, "loss": 0.6325, "step": 7650 }, { "epoch": 0.26778882938026016, "grad_norm": 0.4423547089099884, "learning_rate": 2.7322459483897894e-05, "loss": 0.7598, "step": 7700 }, { "epoch": 0.26952771788272933, "grad_norm": 0.41004419326782227, "learning_rate": 2.73050705988732e-05, "loss": 0.6707, "step": 7750 }, { "epoch": 0.27126660638519856, "grad_norm": 0.42505770921707153, "learning_rate": 2.7287681713848506e-05, "loss": 0.7144, "step": 7800 }, { "epoch": 0.2730054948876678, "grad_norm": 0.44134482741355896, "learning_rate": 2.7270292828823816e-05, "loss": 0.7948, "step": 7850 }, { "epoch": 0.274744383390137, "grad_norm": 0.11755936592817307, "learning_rate": 2.7252903943799122e-05, "loss": 0.6841, "step": 7900 }, { "epoch": 0.27648327189260624, "grad_norm": 0.39634180068969727, "learning_rate": 2.7235515058774432e-05, "loss": 0.6799, "step": 7950 }, { "epoch": 0.27822216039507547, "grad_norm": 13.612839698791504, "learning_rate": 2.7218126173749738e-05, "loss": 0.8446, "step": 8000 }, { "epoch": 0.2799610488975447, "grad_norm": 17.32013702392578, "learning_rate": 2.7200737288725048e-05, "loss": 0.4889, "step": 8050 }, { "epoch": 0.2816999374000139, "grad_norm": 14.156835556030273, "learning_rate": 2.7183348403700354e-05, "loss": 0.5951, "step": 8100 }, { "epoch": 0.28343882590248315, "grad_norm": 0.3639606535434723, "learning_rate": 2.7165959518675663e-05, "loss": 0.871, "step": 8150 }, { "epoch": 0.2851777144049524, "grad_norm": 14.879164695739746, "learning_rate": 2.714857063365097e-05, "loss": 0.6309, "step": 8200 }, { "epoch": 0.2869166029074216, "grad_norm": 0.0917825773358345, "learning_rate": 2.713118174862628e-05, "loss": 0.4603, "step": 8250 }, { "epoch": 0.2886554914098908, "grad_norm": 0.09159456193447113, "learning_rate": 2.7113792863601585e-05, "loss": 0.7572, "step": 8300 }, { "epoch": 0.29039437991236, "grad_norm": 0.2536778450012207, "learning_rate": 2.7096403978576895e-05, "loss": 0.6291, "step": 8350 }, { "epoch": 0.29213326841482923, "grad_norm": 0.09246829897165298, "learning_rate": 2.70790150935522e-05, "loss": 0.6749, "step": 8400 }, { "epoch": 0.29387215691729845, "grad_norm": 0.38467949628829956, "learning_rate": 2.706162620852751e-05, "loss": 0.8713, "step": 8450 }, { "epoch": 0.2956110454197677, "grad_norm": 13.252158164978027, "learning_rate": 2.7044237323502817e-05, "loss": 0.8845, "step": 8500 }, { "epoch": 0.2973499339222369, "grad_norm": 0.47461962699890137, "learning_rate": 2.7026848438478126e-05, "loss": 0.7981, "step": 8550 }, { "epoch": 0.29908882242470614, "grad_norm": 14.560328483581543, "learning_rate": 2.7009459553453432e-05, "loss": 0.494, "step": 8600 }, { "epoch": 0.30082771092717536, "grad_norm": 0.3961593806743622, "learning_rate": 2.6992070668428742e-05, "loss": 0.8747, "step": 8650 }, { "epoch": 0.3025665994296446, "grad_norm": 0.13507284224033356, "learning_rate": 2.697468178340405e-05, "loss": 0.8794, "step": 8700 }, { "epoch": 0.3043054879321138, "grad_norm": 0.3387044370174408, "learning_rate": 2.6957292898379358e-05, "loss": 0.5122, "step": 8750 }, { "epoch": 0.306044376434583, "grad_norm": 13.269755363464355, "learning_rate": 2.6939904013354667e-05, "loss": 0.8044, "step": 8800 }, { "epoch": 0.3077832649370522, "grad_norm": 0.06595932692289352, "learning_rate": 2.6922515128329974e-05, "loss": 0.4318, "step": 8850 }, { "epoch": 0.30952215343952144, "grad_norm": 0.4033437967300415, "learning_rate": 2.690512624330528e-05, "loss": 0.921, "step": 8900 }, { "epoch": 0.31126104194199067, "grad_norm": 0.3669593036174774, "learning_rate": 2.6887737358280586e-05, "loss": 0.6567, "step": 8950 }, { "epoch": 0.3129999304444599, "grad_norm": 0.07300108671188354, "learning_rate": 2.6870348473255895e-05, "loss": 0.3988, "step": 9000 }, { "epoch": 0.3147388189469291, "grad_norm": 0.08315572887659073, "learning_rate": 2.68529595882312e-05, "loss": 0.6685, "step": 9050 }, { "epoch": 0.31647770744939835, "grad_norm": 0.12973521649837494, "learning_rate": 2.683557070320651e-05, "loss": 0.9574, "step": 9100 }, { "epoch": 0.3182165959518676, "grad_norm": 0.433816134929657, "learning_rate": 2.6818181818181817e-05, "loss": 0.8079, "step": 9150 }, { "epoch": 0.3199554844543368, "grad_norm": 0.2871556580066681, "learning_rate": 2.6800792933157127e-05, "loss": 0.3992, "step": 9200 }, { "epoch": 0.32169437295680603, "grad_norm": 14.488433837890625, "learning_rate": 2.6783404048132433e-05, "loss": 0.9163, "step": 9250 }, { "epoch": 0.32343326145927526, "grad_norm": 0.20036731660366058, "learning_rate": 2.6766015163107743e-05, "loss": 0.3212, "step": 9300 }, { "epoch": 0.32517214996174443, "grad_norm": 0.44441738724708557, "learning_rate": 2.674862627808305e-05, "loss": 1.0489, "step": 9350 }, { "epoch": 0.32691103846421365, "grad_norm": 15.573990821838379, "learning_rate": 2.673123739305836e-05, "loss": 0.8287, "step": 9400 }, { "epoch": 0.3286499269666829, "grad_norm": 0.525644063949585, "learning_rate": 2.6713848508033665e-05, "loss": 0.6823, "step": 9450 }, { "epoch": 0.3303888154691521, "grad_norm": 0.2272241860628128, "learning_rate": 2.6696459623008974e-05, "loss": 0.2874, "step": 9500 }, { "epoch": 0.33212770397162134, "grad_norm": 15.048293113708496, "learning_rate": 2.667907073798428e-05, "loss": 0.7049, "step": 9550 }, { "epoch": 0.33386659247409056, "grad_norm": 0.35863372683525085, "learning_rate": 2.666168185295959e-05, "loss": 0.8949, "step": 9600 }, { "epoch": 0.3356054809765598, "grad_norm": 13.301305770874023, "learning_rate": 2.6644292967934896e-05, "loss": 0.9477, "step": 9650 }, { "epoch": 0.337344369479029, "grad_norm": 0.11718440800905228, "learning_rate": 2.6626904082910206e-05, "loss": 0.5729, "step": 9700 }, { "epoch": 0.33908325798149824, "grad_norm": 0.3565659821033478, "learning_rate": 2.6609515197885512e-05, "loss": 0.6198, "step": 9750 }, { "epoch": 0.34082214648396747, "grad_norm": 0.11834641546010971, "learning_rate": 2.659212631286082e-05, "loss": 0.7387, "step": 9800 }, { "epoch": 0.3425610349864367, "grad_norm": 0.26392120122909546, "learning_rate": 2.6574737427836128e-05, "loss": 0.4964, "step": 9850 }, { "epoch": 0.34429992348890587, "grad_norm": 0.30910661816596985, "learning_rate": 2.6557348542811437e-05, "loss": 0.8732, "step": 9900 }, { "epoch": 0.3460388119913751, "grad_norm": 14.243000030517578, "learning_rate": 2.6539959657786743e-05, "loss": 0.807, "step": 9950 }, { "epoch": 0.3477777004938443, "grad_norm": 0.5635280609130859, "learning_rate": 2.6522570772762053e-05, "loss": 0.9148, "step": 10000 }, { "epoch": 0.34951658899631355, "grad_norm": 0.13984541594982147, "learning_rate": 2.650518188773736e-05, "loss": 0.6883, "step": 10050 }, { "epoch": 0.3512554774987828, "grad_norm": 0.37048470973968506, "learning_rate": 2.6487793002712665e-05, "loss": 0.6619, "step": 10100 }, { "epoch": 0.352994366001252, "grad_norm": 0.30173954367637634, "learning_rate": 2.647040411768797e-05, "loss": 0.5862, "step": 10150 }, { "epoch": 0.35473325450372123, "grad_norm": 0.324190229177475, "learning_rate": 2.645301523266328e-05, "loss": 0.6438, "step": 10200 }, { "epoch": 0.35647214300619046, "grad_norm": 0.31963062286376953, "learning_rate": 2.6435626347638587e-05, "loss": 0.6544, "step": 10250 }, { "epoch": 0.3582110315086597, "grad_norm": 14.43270492553711, "learning_rate": 2.6418237462613897e-05, "loss": 0.6106, "step": 10300 }, { "epoch": 0.3599499200111289, "grad_norm": 0.09716640412807465, "learning_rate": 2.6400848577589206e-05, "loss": 0.6999, "step": 10350 }, { "epoch": 0.3616888085135981, "grad_norm": 0.2869093418121338, "learning_rate": 2.6383459692564513e-05, "loss": 0.4739, "step": 10400 }, { "epoch": 0.3634276970160673, "grad_norm": 0.07235327363014221, "learning_rate": 2.6366070807539822e-05, "loss": 0.5924, "step": 10450 }, { "epoch": 0.36516658551853654, "grad_norm": 13.823751449584961, "learning_rate": 2.6348681922515128e-05, "loss": 1.0196, "step": 10500 }, { "epoch": 0.36690547402100576, "grad_norm": 13.414283752441406, "learning_rate": 2.6331293037490438e-05, "loss": 0.8668, "step": 10550 }, { "epoch": 0.368644362523475, "grad_norm": 16.019412994384766, "learning_rate": 2.6313904152465744e-05, "loss": 0.6569, "step": 10600 }, { "epoch": 0.3703832510259442, "grad_norm": 0.39722299575805664, "learning_rate": 2.6296515267441054e-05, "loss": 0.7335, "step": 10650 }, { "epoch": 0.37212213952841344, "grad_norm": 13.023967742919922, "learning_rate": 2.627912638241636e-05, "loss": 0.7649, "step": 10700 }, { "epoch": 0.37386102803088267, "grad_norm": 14.089864730834961, "learning_rate": 2.626173749739167e-05, "loss": 0.6306, "step": 10750 }, { "epoch": 0.3755999165333519, "grad_norm": 15.030397415161133, "learning_rate": 2.6244348612366976e-05, "loss": 0.9625, "step": 10800 }, { "epoch": 0.3773388050358211, "grad_norm": 0.4826482832431793, "learning_rate": 2.6226959727342285e-05, "loss": 0.6661, "step": 10850 }, { "epoch": 0.37907769353829035, "grad_norm": 15.459709167480469, "learning_rate": 2.620957084231759e-05, "loss": 0.4533, "step": 10900 }, { "epoch": 0.3808165820407595, "grad_norm": 0.3697271943092346, "learning_rate": 2.61921819572929e-05, "loss": 0.7838, "step": 10950 }, { "epoch": 0.38255547054322875, "grad_norm": 0.2845596969127655, "learning_rate": 2.6174793072268207e-05, "loss": 0.5565, "step": 11000 }, { "epoch": 0.384294359045698, "grad_norm": 0.31797492504119873, "learning_rate": 2.6157404187243517e-05, "loss": 0.7687, "step": 11050 }, { "epoch": 0.3860332475481672, "grad_norm": 0.44683247804641724, "learning_rate": 2.6140015302218823e-05, "loss": 0.9018, "step": 11100 }, { "epoch": 0.38777213605063643, "grad_norm": 0.4156676232814789, "learning_rate": 2.6122626417194132e-05, "loss": 0.7594, "step": 11150 }, { "epoch": 0.38951102455310566, "grad_norm": 0.3792310953140259, "learning_rate": 2.610523753216944e-05, "loss": 0.7541, "step": 11200 }, { "epoch": 0.3912499130555749, "grad_norm": 0.3818751871585846, "learning_rate": 2.6087848647144748e-05, "loss": 0.7588, "step": 11250 }, { "epoch": 0.3929888015580441, "grad_norm": 0.2927264869213104, "learning_rate": 2.607045976212005e-05, "loss": 0.6153, "step": 11300 }, { "epoch": 0.39472769006051334, "grad_norm": 0.20653480291366577, "learning_rate": 2.605307087709536e-05, "loss": 0.3424, "step": 11350 }, { "epoch": 0.39646657856298256, "grad_norm": 0.29283103346824646, "learning_rate": 2.6035681992070667e-05, "loss": 0.8718, "step": 11400 }, { "epoch": 0.3982054670654518, "grad_norm": 28.917530059814453, "learning_rate": 2.6018293107045976e-05, "loss": 0.6853, "step": 11450 }, { "epoch": 0.39994435556792096, "grad_norm": 0.2819373607635498, "learning_rate": 2.6000904222021282e-05, "loss": 0.5639, "step": 11500 }, { "epoch": 0.4016832440703902, "grad_norm": 0.10528372973203659, "learning_rate": 2.5983515336996592e-05, "loss": 0.8573, "step": 11550 }, { "epoch": 0.4034221325728594, "grad_norm": 0.3933909833431244, "learning_rate": 2.5966126451971898e-05, "loss": 0.92, "step": 11600 }, { "epoch": 0.40516102107532864, "grad_norm": 16.51961898803711, "learning_rate": 2.5948737566947208e-05, "loss": 0.7629, "step": 11650 }, { "epoch": 0.40689990957779787, "grad_norm": 0.32065796852111816, "learning_rate": 2.5931348681922514e-05, "loss": 0.5534, "step": 11700 }, { "epoch": 0.4086387980802671, "grad_norm": 0.2726950943470001, "learning_rate": 2.5913959796897823e-05, "loss": 0.4949, "step": 11750 }, { "epoch": 0.4103776865827363, "grad_norm": 0.30706846714019775, "learning_rate": 2.589657091187313e-05, "loss": 0.8193, "step": 11800 }, { "epoch": 0.41211657508520555, "grad_norm": 0.2684274911880493, "learning_rate": 2.587918202684844e-05, "loss": 0.5189, "step": 11850 }, { "epoch": 0.4138554635876748, "grad_norm": 0.08292511105537415, "learning_rate": 2.586179314182375e-05, "loss": 0.5199, "step": 11900 }, { "epoch": 0.415594352090144, "grad_norm": 0.07494384050369263, "learning_rate": 2.5844404256799055e-05, "loss": 0.5777, "step": 11950 }, { "epoch": 0.4173332405926132, "grad_norm": 0.23529402911663055, "learning_rate": 2.5827015371774364e-05, "loss": 0.495, "step": 12000 }, { "epoch": 0.4190721290950824, "grad_norm": 14.870217323303223, "learning_rate": 2.580962648674967e-05, "loss": 0.8045, "step": 12050 }, { "epoch": 0.42081101759755163, "grad_norm": 0.33433887362480164, "learning_rate": 2.579223760172498e-05, "loss": 0.7329, "step": 12100 }, { "epoch": 0.42254990610002086, "grad_norm": 0.3028079867362976, "learning_rate": 2.5774848716700286e-05, "loss": 0.6334, "step": 12150 }, { "epoch": 0.4242887946024901, "grad_norm": 13.855344772338867, "learning_rate": 2.5757459831675596e-05, "loss": 0.8075, "step": 12200 }, { "epoch": 0.4260276831049593, "grad_norm": 0.3506879508495331, "learning_rate": 2.5740070946650902e-05, "loss": 0.5857, "step": 12250 }, { "epoch": 0.42776657160742854, "grad_norm": 0.26841405034065247, "learning_rate": 2.5722682061626212e-05, "loss": 0.5522, "step": 12300 }, { "epoch": 0.42950546010989776, "grad_norm": 0.2785841226577759, "learning_rate": 2.5705293176601518e-05, "loss": 0.7456, "step": 12350 }, { "epoch": 0.431244348612367, "grad_norm": 0.1140577495098114, "learning_rate": 2.5687904291576827e-05, "loss": 0.7666, "step": 12400 }, { "epoch": 0.4329832371148362, "grad_norm": 0.4176941215991974, "learning_rate": 2.567051540655213e-05, "loss": 0.7043, "step": 12450 }, { "epoch": 0.43472212561730544, "grad_norm": 13.494552612304688, "learning_rate": 2.565312652152744e-05, "loss": 0.7685, "step": 12500 }, { "epoch": 0.4364610141197746, "grad_norm": 0.2995794415473938, "learning_rate": 2.5635737636502746e-05, "loss": 0.5381, "step": 12550 }, { "epoch": 0.43819990262224384, "grad_norm": 0.3056049644947052, "learning_rate": 2.5618348751478056e-05, "loss": 0.7304, "step": 12600 }, { "epoch": 0.43993879112471307, "grad_norm": 0.30278047919273376, "learning_rate": 2.5600959866453362e-05, "loss": 0.6296, "step": 12650 }, { "epoch": 0.4416776796271823, "grad_norm": 0.08883975446224213, "learning_rate": 2.558357098142867e-05, "loss": 0.5121, "step": 12700 }, { "epoch": 0.4434165681296515, "grad_norm": 0.27918311953544617, "learning_rate": 2.5566182096403978e-05, "loss": 0.6648, "step": 12750 }, { "epoch": 0.44515545663212075, "grad_norm": 0.3867515027523041, "learning_rate": 2.5548793211379287e-05, "loss": 0.8216, "step": 12800 }, { "epoch": 0.44689434513459, "grad_norm": 0.31606364250183105, "learning_rate": 2.5531404326354593e-05, "loss": 0.6828, "step": 12850 }, { "epoch": 0.4486332336370592, "grad_norm": 14.8113431930542, "learning_rate": 2.5514015441329903e-05, "loss": 0.5872, "step": 12900 }, { "epoch": 0.45037212213952843, "grad_norm": 0.30183055996894836, "learning_rate": 2.549662655630521e-05, "loss": 0.754, "step": 12950 }, { "epoch": 0.45211101064199766, "grad_norm": 0.35067546367645264, "learning_rate": 2.547923767128052e-05, "loss": 0.6294, "step": 13000 }, { "epoch": 0.4538498991444669, "grad_norm": 0.4047861099243164, "learning_rate": 2.5461848786255825e-05, "loss": 0.7692, "step": 13050 }, { "epoch": 0.45558878764693606, "grad_norm": 0.5408716201782227, "learning_rate": 2.5444459901231134e-05, "loss": 0.9039, "step": 13100 }, { "epoch": 0.4573276761494053, "grad_norm": 0.33355841040611267, "learning_rate": 2.542707101620644e-05, "loss": 0.6435, "step": 13150 }, { "epoch": 0.4590665646518745, "grad_norm": 0.3559991717338562, "learning_rate": 2.540968213118175e-05, "loss": 0.6586, "step": 13200 }, { "epoch": 0.46080545315434374, "grad_norm": 0.23537401854991913, "learning_rate": 2.5392293246157056e-05, "loss": 0.3851, "step": 13250 }, { "epoch": 0.46254434165681296, "grad_norm": 0.30706942081451416, "learning_rate": 2.5374904361132366e-05, "loss": 0.793, "step": 13300 }, { "epoch": 0.4642832301592822, "grad_norm": 0.31487834453582764, "learning_rate": 2.5357515476107672e-05, "loss": 0.6471, "step": 13350 }, { "epoch": 0.4660221186617514, "grad_norm": 0.08170419186353683, "learning_rate": 2.534012659108298e-05, "loss": 0.4114, "step": 13400 }, { "epoch": 0.46776100716422064, "grad_norm": 13.717752456665039, "learning_rate": 2.5322737706058288e-05, "loss": 0.9954, "step": 13450 }, { "epoch": 0.46949989566668987, "grad_norm": 0.26287564635276794, "learning_rate": 2.5305348821033597e-05, "loss": 0.4469, "step": 13500 }, { "epoch": 0.4712387841691591, "grad_norm": 0.27812516689300537, "learning_rate": 2.5287959936008907e-05, "loss": 0.6587, "step": 13550 }, { "epoch": 0.47297767267162827, "grad_norm": 0.2884117364883423, "learning_rate": 2.5270571050984213e-05, "loss": 0.554, "step": 13600 }, { "epoch": 0.4747165611740975, "grad_norm": 0.07341925799846649, "learning_rate": 2.525318216595952e-05, "loss": 0.6002, "step": 13650 }, { "epoch": 0.4764554496765667, "grad_norm": 0.272438645362854, "learning_rate": 2.5235793280934825e-05, "loss": 0.7492, "step": 13700 }, { "epoch": 0.47819433817903595, "grad_norm": 0.2830987572669983, "learning_rate": 2.5218404395910135e-05, "loss": 0.5696, "step": 13750 }, { "epoch": 0.4799332266815052, "grad_norm": 0.3340734541416168, "learning_rate": 2.520101551088544e-05, "loss": 0.9337, "step": 13800 }, { "epoch": 0.4816721151839744, "grad_norm": 0.11989027261734009, "learning_rate": 2.518362662586075e-05, "loss": 0.7504, "step": 13850 }, { "epoch": 0.48341100368644363, "grad_norm": 14.068742752075195, "learning_rate": 2.5166237740836057e-05, "loss": 0.4927, "step": 13900 }, { "epoch": 0.48514989218891286, "grad_norm": 0.3375140428543091, "learning_rate": 2.5148848855811366e-05, "loss": 0.7994, "step": 13950 }, { "epoch": 0.4868887806913821, "grad_norm": 0.07118004560470581, "learning_rate": 2.5131459970786673e-05, "loss": 0.4349, "step": 14000 }, { "epoch": 0.4886276691938513, "grad_norm": 0.2660716474056244, "learning_rate": 2.5114071085761982e-05, "loss": 0.7375, "step": 14050 }, { "epoch": 0.49036655769632054, "grad_norm": 0.3064875304698944, "learning_rate": 2.509668220073729e-05, "loss": 0.6744, "step": 14100 }, { "epoch": 0.4921054461987897, "grad_norm": 0.3202391266822815, "learning_rate": 2.5079293315712598e-05, "loss": 0.723, "step": 14150 }, { "epoch": 0.49384433470125894, "grad_norm": 0.24043259024620056, "learning_rate": 2.5061904430687904e-05, "loss": 0.4174, "step": 14200 }, { "epoch": 0.49558322320372816, "grad_norm": 0.3756779730319977, "learning_rate": 2.5044515545663214e-05, "loss": 0.7772, "step": 14250 }, { "epoch": 0.4973221117061974, "grad_norm": 14.827591896057129, "learning_rate": 2.502712666063852e-05, "loss": 0.6928, "step": 14300 }, { "epoch": 0.4990610002086666, "grad_norm": 0.30469194054603577, "learning_rate": 2.500973777561383e-05, "loss": 0.5607, "step": 14350 }, { "epoch": 0.5007998887111358, "grad_norm": 0.12967005372047424, "learning_rate": 2.4992348890589136e-05, "loss": 0.8856, "step": 14400 }, { "epoch": 0.5025387772136051, "grad_norm": 0.3618403673171997, "learning_rate": 2.4974960005564445e-05, "loss": 0.6344, "step": 14450 }, { "epoch": 0.5042776657160742, "grad_norm": 0.11332044005393982, "learning_rate": 2.495757112053975e-05, "loss": 0.7029, "step": 14500 }, { "epoch": 0.5060165542185435, "grad_norm": 0.2932393550872803, "learning_rate": 2.494018223551506e-05, "loss": 0.4679, "step": 14550 }, { "epoch": 0.5077554427210127, "grad_norm": 0.2964160442352295, "learning_rate": 2.4922793350490367e-05, "loss": 0.5976, "step": 14600 }, { "epoch": 0.509494331223482, "grad_norm": 15.524166107177734, "learning_rate": 2.4905404465465677e-05, "loss": 0.6741, "step": 14650 }, { "epoch": 0.5112332197259511, "grad_norm": 0.36283645033836365, "learning_rate": 2.4888015580440983e-05, "loss": 0.8809, "step": 14700 }, { "epoch": 0.5129721082284204, "grad_norm": 14.058188438415527, "learning_rate": 2.4870626695416292e-05, "loss": 0.8314, "step": 14750 }, { "epoch": 0.5147109967308896, "grad_norm": 0.36868226528167725, "learning_rate": 2.4853237810391595e-05, "loss": 0.5523, "step": 14800 }, { "epoch": 0.5164498852333589, "grad_norm": 14.61916446685791, "learning_rate": 2.4835848925366905e-05, "loss": 0.7005, "step": 14850 }, { "epoch": 0.5181887737358281, "grad_norm": 0.10888968408107758, "learning_rate": 2.481846004034221e-05, "loss": 0.651, "step": 14900 }, { "epoch": 0.5199276622382972, "grad_norm": 0.3735586106777191, "learning_rate": 2.480107115531752e-05, "loss": 0.7986, "step": 14950 }, { "epoch": 0.5216665507407665, "grad_norm": 0.47358861565589905, "learning_rate": 2.4783682270292827e-05, "loss": 0.88, "step": 15000 }, { "epoch": 0.5234054392432357, "grad_norm": 0.3821775019168854, "learning_rate": 2.4766293385268136e-05, "loss": 0.6383, "step": 15050 }, { "epoch": 0.525144327745705, "grad_norm": 0.3203616440296173, "learning_rate": 2.4748904500243446e-05, "loss": 0.8141, "step": 15100 }, { "epoch": 0.5268832162481741, "grad_norm": 0.3582339584827423, "learning_rate": 2.4731515615218752e-05, "loss": 0.7444, "step": 15150 }, { "epoch": 0.5286221047506434, "grad_norm": 0.38256072998046875, "learning_rate": 2.471412673019406e-05, "loss": 0.6452, "step": 15200 }, { "epoch": 0.5303609932531126, "grad_norm": 14.964351654052734, "learning_rate": 2.4696737845169368e-05, "loss": 0.411, "step": 15250 }, { "epoch": 0.5320998817555819, "grad_norm": 0.31296077370643616, "learning_rate": 2.4679348960144677e-05, "loss": 0.8315, "step": 15300 }, { "epoch": 0.533838770258051, "grad_norm": 0.35681378841400146, "learning_rate": 2.4661960075119984e-05, "loss": 0.6046, "step": 15350 }, { "epoch": 0.5355776587605203, "grad_norm": 14.303959846496582, "learning_rate": 2.4644571190095293e-05, "loss": 0.9073, "step": 15400 }, { "epoch": 0.5373165472629895, "grad_norm": 0.36961236596107483, "learning_rate": 2.46271823050706e-05, "loss": 0.7141, "step": 15450 }, { "epoch": 0.5390554357654587, "grad_norm": 0.41073235869407654, "learning_rate": 2.460979342004591e-05, "loss": 0.636, "step": 15500 }, { "epoch": 0.540794324267928, "grad_norm": 0.10415417701005936, "learning_rate": 2.4592404535021215e-05, "loss": 0.6275, "step": 15550 }, { "epoch": 0.5425332127703971, "grad_norm": 0.2525995969772339, "learning_rate": 2.4575015649996525e-05, "loss": 0.5782, "step": 15600 }, { "epoch": 0.5442721012728664, "grad_norm": 0.3075423538684845, "learning_rate": 2.455762676497183e-05, "loss": 0.7682, "step": 15650 }, { "epoch": 0.5460109897753356, "grad_norm": 0.3564911186695099, "learning_rate": 2.454023787994714e-05, "loss": 0.6945, "step": 15700 }, { "epoch": 0.5477498782778049, "grad_norm": 0.11186113953590393, "learning_rate": 2.4522848994922447e-05, "loss": 0.6737, "step": 15750 }, { "epoch": 0.549488766780274, "grad_norm": 0.2933875322341919, "learning_rate": 2.4505460109897756e-05, "loss": 0.594, "step": 15800 }, { "epoch": 0.5512276552827433, "grad_norm": 0.22874318063259125, "learning_rate": 2.4488071224873062e-05, "loss": 0.3494, "step": 15850 }, { "epoch": 0.5529665437852125, "grad_norm": 0.24619042873382568, "learning_rate": 2.4470682339848372e-05, "loss": 0.6471, "step": 15900 }, { "epoch": 0.5547054322876818, "grad_norm": 0.3554873764514923, "learning_rate": 2.4453293454823678e-05, "loss": 0.7968, "step": 15950 }, { "epoch": 0.5564443207901509, "grad_norm": 0.2721209228038788, "learning_rate": 2.4435904569798984e-05, "loss": 0.4559, "step": 16000 }, { "epoch": 0.5581832092926201, "grad_norm": 0.2638147175312042, "learning_rate": 2.441851568477429e-05, "loss": 0.6293, "step": 16050 }, { "epoch": 0.5599220977950894, "grad_norm": 0.23964059352874756, "learning_rate": 2.44011267997496e-05, "loss": 0.454, "step": 16100 }, { "epoch": 0.5616609862975586, "grad_norm": 0.3575807511806488, "learning_rate": 2.4383737914724906e-05, "loss": 0.9547, "step": 16150 }, { "epoch": 0.5633998748000278, "grad_norm": 0.5044054985046387, "learning_rate": 2.4366349029700216e-05, "loss": 0.8927, "step": 16200 }, { "epoch": 0.565138763302497, "grad_norm": 0.3494166433811188, "learning_rate": 2.4348960144675522e-05, "loss": 0.6611, "step": 16250 }, { "epoch": 0.5668776518049663, "grad_norm": 0.39576590061187744, "learning_rate": 2.433157125965083e-05, "loss": 0.7111, "step": 16300 }, { "epoch": 0.5686165403074355, "grad_norm": 0.2698497772216797, "learning_rate": 2.4314182374626138e-05, "loss": 0.4341, "step": 16350 }, { "epoch": 0.5703554288099048, "grad_norm": 0.07783553004264832, "learning_rate": 2.4296793489601447e-05, "loss": 0.5719, "step": 16400 }, { "epoch": 0.5720943173123739, "grad_norm": 0.3473074436187744, "learning_rate": 2.4279404604576753e-05, "loss": 0.7079, "step": 16450 }, { "epoch": 0.5738332058148432, "grad_norm": 0.26524677872657776, "learning_rate": 2.4262015719552063e-05, "loss": 0.4847, "step": 16500 }, { "epoch": 0.5755720943173124, "grad_norm": 0.07581447809934616, "learning_rate": 2.424462683452737e-05, "loss": 0.6094, "step": 16550 }, { "epoch": 0.5773109828197815, "grad_norm": 15.479100227355957, "learning_rate": 2.422723794950268e-05, "loss": 0.4711, "step": 16600 }, { "epoch": 0.5790498713222508, "grad_norm": 6.447624206542969, "learning_rate": 2.4209849064477985e-05, "loss": 0.8405, "step": 16650 }, { "epoch": 0.58078875982472, "grad_norm": 0.3773106336593628, "learning_rate": 2.4192460179453294e-05, "loss": 0.8306, "step": 16700 }, { "epoch": 0.5825276483271893, "grad_norm": 0.1099114865064621, "learning_rate": 2.4175071294428604e-05, "loss": 0.5826, "step": 16750 }, { "epoch": 0.5842665368296585, "grad_norm": 14.971306800842285, "learning_rate": 2.415768240940391e-05, "loss": 0.7954, "step": 16800 }, { "epoch": 0.5860054253321277, "grad_norm": 0.3108014166355133, "learning_rate": 2.414029352437922e-05, "loss": 0.5542, "step": 16850 }, { "epoch": 0.5877443138345969, "grad_norm": 0.37142741680145264, "learning_rate": 2.4122904639354526e-05, "loss": 0.9187, "step": 16900 }, { "epoch": 0.5894832023370662, "grad_norm": 0.379334419965744, "learning_rate": 2.4105515754329836e-05, "loss": 0.6389, "step": 16950 }, { "epoch": 0.5912220908395354, "grad_norm": 0.28387072682380676, "learning_rate": 2.408812686930514e-05, "loss": 0.5156, "step": 17000 }, { "epoch": 0.5929609793420046, "grad_norm": 0.09575625509023666, "learning_rate": 2.407073798428045e-05, "loss": 0.749, "step": 17050 }, { "epoch": 0.5946998678444738, "grad_norm": 0.3946804404258728, "learning_rate": 2.4053349099255757e-05, "loss": 0.7769, "step": 17100 }, { "epoch": 0.596438756346943, "grad_norm": 0.27003926038742065, "learning_rate": 2.4035960214231064e-05, "loss": 0.5769, "step": 17150 }, { "epoch": 0.5981776448494123, "grad_norm": 0.2528146803379059, "learning_rate": 2.401857132920637e-05, "loss": 0.5706, "step": 17200 }, { "epoch": 0.5999165333518814, "grad_norm": 0.23348194360733032, "learning_rate": 2.400118244418168e-05, "loss": 0.4798, "step": 17250 }, { "epoch": 0.6016554218543507, "grad_norm": 0.08327616751194, "learning_rate": 2.3983793559156986e-05, "loss": 0.8172, "step": 17300 }, { "epoch": 0.6033943103568199, "grad_norm": 16.285873413085938, "learning_rate": 2.3966404674132295e-05, "loss": 0.7651, "step": 17350 }, { "epoch": 0.6051331988592892, "grad_norm": 15.471016883850098, "learning_rate": 2.39490157891076e-05, "loss": 0.5632, "step": 17400 }, { "epoch": 0.6068720873617583, "grad_norm": 14.548842430114746, "learning_rate": 2.393162690408291e-05, "loss": 0.7731, "step": 17450 }, { "epoch": 0.6086109758642276, "grad_norm": 0.49227452278137207, "learning_rate": 2.3914238019058217e-05, "loss": 0.9148, "step": 17500 }, { "epoch": 0.6103498643666968, "grad_norm": 0.5034491419792175, "learning_rate": 2.3896849134033527e-05, "loss": 0.8019, "step": 17550 }, { "epoch": 0.612088752869166, "grad_norm": 0.10245110839605331, "learning_rate": 2.3879460249008833e-05, "loss": 0.4645, "step": 17600 }, { "epoch": 0.6138276413716353, "grad_norm": 0.24425481259822845, "learning_rate": 2.3862071363984142e-05, "loss": 0.4299, "step": 17650 }, { "epoch": 0.6155665298741044, "grad_norm": 0.0785655826330185, "learning_rate": 2.384468247895945e-05, "loss": 0.5038, "step": 17700 }, { "epoch": 0.6173054183765737, "grad_norm": 0.2665925621986389, "learning_rate": 2.3827293593934758e-05, "loss": 0.6015, "step": 17750 }, { "epoch": 0.6190443068790429, "grad_norm": 0.36466577649116516, "learning_rate": 2.3809904708910064e-05, "loss": 0.8002, "step": 17800 }, { "epoch": 0.6207831953815122, "grad_norm": 17.339256286621094, "learning_rate": 2.3792515823885374e-05, "loss": 0.4534, "step": 17850 }, { "epoch": 0.6225220838839813, "grad_norm": 0.26136887073516846, "learning_rate": 2.377512693886068e-05, "loss": 0.645, "step": 17900 }, { "epoch": 0.6242609723864506, "grad_norm": 0.39176809787750244, "learning_rate": 2.375773805383599e-05, "loss": 0.7999, "step": 17950 }, { "epoch": 0.6259998608889198, "grad_norm": 0.36014682054519653, "learning_rate": 2.3740349168811296e-05, "loss": 0.6262, "step": 18000 }, { "epoch": 0.6277387493913891, "grad_norm": 0.2601962387561798, "learning_rate": 2.3722960283786605e-05, "loss": 0.3958, "step": 18050 }, { "epoch": 0.6294776378938582, "grad_norm": 15.840449333190918, "learning_rate": 2.370557139876191e-05, "loss": 0.5889, "step": 18100 }, { "epoch": 0.6312165263963274, "grad_norm": 0.0927547812461853, "learning_rate": 2.368818251373722e-05, "loss": 0.6818, "step": 18150 }, { "epoch": 0.6329554148987967, "grad_norm": 0.12880657613277435, "learning_rate": 2.3670793628712527e-05, "loss": 0.9259, "step": 18200 }, { "epoch": 0.6346943034012659, "grad_norm": 0.4678806960582733, "learning_rate": 2.3653404743687837e-05, "loss": 0.804, "step": 18250 }, { "epoch": 0.6364331919037352, "grad_norm": 13.326157569885254, "learning_rate": 2.3636015858663146e-05, "loss": 0.8873, "step": 18300 }, { "epoch": 0.6381720804062043, "grad_norm": 0.2906201183795929, "learning_rate": 2.361862697363845e-05, "loss": 0.4373, "step": 18350 }, { "epoch": 0.6399109689086736, "grad_norm": 0.33774152398109436, "learning_rate": 2.360123808861376e-05, "loss": 0.7218, "step": 18400 }, { "epoch": 0.6416498574111428, "grad_norm": 0.3739396929740906, "learning_rate": 2.3583849203589065e-05, "loss": 0.8048, "step": 18450 }, { "epoch": 0.6433887459136121, "grad_norm": 0.6967986226081848, "learning_rate": 2.3566460318564375e-05, "loss": 0.9189, "step": 18500 }, { "epoch": 0.6451276344160812, "grad_norm": 0.4377918541431427, "learning_rate": 2.354907143353968e-05, "loss": 0.6334, "step": 18550 }, { "epoch": 0.6468665229185505, "grad_norm": 0.3565727174282074, "learning_rate": 2.353168254851499e-05, "loss": 0.7587, "step": 18600 }, { "epoch": 0.6486054114210197, "grad_norm": 14.115187644958496, "learning_rate": 2.3514293663490296e-05, "loss": 0.7123, "step": 18650 }, { "epoch": 0.6503442999234889, "grad_norm": 0.3043876588344574, "learning_rate": 2.3496904778465606e-05, "loss": 0.5251, "step": 18700 }, { "epoch": 0.6520831884259581, "grad_norm": 0.28734010457992554, "learning_rate": 2.3479515893440912e-05, "loss": 0.5949, "step": 18750 }, { "epoch": 0.6538220769284273, "grad_norm": 0.28035423159599304, "learning_rate": 2.3462127008416222e-05, "loss": 0.6868, "step": 18800 }, { "epoch": 0.6555609654308966, "grad_norm": 0.3314853012561798, "learning_rate": 2.3444738123391528e-05, "loss": 0.6136, "step": 18850 }, { "epoch": 0.6572998539333658, "grad_norm": 0.3013773262500763, "learning_rate": 2.3427349238366838e-05, "loss": 0.7197, "step": 18900 }, { "epoch": 0.659038742435835, "grad_norm": 14.988571166992188, "learning_rate": 2.3409960353342144e-05, "loss": 0.5931, "step": 18950 }, { "epoch": 0.6607776309383042, "grad_norm": 0.4027162194252014, "learning_rate": 2.3392571468317453e-05, "loss": 0.898, "step": 19000 }, { "epoch": 0.6625165194407735, "grad_norm": 0.10429493337869644, "learning_rate": 2.337518258329276e-05, "loss": 0.6271, "step": 19050 }, { "epoch": 0.6642554079432427, "grad_norm": 14.614250183105469, "learning_rate": 2.335779369826807e-05, "loss": 0.5569, "step": 19100 }, { "epoch": 0.665994296445712, "grad_norm": 14.460880279541016, "learning_rate": 2.3340404813243375e-05, "loss": 0.8757, "step": 19150 }, { "epoch": 0.6677331849481811, "grad_norm": 0.30695316195487976, "learning_rate": 2.3323015928218685e-05, "loss": 0.4122, "step": 19200 }, { "epoch": 0.6694720734506503, "grad_norm": 0.313351035118103, "learning_rate": 2.330562704319399e-05, "loss": 0.7074, "step": 19250 }, { "epoch": 0.6712109619531196, "grad_norm": 0.3445839285850525, "learning_rate": 2.32882381581693e-05, "loss": 0.816, "step": 19300 }, { "epoch": 0.6729498504555887, "grad_norm": 15.237052917480469, "learning_rate": 2.3270849273144607e-05, "loss": 0.6705, "step": 19350 }, { "epoch": 0.674688738958058, "grad_norm": 0.4142249822616577, "learning_rate": 2.3253460388119916e-05, "loss": 0.7724, "step": 19400 }, { "epoch": 0.6764276274605272, "grad_norm": 14.296184539794922, "learning_rate": 2.3236071503095222e-05, "loss": 0.6872, "step": 19450 }, { "epoch": 0.6781665159629965, "grad_norm": 0.2904287874698639, "learning_rate": 2.321868261807053e-05, "loss": 0.5912, "step": 19500 }, { "epoch": 0.6799054044654657, "grad_norm": 0.2889081835746765, "learning_rate": 2.3201293733045835e-05, "loss": 0.7432, "step": 19550 }, { "epoch": 0.6816442929679349, "grad_norm": 0.04875553771853447, "learning_rate": 2.3183904848021144e-05, "loss": 0.6498, "step": 19600 }, { "epoch": 0.6833831814704041, "grad_norm": 11.693511009216309, "learning_rate": 2.316651596299645e-05, "loss": 0.6725, "step": 19650 }, { "epoch": 0.6851220699728734, "grad_norm": 14.698670387268066, "learning_rate": 2.314912707797176e-05, "loss": 0.6131, "step": 19700 }, { "epoch": 0.6868609584753426, "grad_norm": 0.33701905608177185, "learning_rate": 2.3131738192947066e-05, "loss": 0.856, "step": 19750 }, { "epoch": 0.6885998469778117, "grad_norm": 0.3263719975948334, "learning_rate": 2.3114349307922376e-05, "loss": 0.5799, "step": 19800 }, { "epoch": 0.690338735480281, "grad_norm": 0.33572545647621155, "learning_rate": 2.3096960422897682e-05, "loss": 0.8655, "step": 19850 }, { "epoch": 0.6920776239827502, "grad_norm": 0.3469328284263611, "learning_rate": 2.307957153787299e-05, "loss": 0.5777, "step": 19900 }, { "epoch": 0.6938165124852195, "grad_norm": 15.014354705810547, "learning_rate": 2.30621826528483e-05, "loss": 0.509, "step": 19950 }, { "epoch": 0.6955554009876886, "grad_norm": 0.3842661380767822, "learning_rate": 2.3044793767823607e-05, "loss": 0.843, "step": 20000 }, { "epoch": 0.6972942894901579, "grad_norm": 0.29375502467155457, "learning_rate": 2.3027404882798917e-05, "loss": 0.3928, "step": 20050 }, { "epoch": 0.6990331779926271, "grad_norm": 0.30595964193344116, "learning_rate": 2.3010015997774223e-05, "loss": 0.8393, "step": 20100 }, { "epoch": 0.7007720664950964, "grad_norm": 1.1586613655090332, "learning_rate": 2.2992627112749533e-05, "loss": 0.6233, "step": 20150 }, { "epoch": 0.7025109549975656, "grad_norm": 0.32588326930999756, "learning_rate": 2.297523822772484e-05, "loss": 0.6078, "step": 20200 }, { "epoch": 0.7042498435000347, "grad_norm": 0.3331815004348755, "learning_rate": 2.295784934270015e-05, "loss": 0.7479, "step": 20250 }, { "epoch": 0.705988732002504, "grad_norm": 0.24668283760547638, "learning_rate": 2.2940460457675455e-05, "loss": 0.4668, "step": 20300 }, { "epoch": 0.7077276205049732, "grad_norm": 0.28482386469841003, "learning_rate": 2.2923071572650764e-05, "loss": 0.8923, "step": 20350 }, { "epoch": 0.7094665090074425, "grad_norm": 0.40325742959976196, "learning_rate": 2.290568268762607e-05, "loss": 0.7328, "step": 20400 }, { "epoch": 0.7112053975099116, "grad_norm": 0.2851964831352234, "learning_rate": 2.288829380260138e-05, "loss": 0.4707, "step": 20450 }, { "epoch": 0.7129442860123809, "grad_norm": 0.36150985956192017, "learning_rate": 2.2870904917576686e-05, "loss": 0.8544, "step": 20500 }, { "epoch": 0.7146831745148501, "grad_norm": 0.27775704860687256, "learning_rate": 2.2853516032551996e-05, "loss": 0.3637, "step": 20550 }, { "epoch": 0.7164220630173194, "grad_norm": 0.26588091254234314, "learning_rate": 2.2836127147527302e-05, "loss": 0.6915, "step": 20600 }, { "epoch": 0.7181609515197885, "grad_norm": 0.37732571363449097, "learning_rate": 2.281873826250261e-05, "loss": 0.8626, "step": 20650 }, { "epoch": 0.7198998400222578, "grad_norm": 0.3477833569049835, "learning_rate": 2.2801349377477914e-05, "loss": 0.6426, "step": 20700 }, { "epoch": 0.721638728524727, "grad_norm": 0.14422321319580078, "learning_rate": 2.2783960492453224e-05, "loss": 0.9405, "step": 20750 }, { "epoch": 0.7233776170271962, "grad_norm": 0.09302867949008942, "learning_rate": 2.276657160742853e-05, "loss": 0.406, "step": 20800 }, { "epoch": 0.7251165055296654, "grad_norm": 0.3401138484477997, "learning_rate": 2.274918272240384e-05, "loss": 0.6872, "step": 20850 }, { "epoch": 0.7268553940321346, "grad_norm": 13.878528594970703, "learning_rate": 2.2731793837379146e-05, "loss": 0.7888, "step": 20900 }, { "epoch": 0.7285942825346039, "grad_norm": 15.98861312866211, "learning_rate": 2.2714404952354455e-05, "loss": 0.8469, "step": 20950 }, { "epoch": 0.7303331710370731, "grad_norm": 0.369468629360199, "learning_rate": 2.269701606732976e-05, "loss": 0.5919, "step": 21000 }, { "epoch": 0.7320720595395424, "grad_norm": 0.10440695285797119, "learning_rate": 2.267962718230507e-05, "loss": 0.6578, "step": 21050 }, { "epoch": 0.7338109480420115, "grad_norm": 0.28678056597709656, "learning_rate": 2.2662238297280377e-05, "loss": 0.5317, "step": 21100 }, { "epoch": 0.7355498365444808, "grad_norm": 1.2252366542816162, "learning_rate": 2.2644849412255687e-05, "loss": 1.0638, "step": 21150 }, { "epoch": 0.73728872504695, "grad_norm": 0.236678346991539, "learning_rate": 2.2627460527230993e-05, "loss": 0.6815, "step": 21200 }, { "epoch": 0.7390276135494193, "grad_norm": 0.4297665059566498, "learning_rate": 2.2610071642206302e-05, "loss": 0.9287, "step": 21250 }, { "epoch": 0.7407665020518884, "grad_norm": 14.483648300170898, "learning_rate": 2.259268275718161e-05, "loss": 0.5618, "step": 21300 }, { "epoch": 0.7425053905543576, "grad_norm": 15.5200777053833, "learning_rate": 2.2575293872156918e-05, "loss": 0.5481, "step": 21350 }, { "epoch": 0.7442442790568269, "grad_norm": 14.79207706451416, "learning_rate": 2.2557904987132224e-05, "loss": 0.6313, "step": 21400 }, { "epoch": 0.7459831675592961, "grad_norm": 0.29776531457901, "learning_rate": 2.2540516102107534e-05, "loss": 0.647, "step": 21450 }, { "epoch": 0.7477220560617653, "grad_norm": 16.960607528686523, "learning_rate": 2.2523127217082844e-05, "loss": 0.6784, "step": 21500 }, { "epoch": 0.7494609445642345, "grad_norm": 0.37356680631637573, "learning_rate": 2.250573833205815e-05, "loss": 0.7801, "step": 21550 }, { "epoch": 0.7511998330667038, "grad_norm": 0.3333500027656555, "learning_rate": 2.248834944703346e-05, "loss": 0.5943, "step": 21600 }, { "epoch": 0.752938721569173, "grad_norm": 15.462047576904297, "learning_rate": 2.2470960562008765e-05, "loss": 0.5336, "step": 21650 }, { "epoch": 0.7546776100716422, "grad_norm": 0.23114657402038574, "learning_rate": 2.2453571676984075e-05, "loss": 0.5852, "step": 21700 }, { "epoch": 0.7564164985741114, "grad_norm": 15.110422134399414, "learning_rate": 2.243618279195938e-05, "loss": 0.6793, "step": 21750 }, { "epoch": 0.7581553870765807, "grad_norm": 15.177083969116211, "learning_rate": 2.241879390693469e-05, "loss": 0.5657, "step": 21800 }, { "epoch": 0.7598942755790499, "grad_norm": 0.31824395060539246, "learning_rate": 2.2401405021909997e-05, "loss": 0.7481, "step": 21850 }, { "epoch": 0.761633164081519, "grad_norm": 0.0828360989689827, "learning_rate": 2.2384016136885303e-05, "loss": 0.5114, "step": 21900 }, { "epoch": 0.7633720525839883, "grad_norm": 0.25499773025512695, "learning_rate": 2.236662725186061e-05, "loss": 0.6465, "step": 21950 }, { "epoch": 0.7651109410864575, "grad_norm": 0.22381390631198883, "learning_rate": 2.234923836683592e-05, "loss": 0.5148, "step": 22000 }, { "epoch": 0.7668498295889268, "grad_norm": 0.2555175721645355, "learning_rate": 2.2331849481811225e-05, "loss": 0.6684, "step": 22050 }, { "epoch": 0.768588718091396, "grad_norm": 15.803478240966797, "learning_rate": 2.2314460596786535e-05, "loss": 0.6363, "step": 22100 }, { "epoch": 0.7703276065938652, "grad_norm": 0.369488924741745, "learning_rate": 2.229707171176184e-05, "loss": 0.8082, "step": 22150 }, { "epoch": 0.7720664950963344, "grad_norm": 15.66444206237793, "learning_rate": 2.227968282673715e-05, "loss": 0.5286, "step": 22200 }, { "epoch": 0.7738053835988037, "grad_norm": 0.2935386300086975, "learning_rate": 2.2262293941712457e-05, "loss": 0.8429, "step": 22250 }, { "epoch": 0.7755442721012729, "grad_norm": 13.716878890991211, "learning_rate": 2.2244905056687766e-05, "loss": 0.8653, "step": 22300 }, { "epoch": 0.7772831606037421, "grad_norm": 0.31653979420661926, "learning_rate": 2.2227516171663072e-05, "loss": 0.4255, "step": 22350 }, { "epoch": 0.7790220491062113, "grad_norm": 0.2622830271720886, "learning_rate": 2.2210127286638382e-05, "loss": 0.7085, "step": 22400 }, { "epoch": 0.7807609376086805, "grad_norm": 0.23553091287612915, "learning_rate": 2.2192738401613688e-05, "loss": 0.642, "step": 22450 }, { "epoch": 0.7824998261111498, "grad_norm": 0.2885299324989319, "learning_rate": 2.2175349516588998e-05, "loss": 0.6615, "step": 22500 }, { "epoch": 0.7842387146136189, "grad_norm": 14.475643157958984, "learning_rate": 2.2157960631564304e-05, "loss": 1.0366, "step": 22550 }, { "epoch": 0.7859776031160882, "grad_norm": 0.3919031322002411, "learning_rate": 2.2140571746539613e-05, "loss": 0.619, "step": 22600 }, { "epoch": 0.7877164916185574, "grad_norm": 14.031718254089355, "learning_rate": 2.212318286151492e-05, "loss": 0.7007, "step": 22650 }, { "epoch": 0.7894553801210267, "grad_norm": 0.07439234107732773, "learning_rate": 2.210579397649023e-05, "loss": 0.2787, "step": 22700 }, { "epoch": 0.7911942686234958, "grad_norm": 0.24161553382873535, "learning_rate": 2.2088405091465535e-05, "loss": 0.7503, "step": 22750 }, { "epoch": 0.7929331571259651, "grad_norm": 0.3969561755657196, "learning_rate": 2.2071016206440845e-05, "loss": 0.9328, "step": 22800 }, { "epoch": 0.7946720456284343, "grad_norm": 14.233762741088867, "learning_rate": 2.205362732141615e-05, "loss": 0.7592, "step": 22850 }, { "epoch": 0.7964109341309036, "grad_norm": 0.4661564826965332, "learning_rate": 2.203623843639146e-05, "loss": 0.7201, "step": 22900 }, { "epoch": 0.7981498226333728, "grad_norm": 0.32605066895484924, "learning_rate": 2.2018849551366767e-05, "loss": 0.5405, "step": 22950 }, { "epoch": 0.7998887111358419, "grad_norm": 0.06785812228918076, "learning_rate": 2.2001460666342076e-05, "loss": 0.5168, "step": 23000 }, { "epoch": 0.8016275996383112, "grad_norm": 0.2257971465587616, "learning_rate": 2.198407178131738e-05, "loss": 0.5414, "step": 23050 }, { "epoch": 0.8033664881407804, "grad_norm": 0.2969124913215637, "learning_rate": 2.196668289629269e-05, "loss": 0.8514, "step": 23100 }, { "epoch": 0.8051053766432497, "grad_norm": 0.3510895371437073, "learning_rate": 2.1949294011267998e-05, "loss": 0.8156, "step": 23150 }, { "epoch": 0.8068442651457188, "grad_norm": 0.3745481073856354, "learning_rate": 2.1931905126243304e-05, "loss": 0.6619, "step": 23200 }, { "epoch": 0.8085831536481881, "grad_norm": 14.6951265335083, "learning_rate": 2.1914516241218614e-05, "loss": 0.5841, "step": 23250 }, { "epoch": 0.8103220421506573, "grad_norm": 0.3405679166316986, "learning_rate": 2.189712735619392e-05, "loss": 0.7189, "step": 23300 }, { "epoch": 0.8120609306531266, "grad_norm": 15.301614761352539, "learning_rate": 2.187973847116923e-05, "loss": 0.6457, "step": 23350 }, { "epoch": 0.8137998191555957, "grad_norm": 14.475076675415039, "learning_rate": 2.1862349586144536e-05, "loss": 0.7841, "step": 23400 }, { "epoch": 0.8155387076580649, "grad_norm": 0.385305255651474, "learning_rate": 2.1844960701119846e-05, "loss": 0.7398, "step": 23450 }, { "epoch": 0.8172775961605342, "grad_norm": 0.3697810471057892, "learning_rate": 2.1827571816095152e-05, "loss": 0.7471, "step": 23500 }, { "epoch": 0.8190164846630034, "grad_norm": 0.3838272988796234, "learning_rate": 2.181018293107046e-05, "loss": 0.7452, "step": 23550 }, { "epoch": 0.8207553731654726, "grad_norm": 0.14504793286323547, "learning_rate": 2.1792794046045767e-05, "loss": 1.0085, "step": 23600 }, { "epoch": 0.8224942616679418, "grad_norm": 0.39224308729171753, "learning_rate": 2.1775405161021077e-05, "loss": 0.5183, "step": 23650 }, { "epoch": 0.8242331501704111, "grad_norm": 0.3547824025154114, "learning_rate": 2.1758016275996383e-05, "loss": 0.7136, "step": 23700 }, { "epoch": 0.8259720386728803, "grad_norm": 0.22923780977725983, "learning_rate": 2.1740627390971693e-05, "loss": 0.3469, "step": 23750 }, { "epoch": 0.8277109271753496, "grad_norm": 0.26457610726356506, "learning_rate": 2.1723238505947e-05, "loss": 0.6815, "step": 23800 }, { "epoch": 0.8294498156778187, "grad_norm": 0.0954211875796318, "learning_rate": 2.170584962092231e-05, "loss": 0.6971, "step": 23850 }, { "epoch": 0.831188704180288, "grad_norm": 0.41926613450050354, "learning_rate": 2.1688460735897615e-05, "loss": 0.8862, "step": 23900 }, { "epoch": 0.8329275926827572, "grad_norm": 14.820884704589844, "learning_rate": 2.1671071850872924e-05, "loss": 0.5501, "step": 23950 }, { "epoch": 0.8346664811852263, "grad_norm": 0.39577415585517883, "learning_rate": 2.165368296584823e-05, "loss": 0.8689, "step": 24000 }, { "epoch": 0.8364053696876956, "grad_norm": 0.2860095798969269, "learning_rate": 2.163629408082354e-05, "loss": 0.4624, "step": 24050 }, { "epoch": 0.8381442581901648, "grad_norm": 15.735854148864746, "learning_rate": 2.1618905195798846e-05, "loss": 0.4864, "step": 24100 }, { "epoch": 0.8398831466926341, "grad_norm": 0.2520955502986908, "learning_rate": 2.1601516310774156e-05, "loss": 0.5858, "step": 24150 }, { "epoch": 0.8416220351951033, "grad_norm": 0.24668724834918976, "learning_rate": 2.1584127425749462e-05, "loss": 0.676, "step": 24200 }, { "epoch": 0.8433609236975725, "grad_norm": 0.3810020089149475, "learning_rate": 2.1566738540724768e-05, "loss": 0.9683, "step": 24250 }, { "epoch": 0.8450998122000417, "grad_norm": 0.35456982254981995, "learning_rate": 2.1549349655700074e-05, "loss": 0.62, "step": 24300 }, { "epoch": 0.846838700702511, "grad_norm": 0.3160824179649353, "learning_rate": 2.1531960770675384e-05, "loss": 0.5729, "step": 24350 }, { "epoch": 0.8485775892049802, "grad_norm": 9.329594612121582, "learning_rate": 2.151457188565069e-05, "loss": 0.6011, "step": 24400 }, { "epoch": 0.8503164777074494, "grad_norm": 0.3115905225276947, "learning_rate": 2.1497183000626e-05, "loss": 0.6448, "step": 24450 }, { "epoch": 0.8520553662099186, "grad_norm": 0.21613167226314545, "learning_rate": 2.1479794115601306e-05, "loss": 0.2817, "step": 24500 }, { "epoch": 0.8537942547123878, "grad_norm": 0.2705794870853424, "learning_rate": 2.1462405230576615e-05, "loss": 0.7726, "step": 24550 }, { "epoch": 0.8555331432148571, "grad_norm": 0.050444021821022034, "learning_rate": 2.144501634555192e-05, "loss": 0.6639, "step": 24600 }, { "epoch": 0.8572720317173262, "grad_norm": 14.922074317932129, "learning_rate": 2.142762746052723e-05, "loss": 0.8166, "step": 24650 }, { "epoch": 0.8590109202197955, "grad_norm": 0.399127721786499, "learning_rate": 2.141023857550254e-05, "loss": 0.8275, "step": 24700 }, { "epoch": 0.8607498087222647, "grad_norm": 0.4830530285835266, "learning_rate": 2.1392849690477847e-05, "loss": 0.9391, "step": 24750 }, { "epoch": 0.862488697224734, "grad_norm": 0.411568284034729, "learning_rate": 2.1375460805453156e-05, "loss": 0.7163, "step": 24800 }, { "epoch": 0.8642275857272032, "grad_norm": 0.3052062392234802, "learning_rate": 2.1358071920428463e-05, "loss": 0.4032, "step": 24850 }, { "epoch": 0.8659664742296724, "grad_norm": 0.337489515542984, "learning_rate": 2.1340683035403772e-05, "loss": 0.9196, "step": 24900 }, { "epoch": 0.8677053627321416, "grad_norm": 0.46944114565849304, "learning_rate": 2.132329415037908e-05, "loss": 0.8098, "step": 24950 }, { "epoch": 0.8694442512346109, "grad_norm": 0.38088351488113403, "learning_rate": 2.1305905265354388e-05, "loss": 0.6352, "step": 25000 }, { "epoch": 0.8711831397370801, "grad_norm": 0.10885400325059891, "learning_rate": 2.1288516380329694e-05, "loss": 0.6078, "step": 25050 }, { "epoch": 0.8729220282395492, "grad_norm": 0.39546558260917664, "learning_rate": 2.1271127495305004e-05, "loss": 0.8388, "step": 25100 }, { "epoch": 0.8746609167420185, "grad_norm": 15.477993965148926, "learning_rate": 2.125373861028031e-05, "loss": 0.4063, "step": 25150 }, { "epoch": 0.8763998052444877, "grad_norm": 16.875173568725586, "learning_rate": 2.123634972525562e-05, "loss": 0.7645, "step": 25200 }, { "epoch": 0.878138693746957, "grad_norm": 15.730433464050293, "learning_rate": 2.1218960840230926e-05, "loss": 0.6947, "step": 25250 }, { "epoch": 0.8798775822494261, "grad_norm": 14.587946891784668, "learning_rate": 2.1201571955206235e-05, "loss": 0.8437, "step": 25300 }, { "epoch": 0.8816164707518954, "grad_norm": 0.3864341676235199, "learning_rate": 2.118418307018154e-05, "loss": 0.7497, "step": 25350 }, { "epoch": 0.8833553592543646, "grad_norm": 0.38608992099761963, "learning_rate": 2.1166794185156848e-05, "loss": 0.6447, "step": 25400 }, { "epoch": 0.8850942477568339, "grad_norm": 0.10157038271427155, "learning_rate": 2.1149405300132154e-05, "loss": 0.5693, "step": 25450 }, { "epoch": 0.886833136259303, "grad_norm": 0.41068533062934875, "learning_rate": 2.1132016415107463e-05, "loss": 0.8176, "step": 25500 }, { "epoch": 0.8885720247617723, "grad_norm": 0.11268925666809082, "learning_rate": 2.111462753008277e-05, "loss": 0.7465, "step": 25550 }, { "epoch": 0.8903109132642415, "grad_norm": 0.09619323164224625, "learning_rate": 2.109723864505808e-05, "loss": 0.5608, "step": 25600 }, { "epoch": 0.8920498017667107, "grad_norm": 0.08509095758199692, "learning_rate": 2.1079849760033385e-05, "loss": 0.5071, "step": 25650 }, { "epoch": 0.89378869026918, "grad_norm": 0.2818133234977722, "learning_rate": 2.1062460875008695e-05, "loss": 0.8084, "step": 25700 }, { "epoch": 0.8955275787716491, "grad_norm": 15.20567798614502, "learning_rate": 2.1045071989984e-05, "loss": 0.7546, "step": 25750 }, { "epoch": 0.8972664672741184, "grad_norm": 0.3750741481781006, "learning_rate": 2.102768310495931e-05, "loss": 0.7841, "step": 25800 }, { "epoch": 0.8990053557765876, "grad_norm": 0.3849508464336395, "learning_rate": 2.1010294219934617e-05, "loss": 0.6229, "step": 25850 }, { "epoch": 0.9007442442790569, "grad_norm": 0.3728174567222595, "learning_rate": 2.0992905334909926e-05, "loss": 0.7552, "step": 25900 }, { "epoch": 0.902483132781526, "grad_norm": 0.3613489270210266, "learning_rate": 2.0975516449885232e-05, "loss": 0.7055, "step": 25950 }, { "epoch": 0.9042220212839953, "grad_norm": 0.3878422975540161, "learning_rate": 2.0958127564860542e-05, "loss": 0.7534, "step": 26000 }, { "epoch": 0.9059609097864645, "grad_norm": 0.3020896017551422, "learning_rate": 2.0940738679835848e-05, "loss": 0.6236, "step": 26050 }, { "epoch": 0.9076997982889338, "grad_norm": 14.543740272521973, "learning_rate": 2.0923349794811158e-05, "loss": 0.7418, "step": 26100 }, { "epoch": 0.9094386867914029, "grad_norm": 0.0880632996559143, "learning_rate": 2.0905960909786464e-05, "loss": 0.5613, "step": 26150 }, { "epoch": 0.9111775752938721, "grad_norm": 0.11508060246706009, "learning_rate": 2.0888572024761774e-05, "loss": 0.9102, "step": 26200 }, { "epoch": 0.9129164637963414, "grad_norm": 0.3355863392353058, "learning_rate": 2.087118313973708e-05, "loss": 0.5765, "step": 26250 }, { "epoch": 0.9146553522988106, "grad_norm": 0.24028563499450684, "learning_rate": 2.085379425471239e-05, "loss": 0.4442, "step": 26300 }, { "epoch": 0.9163942408012798, "grad_norm": 0.2877187728881836, "learning_rate": 2.08364053696877e-05, "loss": 0.7166, "step": 26350 }, { "epoch": 0.918133129303749, "grad_norm": 0.2654583156108856, "learning_rate": 2.0819016484663005e-05, "loss": 0.4739, "step": 26400 }, { "epoch": 0.9198720178062183, "grad_norm": 0.194766566157341, "learning_rate": 2.0801627599638315e-05, "loss": 0.4176, "step": 26450 }, { "epoch": 0.9216109063086875, "grad_norm": 0.2546578049659729, "learning_rate": 2.078423871461362e-05, "loss": 0.7937, "step": 26500 }, { "epoch": 0.9233497948111568, "grad_norm": 6.492056369781494, "learning_rate": 2.0766849829588927e-05, "loss": 0.9024, "step": 26550 }, { "epoch": 0.9250886833136259, "grad_norm": 16.818002700805664, "learning_rate": 2.0749460944564233e-05, "loss": 0.6063, "step": 26600 }, { "epoch": 0.9268275718160951, "grad_norm": 0.3750746548175812, "learning_rate": 2.0732072059539543e-05, "loss": 0.7394, "step": 26650 }, { "epoch": 0.9285664603185644, "grad_norm": 0.3302268981933594, "learning_rate": 2.071468317451485e-05, "loss": 0.582, "step": 26700 }, { "epoch": 0.9303053488210336, "grad_norm": 0.353594034910202, "learning_rate": 2.069729428949016e-05, "loss": 0.7851, "step": 26750 }, { "epoch": 0.9320442373235028, "grad_norm": 0.15089674293994904, "learning_rate": 2.0679905404465465e-05, "loss": 1.0131, "step": 26800 }, { "epoch": 0.933783125825972, "grad_norm": 13.898560523986816, "learning_rate": 2.0662516519440774e-05, "loss": 0.6599, "step": 26850 }, { "epoch": 0.9355220143284413, "grad_norm": 0.12935850024223328, "learning_rate": 2.064512763441608e-05, "loss": 0.7664, "step": 26900 }, { "epoch": 0.9372609028309105, "grad_norm": 0.4231715500354767, "learning_rate": 2.062773874939139e-05, "loss": 0.8151, "step": 26950 }, { "epoch": 0.9389997913333797, "grad_norm": 0.1741296350955963, "learning_rate": 2.0610349864366696e-05, "loss": 0.9084, "step": 27000 }, { "epoch": 0.9407386798358489, "grad_norm": 15.841263771057129, "learning_rate": 2.0592960979342006e-05, "loss": 0.7097, "step": 27050 }, { "epoch": 0.9424775683383182, "grad_norm": 0.38920798897743225, "learning_rate": 2.0575572094317312e-05, "loss": 0.6193, "step": 27100 }, { "epoch": 0.9442164568407874, "grad_norm": 15.626350402832031, "learning_rate": 2.055818320929262e-05, "loss": 0.4637, "step": 27150 }, { "epoch": 0.9459553453432565, "grad_norm": 0.28089460730552673, "learning_rate": 2.0540794324267928e-05, "loss": 0.5831, "step": 27200 }, { "epoch": 0.9476942338457258, "grad_norm": 0.2909976541996002, "learning_rate": 2.0523405439243237e-05, "loss": 0.7081, "step": 27250 }, { "epoch": 0.949433122348195, "grad_norm": 14.884838104248047, "learning_rate": 2.0506016554218543e-05, "loss": 0.53, "step": 27300 }, { "epoch": 0.9511720108506643, "grad_norm": 0.33935022354125977, "learning_rate": 2.0488627669193853e-05, "loss": 0.7742, "step": 27350 }, { "epoch": 0.9529108993531334, "grad_norm": 0.40038925409317017, "learning_rate": 2.047123878416916e-05, "loss": 0.9814, "step": 27400 }, { "epoch": 0.9546497878556027, "grad_norm": 0.27903708815574646, "learning_rate": 2.045384989914447e-05, "loss": 0.2486, "step": 27450 }, { "epoch": 0.9563886763580719, "grad_norm": 0.28326892852783203, "learning_rate": 2.0436461014119775e-05, "loss": 0.7092, "step": 27500 }, { "epoch": 0.9581275648605412, "grad_norm": 0.27247941493988037, "learning_rate": 2.0419072129095084e-05, "loss": 0.5072, "step": 27550 }, { "epoch": 0.9598664533630104, "grad_norm": 15.382936477661133, "learning_rate": 2.040168324407039e-05, "loss": 0.7429, "step": 27600 }, { "epoch": 0.9616053418654796, "grad_norm": 0.2817753553390503, "learning_rate": 2.03842943590457e-05, "loss": 0.6012, "step": 27650 }, { "epoch": 0.9633442303679488, "grad_norm": 0.1906421184539795, "learning_rate": 2.0366905474021006e-05, "loss": 0.1798, "step": 27700 }, { "epoch": 0.965083118870418, "grad_norm": 0.22596246004104614, "learning_rate": 2.0349516588996313e-05, "loss": 0.7099, "step": 27750 }, { "epoch": 0.9668220073728873, "grad_norm": 0.24957595765590668, "learning_rate": 2.033212770397162e-05, "loss": 0.6083, "step": 27800 }, { "epoch": 0.9685608958753564, "grad_norm": 0.09353628754615784, "learning_rate": 2.0314738818946928e-05, "loss": 0.8465, "step": 27850 }, { "epoch": 0.9702997843778257, "grad_norm": 0.23048615455627441, "learning_rate": 2.0297349933922238e-05, "loss": 0.3515, "step": 27900 }, { "epoch": 0.9720386728802949, "grad_norm": 0.33687227964401245, "learning_rate": 2.0279961048897544e-05, "loss": 0.8781, "step": 27950 }, { "epoch": 0.9737775613827642, "grad_norm": 14.028570175170898, "learning_rate": 2.0262572163872854e-05, "loss": 1.0298, "step": 28000 }, { "epoch": 0.9755164498852333, "grad_norm": 0.42664074897766113, "learning_rate": 2.024518327884816e-05, "loss": 0.6907, "step": 28050 }, { "epoch": 0.9772553383877026, "grad_norm": 0.3505028188228607, "learning_rate": 2.022779439382347e-05, "loss": 0.527, "step": 28100 }, { "epoch": 0.9789942268901718, "grad_norm": 17.080129623413086, "learning_rate": 2.0210405508798776e-05, "loss": 0.698, "step": 28150 }, { "epoch": 0.9807331153926411, "grad_norm": 0.05734071880578995, "learning_rate": 2.0193016623774085e-05, "loss": 0.7287, "step": 28200 }, { "epoch": 0.9824720038951102, "grad_norm": 0.0745924785733223, "learning_rate": 2.017562773874939e-05, "loss": 0.3794, "step": 28250 }, { "epoch": 0.9842108923975794, "grad_norm": 0.23750722408294678, "learning_rate": 2.01582388537247e-05, "loss": 0.5612, "step": 28300 }, { "epoch": 0.9859497809000487, "grad_norm": 9.333701133728027, "learning_rate": 2.0140849968700007e-05, "loss": 0.7264, "step": 28350 }, { "epoch": 0.9876886694025179, "grad_norm": 0.33404064178466797, "learning_rate": 2.0123461083675317e-05, "loss": 0.7477, "step": 28400 }, { "epoch": 0.9894275579049872, "grad_norm": 0.2573692798614502, "learning_rate": 2.0106072198650623e-05, "loss": 0.3886, "step": 28450 }, { "epoch": 0.9911664464074563, "grad_norm": 14.894536018371582, "learning_rate": 2.0088683313625932e-05, "loss": 0.6181, "step": 28500 }, { "epoch": 0.9929053349099256, "grad_norm": 0.26112234592437744, "learning_rate": 2.007129442860124e-05, "loss": 0.5397, "step": 28550 }, { "epoch": 0.9946442234123948, "grad_norm": 0.09483776986598969, "learning_rate": 2.0053905543576548e-05, "loss": 0.6967, "step": 28600 }, { "epoch": 0.9963831119148641, "grad_norm": 0.4294809401035309, "learning_rate": 2.0036516658551854e-05, "loss": 1.1381, "step": 28650 }, { "epoch": 0.9981220004173332, "grad_norm": 14.106466293334961, "learning_rate": 2.0019127773527164e-05, "loss": 0.8034, "step": 28700 }, { "epoch": 0.9998608889198025, "grad_norm": 0.30930644273757935, "learning_rate": 2.000173888850247e-05, "loss": 0.5603, "step": 28750 }, { "epoch": 1.0, "eval_accuracy": 0.982114909748548, "eval_confusion_matrix": [ [ 111604, 1 ], [ 2056, 1351 ] ], "eval_f1": 0.5677663374658541, "eval_loss": 0.9550163745880127, "eval_precision": 0.9992603550295858, "eval_recall": 0.39653654241267977, "eval_roc_auc": 0.7412989924592933, "eval_runtime": 575.5814, "eval_samples_per_second": 199.819, "eval_steps_per_second": 6.246, "step": 28754 }, { "epoch": 1.0015997774222716, "grad_norm": 0.08516507595777512, "learning_rate": 1.998435000347778e-05, "loss": 0.4621, "step": 28800 }, { "epoch": 1.003338665924741, "grad_norm": 0.29326876997947693, "learning_rate": 1.9966961118453086e-05, "loss": 0.6934, "step": 28850 }, { "epoch": 1.0050775544272101, "grad_norm": 0.37902000546455383, "learning_rate": 1.9949572233428395e-05, "loss": 0.8615, "step": 28900 }, { "epoch": 1.0068164429296793, "grad_norm": 8.411083221435547, "learning_rate": 1.9932183348403698e-05, "loss": 0.6579, "step": 28950 }, { "epoch": 1.0085553314321485, "grad_norm": 0.3825915455818176, "learning_rate": 1.9914794463379008e-05, "loss": 0.7489, "step": 29000 }, { "epoch": 1.0102942199346179, "grad_norm": 0.2835555076599121, "learning_rate": 1.9897405578354314e-05, "loss": 0.3453, "step": 29050 }, { "epoch": 1.012033108437087, "grad_norm": 0.21685755252838135, "learning_rate": 1.9880016693329623e-05, "loss": 0.4983, "step": 29100 }, { "epoch": 1.0137719969395562, "grad_norm": 0.28452444076538086, "learning_rate": 1.986262780830493e-05, "loss": 0.7893, "step": 29150 }, { "epoch": 1.0155108854420254, "grad_norm": 0.26693475246429443, "learning_rate": 1.984523892328024e-05, "loss": 0.5823, "step": 29200 }, { "epoch": 1.0172497739444948, "grad_norm": 0.3723471462726593, "learning_rate": 1.9827850038255545e-05, "loss": 0.9178, "step": 29250 }, { "epoch": 1.018988662446964, "grad_norm": 0.13660454750061035, "learning_rate": 1.9810461153230855e-05, "loss": 0.8609, "step": 29300 }, { "epoch": 1.0207275509494331, "grad_norm": 0.12723450362682343, "learning_rate": 1.979307226820616e-05, "loss": 0.6415, "step": 29350 }, { "epoch": 1.0224664394519023, "grad_norm": 8.601691246032715, "learning_rate": 1.977568338318147e-05, "loss": 0.7219, "step": 29400 }, { "epoch": 1.0242053279543715, "grad_norm": 0.36359694600105286, "learning_rate": 1.975829449815678e-05, "loss": 0.648, "step": 29450 }, { "epoch": 1.0259442164568409, "grad_norm": 14.534405708312988, "learning_rate": 1.9740905613132086e-05, "loss": 0.8246, "step": 29500 }, { "epoch": 1.02768310495931, "grad_norm": 13.929572105407715, "learning_rate": 1.9723516728107396e-05, "loss": 0.6392, "step": 29550 }, { "epoch": 1.0294219934617792, "grad_norm": 8.660381317138672, "learning_rate": 1.9706127843082702e-05, "loss": 0.8248, "step": 29600 }, { "epoch": 1.0311608819642484, "grad_norm": 0.3289220333099365, "learning_rate": 1.9688738958058012e-05, "loss": 0.4727, "step": 29650 }, { "epoch": 1.0328997704667178, "grad_norm": 0.3486472964286804, "learning_rate": 1.9671350073033318e-05, "loss": 0.8158, "step": 29700 }, { "epoch": 1.034638658969187, "grad_norm": 0.10082079470157623, "learning_rate": 1.9653961188008627e-05, "loss": 0.6535, "step": 29750 }, { "epoch": 1.0363775474716561, "grad_norm": 0.2794027626514435, "learning_rate": 1.9636572302983934e-05, "loss": 0.512, "step": 29800 }, { "epoch": 1.0381164359741253, "grad_norm": 0.34246885776519775, "learning_rate": 1.9619183417959243e-05, "loss": 0.8588, "step": 29850 }, { "epoch": 1.0398553244765945, "grad_norm": 0.30086782574653625, "learning_rate": 1.960179453293455e-05, "loss": 0.6169, "step": 29900 }, { "epoch": 1.0415942129790638, "grad_norm": 24.432544708251953, "learning_rate": 1.958440564790986e-05, "loss": 0.5997, "step": 29950 }, { "epoch": 1.043333101481533, "grad_norm": 0.11851809173822403, "learning_rate": 1.9567016762885165e-05, "loss": 0.7588, "step": 30000 }, { "epoch": 1.0450719899840022, "grad_norm": 0.30232417583465576, "learning_rate": 1.9549627877860475e-05, "loss": 0.5093, "step": 30050 }, { "epoch": 1.0468108784864714, "grad_norm": 0.26071402430534363, "learning_rate": 1.9532238992835778e-05, "loss": 0.5978, "step": 30100 }, { "epoch": 1.0485497669889408, "grad_norm": 15.012554168701172, "learning_rate": 1.9514850107811087e-05, "loss": 0.8486, "step": 30150 }, { "epoch": 1.05028865549141, "grad_norm": 0.2790570557117462, "learning_rate": 1.9497461222786393e-05, "loss": 0.5703, "step": 30200 }, { "epoch": 1.052027543993879, "grad_norm": 16.060821533203125, "learning_rate": 1.9480072337761703e-05, "loss": 0.4095, "step": 30250 }, { "epoch": 1.0537664324963483, "grad_norm": 0.27592945098876953, "learning_rate": 1.946268345273701e-05, "loss": 0.6891, "step": 30300 }, { "epoch": 1.0555053209988174, "grad_norm": 14.645027160644531, "learning_rate": 1.944529456771232e-05, "loss": 0.7787, "step": 30350 }, { "epoch": 1.0572442095012868, "grad_norm": 14.829755783081055, "learning_rate": 1.9427905682687625e-05, "loss": 0.7898, "step": 30400 }, { "epoch": 1.058983098003756, "grad_norm": 0.47053980827331543, "learning_rate": 1.9410516797662934e-05, "loss": 0.8128, "step": 30450 }, { "epoch": 1.0607219865062252, "grad_norm": 16.38100242614746, "learning_rate": 1.939312791263824e-05, "loss": 0.6531, "step": 30500 }, { "epoch": 1.0624608750086943, "grad_norm": 14.48969554901123, "learning_rate": 1.937573902761355e-05, "loss": 0.5248, "step": 30550 }, { "epoch": 1.0641997635111637, "grad_norm": 15.1062650680542, "learning_rate": 1.9358350142588856e-05, "loss": 0.5526, "step": 30600 }, { "epoch": 1.065938652013633, "grad_norm": 0.23311901092529297, "learning_rate": 1.9340961257564166e-05, "loss": 0.36, "step": 30650 }, { "epoch": 1.067677540516102, "grad_norm": 0.2747093141078949, "learning_rate": 1.9323572372539472e-05, "loss": 0.6919, "step": 30700 }, { "epoch": 1.0694164290185713, "grad_norm": 0.3265669345855713, "learning_rate": 1.930618348751478e-05, "loss": 0.8556, "step": 30750 }, { "epoch": 1.0711553175210407, "grad_norm": 0.09320729225873947, "learning_rate": 1.9288794602490088e-05, "loss": 0.5398, "step": 30800 }, { "epoch": 1.0728942060235098, "grad_norm": 15.310491561889648, "learning_rate": 1.9271405717465397e-05, "loss": 0.7492, "step": 30850 }, { "epoch": 1.074633094525979, "grad_norm": 0.10254528373479843, "learning_rate": 1.9254016832440703e-05, "loss": 0.6258, "step": 30900 }, { "epoch": 1.0763719830284482, "grad_norm": 0.2851526439189911, "learning_rate": 1.9236627947416013e-05, "loss": 0.6352, "step": 30950 }, { "epoch": 1.0781108715309173, "grad_norm": 0.3072910010814667, "learning_rate": 1.921923906239132e-05, "loss": 0.6323, "step": 31000 }, { "epoch": 1.0798497600333867, "grad_norm": 0.3114217519760132, "learning_rate": 1.920185017736663e-05, "loss": 0.5922, "step": 31050 }, { "epoch": 1.081588648535856, "grad_norm": 0.09747115522623062, "learning_rate": 1.918446129234194e-05, "loss": 0.3978, "step": 31100 }, { "epoch": 1.083327537038325, "grad_norm": 0.22197897732257843, "learning_rate": 1.9167072407317245e-05, "loss": 0.406, "step": 31150 }, { "epoch": 1.0850664255407942, "grad_norm": 0.3350314497947693, "learning_rate": 1.9149683522292554e-05, "loss": 1.0904, "step": 31200 }, { "epoch": 1.0868053140432636, "grad_norm": 0.41657665371894836, "learning_rate": 1.913229463726786e-05, "loss": 0.8445, "step": 31250 }, { "epoch": 1.0885442025457328, "grad_norm": 0.09769626706838608, "learning_rate": 1.9114905752243166e-05, "loss": 0.488, "step": 31300 }, { "epoch": 1.090283091048202, "grad_norm": 0.2977975606918335, "learning_rate": 1.9097516867218473e-05, "loss": 0.5684, "step": 31350 }, { "epoch": 1.0920219795506712, "grad_norm": 0.23959672451019287, "learning_rate": 1.9080127982193782e-05, "loss": 0.3966, "step": 31400 }, { "epoch": 1.0937608680531405, "grad_norm": 0.24441972374916077, "learning_rate": 1.906273909716909e-05, "loss": 0.5769, "step": 31450 }, { "epoch": 1.0954997565556097, "grad_norm": 16.450084686279297, "learning_rate": 1.9045350212144398e-05, "loss": 0.343, "step": 31500 }, { "epoch": 1.0972386450580789, "grad_norm": 0.19125257432460785, "learning_rate": 1.9027961327119704e-05, "loss": 0.5118, "step": 31550 }, { "epoch": 1.098977533560548, "grad_norm": 0.2651856541633606, "learning_rate": 1.9010572442095014e-05, "loss": 0.8553, "step": 31600 }, { "epoch": 1.1007164220630172, "grad_norm": 0.04341524839401245, "learning_rate": 1.899318355707032e-05, "loss": 0.4357, "step": 31650 }, { "epoch": 1.1024553105654866, "grad_norm": 0.221459299325943, "learning_rate": 1.897579467204563e-05, "loss": 0.4718, "step": 31700 }, { "epoch": 1.1041941990679558, "grad_norm": 0.2803959846496582, "learning_rate": 1.8958405787020936e-05, "loss": 0.7545, "step": 31750 }, { "epoch": 1.105933087570425, "grad_norm": 0.30461427569389343, "learning_rate": 1.8941016901996245e-05, "loss": 0.7112, "step": 31800 }, { "epoch": 1.1076719760728941, "grad_norm": 0.35453692078590393, "learning_rate": 1.892362801697155e-05, "loss": 0.6557, "step": 31850 }, { "epoch": 1.1094108645753635, "grad_norm": 0.3814002573490143, "learning_rate": 1.890623913194686e-05, "loss": 0.7952, "step": 31900 }, { "epoch": 1.1111497530778327, "grad_norm": 0.34574055671691895, "learning_rate": 1.8888850246922167e-05, "loss": 0.5486, "step": 31950 }, { "epoch": 1.1128886415803019, "grad_norm": 6.529436111450195, "learning_rate": 1.8871461361897477e-05, "loss": 0.5088, "step": 32000 }, { "epoch": 1.114627530082771, "grad_norm": 0.4123043715953827, "learning_rate": 1.8854072476872783e-05, "loss": 1.0571, "step": 32050 }, { "epoch": 1.1163664185852402, "grad_norm": 2.087681531906128, "learning_rate": 1.8836683591848092e-05, "loss": 0.6169, "step": 32100 }, { "epoch": 1.1181053070877096, "grad_norm": 0.37604308128356934, "learning_rate": 1.88192947068234e-05, "loss": 0.7686, "step": 32150 }, { "epoch": 1.1198441955901788, "grad_norm": 0.3209399878978729, "learning_rate": 1.8801905821798708e-05, "loss": 0.556, "step": 32200 }, { "epoch": 1.121583084092648, "grad_norm": 0.35963577032089233, "learning_rate": 1.8784516936774014e-05, "loss": 0.7382, "step": 32250 }, { "epoch": 1.1233219725951171, "grad_norm": 0.3451351821422577, "learning_rate": 1.8767128051749324e-05, "loss": 0.8777, "step": 32300 }, { "epoch": 1.1250608610975865, "grad_norm": 0.34873807430267334, "learning_rate": 1.874973916672463e-05, "loss": 0.5063, "step": 32350 }, { "epoch": 1.1267997496000557, "grad_norm": 14.675960540771484, "learning_rate": 1.873235028169994e-05, "loss": 0.7736, "step": 32400 }, { "epoch": 1.1285386381025249, "grad_norm": 14.545536041259766, "learning_rate": 1.8714961396675246e-05, "loss": 0.9096, "step": 32450 }, { "epoch": 1.130277526604994, "grad_norm": 0.13444770872592926, "learning_rate": 1.8697572511650552e-05, "loss": 0.5341, "step": 32500 }, { "epoch": 1.1320164151074632, "grad_norm": 0.3244789242744446, "learning_rate": 1.8680183626625858e-05, "loss": 0.6683, "step": 32550 }, { "epoch": 1.1337553036099326, "grad_norm": 0.3180435597896576, "learning_rate": 1.8662794741601168e-05, "loss": 0.5847, "step": 32600 }, { "epoch": 1.1354941921124018, "grad_norm": 0.3736891448497772, "learning_rate": 1.8645405856576477e-05, "loss": 0.7776, "step": 32650 }, { "epoch": 1.137233080614871, "grad_norm": 0.43643301725387573, "learning_rate": 1.8628016971551784e-05, "loss": 0.8416, "step": 32700 }, { "epoch": 1.13897196911734, "grad_norm": 0.36014387011528015, "learning_rate": 1.8610628086527093e-05, "loss": 0.5323, "step": 32750 }, { "epoch": 1.1407108576198095, "grad_norm": 14.775834083557129, "learning_rate": 1.85932392015024e-05, "loss": 0.661, "step": 32800 }, { "epoch": 1.1424497461222787, "grad_norm": 0.09107530117034912, "learning_rate": 1.857585031647771e-05, "loss": 0.6315, "step": 32850 }, { "epoch": 1.1441886346247478, "grad_norm": 0.3153896629810333, "learning_rate": 1.8558461431453015e-05, "loss": 0.5319, "step": 32900 }, { "epoch": 1.145927523127217, "grad_norm": 14.836748123168945, "learning_rate": 1.8541072546428325e-05, "loss": 0.6734, "step": 32950 }, { "epoch": 1.1476664116296864, "grad_norm": 0.31663376092910767, "learning_rate": 1.852368366140363e-05, "loss": 0.5722, "step": 33000 }, { "epoch": 1.1494053001321556, "grad_norm": 0.3627249300479889, "learning_rate": 1.850629477637894e-05, "loss": 0.8279, "step": 33050 }, { "epoch": 1.1511441886346248, "grad_norm": 0.2982751131057739, "learning_rate": 1.8488905891354247e-05, "loss": 0.5786, "step": 33100 }, { "epoch": 1.152883077137094, "grad_norm": 15.644326210021973, "learning_rate": 1.8471517006329556e-05, "loss": 0.639, "step": 33150 }, { "epoch": 1.154621965639563, "grad_norm": 0.2834921181201935, "learning_rate": 1.8454128121304862e-05, "loss": 0.5593, "step": 33200 }, { "epoch": 1.1563608541420325, "grad_norm": 0.27426356077194214, "learning_rate": 1.8436739236280172e-05, "loss": 0.5795, "step": 33250 }, { "epoch": 1.1580997426445017, "grad_norm": 0.2847076952457428, "learning_rate": 1.8419350351255478e-05, "loss": 0.6434, "step": 33300 }, { "epoch": 1.1598386311469708, "grad_norm": 14.33848762512207, "learning_rate": 1.8401961466230788e-05, "loss": 0.7375, "step": 33350 }, { "epoch": 1.16157751964944, "grad_norm": 17.48409652709961, "learning_rate": 1.8384572581206094e-05, "loss": 0.5501, "step": 33400 }, { "epoch": 1.1633164081519092, "grad_norm": 0.2671235203742981, "learning_rate": 1.8367183696181403e-05, "loss": 0.5867, "step": 33450 }, { "epoch": 1.1650552966543786, "grad_norm": 0.27466458082199097, "learning_rate": 1.834979481115671e-05, "loss": 0.6892, "step": 33500 }, { "epoch": 1.1667941851568477, "grad_norm": 0.3541955351829529, "learning_rate": 1.833240592613202e-05, "loss": 0.7226, "step": 33550 }, { "epoch": 1.168533073659317, "grad_norm": 0.2778860032558441, "learning_rate": 1.8315017041107325e-05, "loss": 0.4639, "step": 33600 }, { "epoch": 1.1702719621617863, "grad_norm": 0.2797551155090332, "learning_rate": 1.829762815608263e-05, "loss": 0.7151, "step": 33650 }, { "epoch": 1.1720108506642555, "grad_norm": 0.7543402314186096, "learning_rate": 1.8280239271057938e-05, "loss": 0.8114, "step": 33700 }, { "epoch": 1.1737497391667246, "grad_norm": 0.3373079001903534, "learning_rate": 1.8262850386033247e-05, "loss": 0.6497, "step": 33750 }, { "epoch": 1.1754886276691938, "grad_norm": 14.84599494934082, "learning_rate": 1.8245461501008553e-05, "loss": 0.671, "step": 33800 }, { "epoch": 1.177227516171663, "grad_norm": 13.96418571472168, "learning_rate": 1.8228072615983863e-05, "loss": 0.735, "step": 33850 }, { "epoch": 1.1789664046741324, "grad_norm": 0.3411484956741333, "learning_rate": 1.821068373095917e-05, "loss": 0.5259, "step": 33900 }, { "epoch": 1.1807052931766016, "grad_norm": 0.343717485666275, "learning_rate": 1.819329484593448e-05, "loss": 0.7925, "step": 33950 }, { "epoch": 1.1824441816790707, "grad_norm": 0.32576248049736023, "learning_rate": 1.8175905960909785e-05, "loss": 0.662, "step": 34000 }, { "epoch": 1.18418307018154, "grad_norm": 0.454975962638855, "learning_rate": 1.8158517075885094e-05, "loss": 1.0209, "step": 34050 }, { "epoch": 1.185921958684009, "grad_norm": 14.128923416137695, "learning_rate": 1.81411281908604e-05, "loss": 0.6367, "step": 34100 }, { "epoch": 1.1876608471864785, "grad_norm": 14.307365417480469, "learning_rate": 1.812373930583571e-05, "loss": 0.7241, "step": 34150 }, { "epoch": 1.1893997356889476, "grad_norm": 13.707566261291504, "learning_rate": 1.8106350420811016e-05, "loss": 0.6085, "step": 34200 }, { "epoch": 1.1911386241914168, "grad_norm": 14.312888145446777, "learning_rate": 1.8088961535786326e-05, "loss": 0.9793, "step": 34250 }, { "epoch": 1.192877512693886, "grad_norm": 14.456464767456055, "learning_rate": 1.8071572650761636e-05, "loss": 0.5922, "step": 34300 }, { "epoch": 1.1946164011963554, "grad_norm": 0.27077430486679077, "learning_rate": 1.8054183765736942e-05, "loss": 0.4425, "step": 34350 }, { "epoch": 1.1963552896988245, "grad_norm": 0.3977102041244507, "learning_rate": 1.803679488071225e-05, "loss": 1.0169, "step": 34400 }, { "epoch": 1.1980941782012937, "grad_norm": 0.41692113876342773, "learning_rate": 1.8019405995687557e-05, "loss": 0.7468, "step": 34450 }, { "epoch": 1.1998330667037629, "grad_norm": 0.3784615695476532, "learning_rate": 1.8002017110662867e-05, "loss": 0.6576, "step": 34500 }, { "epoch": 1.2015719552062323, "grad_norm": 0.2975335717201233, "learning_rate": 1.7984628225638173e-05, "loss": 0.52, "step": 34550 }, { "epoch": 1.2033108437087014, "grad_norm": 0.30894696712493896, "learning_rate": 1.7967239340613483e-05, "loss": 0.5959, "step": 34600 }, { "epoch": 1.2050497322111706, "grad_norm": 0.31338492035865784, "learning_rate": 1.794985045558879e-05, "loss": 0.7967, "step": 34650 }, { "epoch": 1.2067886207136398, "grad_norm": 0.33010831475257874, "learning_rate": 1.79324615705641e-05, "loss": 0.6549, "step": 34700 }, { "epoch": 1.208527509216109, "grad_norm": 0.3093559741973877, "learning_rate": 1.7915072685539405e-05, "loss": 0.7798, "step": 34750 }, { "epoch": 1.2102663977185784, "grad_norm": 0.29810765385627747, "learning_rate": 1.789768380051471e-05, "loss": 0.6652, "step": 34800 }, { "epoch": 1.2120052862210475, "grad_norm": 0.32161569595336914, "learning_rate": 1.7880294915490017e-05, "loss": 0.6595, "step": 34850 }, { "epoch": 1.2137441747235167, "grad_norm": 0.3111821711063385, "learning_rate": 1.7862906030465327e-05, "loss": 0.5941, "step": 34900 }, { "epoch": 1.2154830632259859, "grad_norm": 0.358663409948349, "learning_rate": 1.7845517145440633e-05, "loss": 0.7139, "step": 34950 }, { "epoch": 1.2172219517284553, "grad_norm": 0.11144039779901505, "learning_rate": 1.7828128260415942e-05, "loss": 0.8004, "step": 35000 }, { "epoch": 1.2189608402309244, "grad_norm": 0.35516905784606934, "learning_rate": 1.781073937539125e-05, "loss": 0.6303, "step": 35050 }, { "epoch": 1.2206997287333936, "grad_norm": 0.10562097281217575, "learning_rate": 1.7793350490366558e-05, "loss": 0.7705, "step": 35100 }, { "epoch": 1.2224386172358628, "grad_norm": 0.3768903911113739, "learning_rate": 1.7775961605341864e-05, "loss": 0.6948, "step": 35150 }, { "epoch": 1.2241775057383322, "grad_norm": 14.387212753295898, "learning_rate": 1.7758572720317174e-05, "loss": 0.6481, "step": 35200 }, { "epoch": 1.2259163942408013, "grad_norm": 0.37545278668403625, "learning_rate": 1.774118383529248e-05, "loss": 0.8161, "step": 35250 }, { "epoch": 1.2276552827432705, "grad_norm": 0.33890387415885925, "learning_rate": 1.772379495026779e-05, "loss": 0.5786, "step": 35300 }, { "epoch": 1.2293941712457397, "grad_norm": 0.23463213443756104, "learning_rate": 1.7706406065243096e-05, "loss": 0.3011, "step": 35350 }, { "epoch": 1.2311330597482089, "grad_norm": 0.3072350025177002, "learning_rate": 1.7689017180218405e-05, "loss": 1.0103, "step": 35400 }, { "epoch": 1.2328719482506783, "grad_norm": 0.04887990653514862, "learning_rate": 1.767162829519371e-05, "loss": 0.4537, "step": 35450 }, { "epoch": 1.2346108367531474, "grad_norm": 14.86677360534668, "learning_rate": 1.765423941016902e-05, "loss": 1.0052, "step": 35500 }, { "epoch": 1.2363497252556166, "grad_norm": 0.3971104621887207, "learning_rate": 1.7636850525144327e-05, "loss": 0.7324, "step": 35550 }, { "epoch": 1.2380886137580858, "grad_norm": 0.10992255061864853, "learning_rate": 1.7619461640119637e-05, "loss": 0.4829, "step": 35600 }, { "epoch": 1.239827502260555, "grad_norm": 14.975232124328613, "learning_rate": 1.7602072755094943e-05, "loss": 0.692, "step": 35650 }, { "epoch": 1.2415663907630243, "grad_norm": 0.12857475876808167, "learning_rate": 1.7584683870070253e-05, "loss": 0.751, "step": 35700 }, { "epoch": 1.2433052792654935, "grad_norm": 0.05928495526313782, "learning_rate": 1.756729498504556e-05, "loss": 0.6304, "step": 35750 }, { "epoch": 1.2450441677679627, "grad_norm": 0.2798721492290497, "learning_rate": 1.754990610002087e-05, "loss": 0.5362, "step": 35800 }, { "epoch": 1.2467830562704318, "grad_norm": 14.585831642150879, "learning_rate": 1.7532517214996178e-05, "loss": 0.7901, "step": 35850 }, { "epoch": 1.2485219447729012, "grad_norm": 0.08445936441421509, "learning_rate": 1.7515128329971484e-05, "loss": 0.5465, "step": 35900 }, { "epoch": 1.2502608332753704, "grad_norm": 0.31010863184928894, "learning_rate": 1.7497739444946794e-05, "loss": 0.8325, "step": 35950 }, { "epoch": 1.2519997217778396, "grad_norm": 0.24487081170082092, "learning_rate": 1.7480350559922096e-05, "loss": 0.3713, "step": 36000 }, { "epoch": 1.2537386102803088, "grad_norm": 0.21312686800956726, "learning_rate": 1.7462961674897406e-05, "loss": 0.4869, "step": 36050 }, { "epoch": 1.2554774987827781, "grad_norm": 0.23699572682380676, "learning_rate": 1.7445572789872712e-05, "loss": 0.7539, "step": 36100 }, { "epoch": 1.2572163872852473, "grad_norm": 15.472735404968262, "learning_rate": 1.7428183904848022e-05, "loss": 0.5901, "step": 36150 }, { "epoch": 1.2589552757877165, "grad_norm": 0.2673361599445343, "learning_rate": 1.7410795019823328e-05, "loss": 0.6833, "step": 36200 }, { "epoch": 1.2606941642901857, "grad_norm": 0.2803460657596588, "learning_rate": 1.7393406134798638e-05, "loss": 0.5805, "step": 36250 }, { "epoch": 1.2624330527926548, "grad_norm": 0.2268855720758438, "learning_rate": 1.7376017249773944e-05, "loss": 0.4409, "step": 36300 }, { "epoch": 1.2641719412951242, "grad_norm": 15.459443092346191, "learning_rate": 1.7358628364749253e-05, "loss": 0.5989, "step": 36350 }, { "epoch": 1.2659108297975934, "grad_norm": 0.28569093346595764, "learning_rate": 1.734123947972456e-05, "loss": 0.7865, "step": 36400 }, { "epoch": 1.2676497183000626, "grad_norm": 0.32493311166763306, "learning_rate": 1.732385059469987e-05, "loss": 0.7739, "step": 36450 }, { "epoch": 1.269388606802532, "grad_norm": 0.31225457787513733, "learning_rate": 1.7306461709675175e-05, "loss": 0.6539, "step": 36500 }, { "epoch": 1.271127495305001, "grad_norm": 14.639530181884766, "learning_rate": 1.7289072824650485e-05, "loss": 0.844, "step": 36550 }, { "epoch": 1.2728663838074703, "grad_norm": 0.12661844491958618, "learning_rate": 1.727168393962579e-05, "loss": 0.8738, "step": 36600 }, { "epoch": 1.2746052723099395, "grad_norm": 0.34045031666755676, "learning_rate": 1.72542950546011e-05, "loss": 0.5336, "step": 36650 }, { "epoch": 1.2763441608124086, "grad_norm": 0.3881280720233917, "learning_rate": 1.7236906169576407e-05, "loss": 0.866, "step": 36700 }, { "epoch": 1.278083049314878, "grad_norm": 14.286499977111816, "learning_rate": 1.7219517284551716e-05, "loss": 0.7839, "step": 36750 }, { "epoch": 1.2798219378173472, "grad_norm": 17.39789581298828, "learning_rate": 1.7202128399527022e-05, "loss": 0.468, "step": 36800 }, { "epoch": 1.2815608263198164, "grad_norm": 0.32230138778686523, "learning_rate": 1.7184739514502332e-05, "loss": 0.7632, "step": 36850 }, { "epoch": 1.2832997148222856, "grad_norm": 0.28172898292541504, "learning_rate": 1.7167350629477638e-05, "loss": 0.4342, "step": 36900 }, { "epoch": 1.2850386033247547, "grad_norm": 0.32459765672683716, "learning_rate": 1.7149961744452948e-05, "loss": 0.8693, "step": 36950 }, { "epoch": 1.2867774918272241, "grad_norm": 0.3148864209651947, "learning_rate": 1.7132572859428254e-05, "loss": 0.6831, "step": 37000 }, { "epoch": 1.2885163803296933, "grad_norm": 0.24627991020679474, "learning_rate": 1.7115183974403563e-05, "loss": 0.3384, "step": 37050 }, { "epoch": 1.2902552688321625, "grad_norm": 0.2690008282661438, "learning_rate": 1.709779508937887e-05, "loss": 0.9343, "step": 37100 }, { "epoch": 1.2919941573346316, "grad_norm": 0.07868574559688568, "learning_rate": 1.708040620435418e-05, "loss": 0.385, "step": 37150 }, { "epoch": 1.2937330458371008, "grad_norm": 0.24632751941680908, "learning_rate": 1.7063017319329482e-05, "loss": 0.6029, "step": 37200 }, { "epoch": 1.2954719343395702, "grad_norm": 15.248940467834473, "learning_rate": 1.704562843430479e-05, "loss": 0.4851, "step": 37250 }, { "epoch": 1.2972108228420394, "grad_norm": 0.07632571458816528, "learning_rate": 1.7028239549280098e-05, "loss": 0.676, "step": 37300 }, { "epoch": 1.2989497113445085, "grad_norm": 14.113283157348633, "learning_rate": 1.7010850664255407e-05, "loss": 1.1448, "step": 37350 }, { "epoch": 1.300688599846978, "grad_norm": 14.16009521484375, "learning_rate": 1.6993461779230714e-05, "loss": 0.7483, "step": 37400 }, { "epoch": 1.302427488349447, "grad_norm": 8.325135231018066, "learning_rate": 1.6976072894206023e-05, "loss": 0.6896, "step": 37450 }, { "epoch": 1.3041663768519163, "grad_norm": 0.2641763985157013, "learning_rate": 1.6958684009181333e-05, "loss": 0.3609, "step": 37500 }, { "epoch": 1.3059052653543854, "grad_norm": 0.2351849377155304, "learning_rate": 1.694129512415664e-05, "loss": 0.6582, "step": 37550 }, { "epoch": 1.3076441538568546, "grad_norm": 0.3314284682273865, "learning_rate": 1.692390623913195e-05, "loss": 0.7713, "step": 37600 }, { "epoch": 1.309383042359324, "grad_norm": 0.32011252641677856, "learning_rate": 1.6906517354107255e-05, "loss": 0.6556, "step": 37650 }, { "epoch": 1.3111219308617932, "grad_norm": 0.35262325406074524, "learning_rate": 1.6889128469082564e-05, "loss": 0.6711, "step": 37700 }, { "epoch": 1.3128608193642624, "grad_norm": 16.903472900390625, "learning_rate": 1.687173958405787e-05, "loss": 0.8153, "step": 37750 }, { "epoch": 1.3145997078667315, "grad_norm": 0.05488205328583717, "learning_rate": 1.685435069903318e-05, "loss": 0.5562, "step": 37800 }, { "epoch": 1.3163385963692007, "grad_norm": 0.040771517902612686, "learning_rate": 1.6836961814008486e-05, "loss": 0.3446, "step": 37850 }, { "epoch": 1.31807748487167, "grad_norm": 0.17053009569644928, "learning_rate": 1.6819572928983796e-05, "loss": 0.2712, "step": 37900 }, { "epoch": 1.3198163733741393, "grad_norm": 0.19360297918319702, "learning_rate": 1.6802184043959102e-05, "loss": 0.6155, "step": 37950 }, { "epoch": 1.3215552618766084, "grad_norm": 9.876904487609863, "learning_rate": 1.678479515893441e-05, "loss": 0.5168, "step": 38000 }, { "epoch": 1.3232941503790778, "grad_norm": 0.2830142676830292, "learning_rate": 1.6767406273909718e-05, "loss": 0.9299, "step": 38050 }, { "epoch": 1.325033038881547, "grad_norm": 0.36638781428337097, "learning_rate": 1.6750017388885027e-05, "loss": 0.8832, "step": 38100 }, { "epoch": 1.3267719273840162, "grad_norm": 14.740436553955078, "learning_rate": 1.6732628503860333e-05, "loss": 0.6088, "step": 38150 }, { "epoch": 1.3285108158864853, "grad_norm": 0.359180212020874, "learning_rate": 1.6715239618835643e-05, "loss": 0.8452, "step": 38200 }, { "epoch": 1.3302497043889545, "grad_norm": 0.3864383399486542, "learning_rate": 1.669785073381095e-05, "loss": 0.7456, "step": 38250 }, { "epoch": 1.331988592891424, "grad_norm": 0.46097123622894287, "learning_rate": 1.668046184878626e-05, "loss": 0.9367, "step": 38300 }, { "epoch": 1.333727481393893, "grad_norm": 0.30760231614112854, "learning_rate": 1.666307296376156e-05, "loss": 0.4968, "step": 38350 }, { "epoch": 1.3354663698963622, "grad_norm": 0.43048593401908875, "learning_rate": 1.664568407873687e-05, "loss": 0.9311, "step": 38400 }, { "epoch": 1.3372052583988314, "grad_norm": 0.3921976387500763, "learning_rate": 1.6628295193712177e-05, "loss": 0.6349, "step": 38450 }, { "epoch": 1.3389441469013006, "grad_norm": 0.4269780218601227, "learning_rate": 1.6610906308687487e-05, "loss": 0.9158, "step": 38500 }, { "epoch": 1.34068303540377, "grad_norm": 0.3894469141960144, "learning_rate": 1.6593517423662793e-05, "loss": 0.6313, "step": 38550 }, { "epoch": 1.3424219239062392, "grad_norm": 13.977080345153809, "learning_rate": 1.6576128538638102e-05, "loss": 0.6639, "step": 38600 }, { "epoch": 1.3441608124087083, "grad_norm": 15.287317276000977, "learning_rate": 1.655873965361341e-05, "loss": 0.5728, "step": 38650 }, { "epoch": 1.3458997009111775, "grad_norm": 0.3106958866119385, "learning_rate": 1.6541350768588718e-05, "loss": 0.5267, "step": 38700 }, { "epoch": 1.3476385894136467, "grad_norm": 15.000812530517578, "learning_rate": 1.6523961883564024e-05, "loss": 0.6807, "step": 38750 }, { "epoch": 1.349377477916116, "grad_norm": 0.30887389183044434, "learning_rate": 1.6506572998539334e-05, "loss": 0.684, "step": 38800 }, { "epoch": 1.3511163664185852, "grad_norm": 0.1084451824426651, "learning_rate": 1.648918411351464e-05, "loss": 0.8619, "step": 38850 }, { "epoch": 1.3528552549210544, "grad_norm": 13.727706909179688, "learning_rate": 1.647179522848995e-05, "loss": 0.9269, "step": 38900 }, { "epoch": 1.3545941434235238, "grad_norm": 13.991558074951172, "learning_rate": 1.6454406343465256e-05, "loss": 0.7516, "step": 38950 }, { "epoch": 1.356333031925993, "grad_norm": 0.3315747082233429, "learning_rate": 1.6437017458440565e-05, "loss": 0.4582, "step": 39000 }, { "epoch": 1.3580719204284621, "grad_norm": 0.2814598083496094, "learning_rate": 1.6419628573415875e-05, "loss": 0.5906, "step": 39050 }, { "epoch": 1.3598108089309313, "grad_norm": 15.64398193359375, "learning_rate": 1.640223968839118e-05, "loss": 0.5696, "step": 39100 }, { "epoch": 1.3615496974334005, "grad_norm": 0.2522576153278351, "learning_rate": 1.638485080336649e-05, "loss": 0.6494, "step": 39150 }, { "epoch": 1.3632885859358699, "grad_norm": 8.81415843963623, "learning_rate": 1.6367461918341797e-05, "loss": 0.8638, "step": 39200 }, { "epoch": 1.365027474438339, "grad_norm": 0.3415588140487671, "learning_rate": 1.6350073033317107e-05, "loss": 0.6693, "step": 39250 }, { "epoch": 1.3667663629408082, "grad_norm": 6.174212455749512, "learning_rate": 1.6332684148292413e-05, "loss": 0.6858, "step": 39300 }, { "epoch": 1.3685052514432774, "grad_norm": 0.09783170372247696, "learning_rate": 1.6315295263267722e-05, "loss": 0.5771, "step": 39350 }, { "epoch": 1.3702441399457466, "grad_norm": 0.3193727433681488, "learning_rate": 1.629790637824303e-05, "loss": 0.6198, "step": 39400 }, { "epoch": 1.371983028448216, "grad_norm": 0.34785720705986023, "learning_rate": 1.6280517493218338e-05, "loss": 0.7339, "step": 39450 }, { "epoch": 1.3737219169506851, "grad_norm": 0.26217377185821533, "learning_rate": 1.6263128608193644e-05, "loss": 0.4622, "step": 39500 }, { "epoch": 1.3754608054531543, "grad_norm": 15.38202953338623, "learning_rate": 1.624573972316895e-05, "loss": 0.6731, "step": 39550 }, { "epoch": 1.3771996939556237, "grad_norm": 0.2318250834941864, "learning_rate": 1.6228350838144257e-05, "loss": 0.3739, "step": 39600 }, { "epoch": 1.3789385824580929, "grad_norm": 16.05558204650879, "learning_rate": 1.6210961953119566e-05, "loss": 0.8239, "step": 39650 }, { "epoch": 1.380677470960562, "grad_norm": 15.266398429870605, "learning_rate": 1.6193573068094872e-05, "loss": 0.9169, "step": 39700 }, { "epoch": 1.3824163594630312, "grad_norm": 15.554961204528809, "learning_rate": 1.6176184183070182e-05, "loss": 0.3135, "step": 39750 }, { "epoch": 1.3841552479655004, "grad_norm": 15.362130165100098, "learning_rate": 1.6158795298045488e-05, "loss": 0.7413, "step": 39800 }, { "epoch": 1.3858941364679698, "grad_norm": 0.2443486750125885, "learning_rate": 1.6141406413020798e-05, "loss": 0.4487, "step": 39850 }, { "epoch": 1.387633024970439, "grad_norm": 0.2557162642478943, "learning_rate": 1.6124017527996104e-05, "loss": 0.677, "step": 39900 }, { "epoch": 1.3893719134729081, "grad_norm": 0.26771655678749084, "learning_rate": 1.6106628642971413e-05, "loss": 0.6612, "step": 39950 }, { "epoch": 1.3911108019753773, "grad_norm": 0.34295395016670227, "learning_rate": 1.608923975794672e-05, "loss": 0.7774, "step": 40000 }, { "epoch": 1.3928496904778465, "grad_norm": 8.897124290466309, "learning_rate": 1.607185087292203e-05, "loss": 0.7174, "step": 40050 }, { "epoch": 1.3945885789803159, "grad_norm": 0.5183162689208984, "learning_rate": 1.6054461987897335e-05, "loss": 1.114, "step": 40100 }, { "epoch": 1.396327467482785, "grad_norm": 0.38652658462524414, "learning_rate": 1.6037073102872645e-05, "loss": 0.5484, "step": 40150 }, { "epoch": 1.3980663559852542, "grad_norm": 0.3219066560268402, "learning_rate": 1.601968421784795e-05, "loss": 0.5387, "step": 40200 }, { "epoch": 1.3998052444877234, "grad_norm": 0.32057735323905945, "learning_rate": 1.600229533282326e-05, "loss": 0.6595, "step": 40250 }, { "epoch": 1.4015441329901925, "grad_norm": 15.234573364257812, "learning_rate": 1.5984906447798567e-05, "loss": 0.5786, "step": 40300 }, { "epoch": 1.403283021492662, "grad_norm": 0.3357444703578949, "learning_rate": 1.5967517562773876e-05, "loss": 0.8288, "step": 40350 }, { "epoch": 1.405021909995131, "grad_norm": 0.30895742774009705, "learning_rate": 1.5950128677749183e-05, "loss": 0.5743, "step": 40400 }, { "epoch": 1.4067607984976003, "grad_norm": 0.32186660170555115, "learning_rate": 1.5932739792724492e-05, "loss": 0.7266, "step": 40450 }, { "epoch": 1.4084996870000697, "grad_norm": 14.691875457763672, "learning_rate": 1.59153509076998e-05, "loss": 0.856, "step": 40500 }, { "epoch": 1.4102385755025388, "grad_norm": 0.24888481199741364, "learning_rate": 1.5897962022675108e-05, "loss": 0.3666, "step": 40550 }, { "epoch": 1.411977464005008, "grad_norm": 0.07974103093147278, "learning_rate": 1.5880573137650414e-05, "loss": 0.6287, "step": 40600 }, { "epoch": 1.4137163525074772, "grad_norm": 0.33765289187431335, "learning_rate": 1.5863184252625724e-05, "loss": 0.9461, "step": 40650 }, { "epoch": 1.4154552410099464, "grad_norm": 0.27449631690979004, "learning_rate": 1.584579536760103e-05, "loss": 0.5003, "step": 40700 }, { "epoch": 1.4171941295124157, "grad_norm": 0.3194211423397064, "learning_rate": 1.5828406482576336e-05, "loss": 0.73, "step": 40750 }, { "epoch": 1.418933018014885, "grad_norm": 15.010588645935059, "learning_rate": 1.5811017597551646e-05, "loss": 0.6472, "step": 40800 }, { "epoch": 1.420671906517354, "grad_norm": 0.39936551451683044, "learning_rate": 1.5793628712526952e-05, "loss": 1.0075, "step": 40850 }, { "epoch": 1.4224107950198233, "grad_norm": 0.4064672291278839, "learning_rate": 1.577623982750226e-05, "loss": 0.7379, "step": 40900 }, { "epoch": 1.4241496835222924, "grad_norm": 0.4380810260772705, "learning_rate": 1.5758850942477567e-05, "loss": 0.7361, "step": 40950 }, { "epoch": 1.4258885720247618, "grad_norm": 13.805548667907715, "learning_rate": 1.5741462057452877e-05, "loss": 0.7617, "step": 41000 }, { "epoch": 1.427627460527231, "grad_norm": 0.1191515251994133, "learning_rate": 1.5724073172428183e-05, "loss": 0.5613, "step": 41050 }, { "epoch": 1.4293663490297002, "grad_norm": 0.34791848063468933, "learning_rate": 1.5706684287403493e-05, "loss": 0.6146, "step": 41100 }, { "epoch": 1.4311052375321696, "grad_norm": 0.4614674150943756, "learning_rate": 1.56892954023788e-05, "loss": 1.1374, "step": 41150 }, { "epoch": 1.4328441260346387, "grad_norm": 13.183795928955078, "learning_rate": 1.567190651735411e-05, "loss": 0.9729, "step": 41200 }, { "epoch": 1.434583014537108, "grad_norm": 0.16514423489570618, "learning_rate": 1.5654517632329415e-05, "loss": 0.7315, "step": 41250 }, { "epoch": 1.436321903039577, "grad_norm": 0.4884890615940094, "learning_rate": 1.5637128747304724e-05, "loss": 0.8402, "step": 41300 }, { "epoch": 1.4380607915420462, "grad_norm": 0.4369771182537079, "learning_rate": 1.561973986228003e-05, "loss": 0.7743, "step": 41350 }, { "epoch": 1.4397996800445156, "grad_norm": 0.3776700496673584, "learning_rate": 1.560235097725534e-05, "loss": 0.6146, "step": 41400 }, { "epoch": 1.4415385685469848, "grad_norm": 0.36948996782302856, "learning_rate": 1.5584962092230646e-05, "loss": 0.58, "step": 41450 }, { "epoch": 1.443277457049454, "grad_norm": 0.3505516052246094, "learning_rate": 1.5567573207205956e-05, "loss": 0.7235, "step": 41500 }, { "epoch": 1.4450163455519232, "grad_norm": 8.033348083496094, "learning_rate": 1.5550184322181262e-05, "loss": 0.9956, "step": 41550 }, { "epoch": 1.4467552340543923, "grad_norm": 0.4829118549823761, "learning_rate": 1.553279543715657e-05, "loss": 0.9302, "step": 41600 }, { "epoch": 1.4484941225568617, "grad_norm": 0.36253422498703003, "learning_rate": 1.5515406552131878e-05, "loss": 0.3912, "step": 41650 }, { "epoch": 1.450233011059331, "grad_norm": 0.10378283262252808, "learning_rate": 1.5498017667107187e-05, "loss": 0.6036, "step": 41700 }, { "epoch": 1.4519718995618, "grad_norm": 0.261432021856308, "learning_rate": 1.5480628782082493e-05, "loss": 0.6062, "step": 41750 }, { "epoch": 1.4537107880642695, "grad_norm": 15.028236389160156, "learning_rate": 1.5463239897057803e-05, "loss": 0.6901, "step": 41800 }, { "epoch": 1.4554496765667384, "grad_norm": 14.419363975524902, "learning_rate": 1.544585101203311e-05, "loss": 0.7019, "step": 41850 }, { "epoch": 1.4571885650692078, "grad_norm": 0.3258010149002075, "learning_rate": 1.5428462127008415e-05, "loss": 0.5798, "step": 41900 }, { "epoch": 1.458927453571677, "grad_norm": 15.359526634216309, "learning_rate": 1.541107324198372e-05, "loss": 0.8554, "step": 41950 }, { "epoch": 1.4606663420741461, "grad_norm": 8.816237449645996, "learning_rate": 1.539368435695903e-05, "loss": 0.362, "step": 42000 }, { "epoch": 1.4624052305766155, "grad_norm": 0.29025977849960327, "learning_rate": 1.5376295471934337e-05, "loss": 0.6248, "step": 42050 }, { "epoch": 1.4641441190790847, "grad_norm": 14.948585510253906, "learning_rate": 1.5358906586909647e-05, "loss": 0.7123, "step": 42100 }, { "epoch": 1.4658830075815539, "grad_norm": 0.1228952631354332, "learning_rate": 1.5341517701884953e-05, "loss": 0.829, "step": 42150 }, { "epoch": 1.467621896084023, "grad_norm": 0.37505483627319336, "learning_rate": 1.5324128816860263e-05, "loss": 0.6005, "step": 42200 }, { "epoch": 1.4693607845864922, "grad_norm": 0.12754780054092407, "learning_rate": 1.5306739931835572e-05, "loss": 0.8861, "step": 42250 }, { "epoch": 1.4710996730889616, "grad_norm": 0.35545504093170166, "learning_rate": 1.528935104681088e-05, "loss": 0.5714, "step": 42300 }, { "epoch": 1.4728385615914308, "grad_norm": 0.38497495651245117, "learning_rate": 1.5271962161786188e-05, "loss": 0.8214, "step": 42350 }, { "epoch": 1.4745774500939, "grad_norm": 0.5260298252105713, "learning_rate": 1.5254573276761494e-05, "loss": 1.0007, "step": 42400 }, { "epoch": 1.4763163385963691, "grad_norm": 14.432426452636719, "learning_rate": 1.5237184391736804e-05, "loss": 0.5254, "step": 42450 }, { "epoch": 1.4780552270988383, "grad_norm": 0.3723883628845215, "learning_rate": 1.521979550671211e-05, "loss": 0.7319, "step": 42500 }, { "epoch": 1.4797941156013077, "grad_norm": 0.12767699360847473, "learning_rate": 1.520240662168742e-05, "loss": 0.8943, "step": 42550 }, { "epoch": 1.4815330041037769, "grad_norm": 0.40357959270477295, "learning_rate": 1.5185017736662726e-05, "loss": 0.6638, "step": 42600 }, { "epoch": 1.483271892606246, "grad_norm": 14.842057228088379, "learning_rate": 1.5167628851638035e-05, "loss": 0.6043, "step": 42650 }, { "epoch": 1.4850107811087154, "grad_norm": 0.33560484647750854, "learning_rate": 1.5150239966613341e-05, "loss": 0.5475, "step": 42700 }, { "epoch": 1.4867496696111846, "grad_norm": 0.2995792627334595, "learning_rate": 1.513285108158865e-05, "loss": 0.4985, "step": 42750 }, { "epoch": 1.4884885581136538, "grad_norm": 0.3286113142967224, "learning_rate": 1.5115462196563955e-05, "loss": 0.8474, "step": 42800 }, { "epoch": 1.490227446616123, "grad_norm": 8.44316291809082, "learning_rate": 1.5098073311539265e-05, "loss": 0.7604, "step": 42850 }, { "epoch": 1.4919663351185921, "grad_norm": 14.119556427001953, "learning_rate": 1.5080684426514571e-05, "loss": 0.9391, "step": 42900 }, { "epoch": 1.4937052236210615, "grad_norm": 0.4502834975719452, "learning_rate": 1.506329554148988e-05, "loss": 0.7013, "step": 42950 }, { "epoch": 1.4954441121235307, "grad_norm": 0.3132372498512268, "learning_rate": 1.5045906656465187e-05, "loss": 0.5375, "step": 43000 }, { "epoch": 1.4971830006259998, "grad_norm": 0.11868192255496979, "learning_rate": 1.5028517771440496e-05, "loss": 0.7506, "step": 43050 }, { "epoch": 1.498921889128469, "grad_norm": 0.29090994596481323, "learning_rate": 1.5011128886415803e-05, "loss": 0.5297, "step": 43100 }, { "epoch": 1.5006607776309382, "grad_norm": 0.3990301787853241, "learning_rate": 1.4993740001391112e-05, "loss": 1.0468, "step": 43150 }, { "epoch": 1.5023996661334076, "grad_norm": 0.1082477867603302, "learning_rate": 1.497635111636642e-05, "loss": 0.5074, "step": 43200 }, { "epoch": 1.5041385546358768, "grad_norm": 0.31045249104499817, "learning_rate": 1.4958962231341728e-05, "loss": 0.5919, "step": 43250 }, { "epoch": 1.505877443138346, "grad_norm": 0.2740370035171509, "learning_rate": 1.4941573346317036e-05, "loss": 0.5356, "step": 43300 }, { "epoch": 1.5076163316408153, "grad_norm": 0.2750259339809418, "learning_rate": 1.4924184461292342e-05, "loss": 0.6114, "step": 43350 }, { "epoch": 1.5093552201432843, "grad_norm": 16.168596267700195, "learning_rate": 1.490679557626765e-05, "loss": 0.4787, "step": 43400 }, { "epoch": 1.5110941086457537, "grad_norm": 0.2125965654850006, "learning_rate": 1.4889406691242958e-05, "loss": 0.3832, "step": 43450 }, { "epoch": 1.5128329971482228, "grad_norm": 19.162761688232422, "learning_rate": 1.4872017806218266e-05, "loss": 0.7238, "step": 43500 }, { "epoch": 1.514571885650692, "grad_norm": 0.24151895940303802, "learning_rate": 1.4854628921193574e-05, "loss": 0.5732, "step": 43550 }, { "epoch": 1.5163107741531614, "grad_norm": 0.29702919721603394, "learning_rate": 1.4837240036168881e-05, "loss": 0.7439, "step": 43600 }, { "epoch": 1.5180496626556306, "grad_norm": 0.2624787986278534, "learning_rate": 1.481985115114419e-05, "loss": 0.552, "step": 43650 }, { "epoch": 1.5197885511580997, "grad_norm": 0.37746384739875793, "learning_rate": 1.4802462266119497e-05, "loss": 0.9858, "step": 43700 }, { "epoch": 1.521527439660569, "grad_norm": 14.887701988220215, "learning_rate": 1.4785073381094805e-05, "loss": 0.6507, "step": 43750 }, { "epoch": 1.523266328163038, "grad_norm": 0.11427232623100281, "learning_rate": 1.4767684496070113e-05, "loss": 0.7853, "step": 43800 }, { "epoch": 1.5250052166655075, "grad_norm": 0.29017579555511475, "learning_rate": 1.475029561104542e-05, "loss": 0.4396, "step": 43850 }, { "epoch": 1.5267441051679767, "grad_norm": 0.35939788818359375, "learning_rate": 1.4732906726020729e-05, "loss": 0.9105, "step": 43900 }, { "epoch": 1.5284829936704458, "grad_norm": 0.2940393090248108, "learning_rate": 1.4715517840996035e-05, "loss": 0.4235, "step": 43950 }, { "epoch": 1.5302218821729152, "grad_norm": 0.27985110878944397, "learning_rate": 1.4698128955971343e-05, "loss": 0.5442, "step": 44000 }, { "epoch": 1.5319607706753842, "grad_norm": 0.36589476466178894, "learning_rate": 1.468074007094665e-05, "loss": 1.004, "step": 44050 }, { "epoch": 1.5336996591778536, "grad_norm": 0.34856584668159485, "learning_rate": 1.4663351185921958e-05, "loss": 0.5781, "step": 44100 }, { "epoch": 1.5354385476803227, "grad_norm": 17.036237716674805, "learning_rate": 1.4645962300897266e-05, "loss": 0.7726, "step": 44150 }, { "epoch": 1.537177436182792, "grad_norm": 0.2964613735675812, "learning_rate": 1.4628573415872574e-05, "loss": 0.4642, "step": 44200 }, { "epoch": 1.5389163246852613, "grad_norm": 15.98739242553711, "learning_rate": 1.4611184530847882e-05, "loss": 0.4056, "step": 44250 }, { "epoch": 1.5406552131877302, "grad_norm": 0.2532614469528198, "learning_rate": 1.459379564582319e-05, "loss": 0.6505, "step": 44300 }, { "epoch": 1.5423941016901996, "grad_norm": 14.943839073181152, "learning_rate": 1.4576406760798498e-05, "loss": 0.8373, "step": 44350 }, { "epoch": 1.5441329901926688, "grad_norm": 0.28259339928627014, "learning_rate": 1.4559017875773806e-05, "loss": 0.5138, "step": 44400 }, { "epoch": 1.545871878695138, "grad_norm": 14.82338809967041, "learning_rate": 1.4541628990749114e-05, "loss": 0.7511, "step": 44450 }, { "epoch": 1.5476107671976074, "grad_norm": 13.80994701385498, "learning_rate": 1.4524240105724421e-05, "loss": 1.0056, "step": 44500 }, { "epoch": 1.5493496557000765, "grad_norm": 0.053489260375499725, "learning_rate": 1.4506851220699728e-05, "loss": 0.3762, "step": 44550 }, { "epoch": 1.5510885442025457, "grad_norm": 0.35368120670318604, "learning_rate": 1.4489462335675035e-05, "loss": 1.0054, "step": 44600 }, { "epoch": 1.552827432705015, "grad_norm": 14.929844856262207, "learning_rate": 1.4472073450650345e-05, "loss": 0.5457, "step": 44650 }, { "epoch": 1.554566321207484, "grad_norm": 14.032330513000488, "learning_rate": 1.4454684565625653e-05, "loss": 0.8987, "step": 44700 }, { "epoch": 1.5563052097099535, "grad_norm": 8.723248481750488, "learning_rate": 1.443729568060096e-05, "loss": 0.7021, "step": 44750 }, { "epoch": 1.5580440982124226, "grad_norm": 0.33651769161224365, "learning_rate": 1.4419906795576269e-05, "loss": 0.68, "step": 44800 }, { "epoch": 1.5597829867148918, "grad_norm": 0.31530413031578064, "learning_rate": 1.4402517910551577e-05, "loss": 0.6214, "step": 44850 }, { "epoch": 1.5615218752173612, "grad_norm": 0.26836153864860535, "learning_rate": 1.4385129025526884e-05, "loss": 0.4758, "step": 44900 }, { "epoch": 1.5632607637198301, "grad_norm": 0.31396177411079407, "learning_rate": 1.4367740140502192e-05, "loss": 0.8163, "step": 44950 }, { "epoch": 1.5649996522222995, "grad_norm": 15.323482513427734, "learning_rate": 1.43503512554775e-05, "loss": 0.5938, "step": 45000 }, { "epoch": 1.5667385407247687, "grad_norm": 0.08455043286085129, "learning_rate": 1.4332962370452808e-05, "loss": 0.6469, "step": 45050 }, { "epoch": 1.5684774292272379, "grad_norm": 0.271321564912796, "learning_rate": 1.4315573485428114e-05, "loss": 0.6423, "step": 45100 }, { "epoch": 1.5702163177297073, "grad_norm": 0.32453733682632446, "learning_rate": 1.4298184600403422e-05, "loss": 0.71, "step": 45150 }, { "epoch": 1.5719552062321764, "grad_norm": 0.3193889260292053, "learning_rate": 1.428079571537873e-05, "loss": 0.757, "step": 45200 }, { "epoch": 1.5736940947346456, "grad_norm": 0.3245013952255249, "learning_rate": 1.4263406830354038e-05, "loss": 0.7284, "step": 45250 }, { "epoch": 1.5754329832371148, "grad_norm": 0.40651944279670715, "learning_rate": 1.4246017945329346e-05, "loss": 0.8446, "step": 45300 }, { "epoch": 1.577171871739584, "grad_norm": 0.354087769985199, "learning_rate": 1.4228629060304654e-05, "loss": 0.6855, "step": 45350 }, { "epoch": 1.5789107602420533, "grad_norm": 0.35171443223953247, "learning_rate": 1.4211240175279961e-05, "loss": 0.6399, "step": 45400 }, { "epoch": 1.5806496487445225, "grad_norm": 0.10524750500917435, "learning_rate": 1.419385129025527e-05, "loss": 0.7575, "step": 45450 }, { "epoch": 1.5823885372469917, "grad_norm": 0.3319125175476074, "learning_rate": 1.4176462405230577e-05, "loss": 0.4392, "step": 45500 }, { "epoch": 1.584127425749461, "grad_norm": 14.415853500366211, "learning_rate": 1.4159073520205885e-05, "loss": 0.8657, "step": 45550 }, { "epoch": 1.58586631425193, "grad_norm": 0.11617577821016312, "learning_rate": 1.4141684635181193e-05, "loss": 0.5994, "step": 45600 }, { "epoch": 1.5876052027543994, "grad_norm": 0.38045790791511536, "learning_rate": 1.41242957501565e-05, "loss": 0.8175, "step": 45650 }, { "epoch": 1.5893440912568686, "grad_norm": 0.3355318009853363, "learning_rate": 1.4106906865131807e-05, "loss": 0.6465, "step": 45700 }, { "epoch": 1.5910829797593378, "grad_norm": 13.3422212600708, "learning_rate": 1.4089517980107115e-05, "loss": 0.9759, "step": 45750 }, { "epoch": 1.5928218682618072, "grad_norm": 0.4472561478614807, "learning_rate": 1.4072129095082423e-05, "loss": 0.8251, "step": 45800 }, { "epoch": 1.594560756764276, "grad_norm": 0.4939728081226349, "learning_rate": 1.405474021005773e-05, "loss": 0.8316, "step": 45850 }, { "epoch": 1.5962996452667455, "grad_norm": 0.398408442735672, "learning_rate": 1.4037351325033039e-05, "loss": 0.6741, "step": 45900 }, { "epoch": 1.5980385337692147, "grad_norm": 0.42074644565582275, "learning_rate": 1.4019962440008346e-05, "loss": 0.7309, "step": 45950 }, { "epoch": 1.5997774222716838, "grad_norm": 13.036781311035156, "learning_rate": 1.4002573554983654e-05, "loss": 0.8819, "step": 46000 }, { "epoch": 1.6015163107741532, "grad_norm": 0.1412254273891449, "learning_rate": 1.3985184669958962e-05, "loss": 0.6706, "step": 46050 }, { "epoch": 1.6032551992766224, "grad_norm": 0.4364508092403412, "learning_rate": 1.396779578493427e-05, "loss": 0.7427, "step": 46100 }, { "epoch": 1.6049940877790916, "grad_norm": 0.4100753664970398, "learning_rate": 1.3950406899909578e-05, "loss": 0.5497, "step": 46150 }, { "epoch": 1.606732976281561, "grad_norm": 0.3213721215724945, "learning_rate": 1.3933018014884886e-05, "loss": 0.6348, "step": 46200 }, { "epoch": 1.60847186478403, "grad_norm": 0.33552175760269165, "learning_rate": 1.3915629129860195e-05, "loss": 0.5986, "step": 46250 }, { "epoch": 1.6102107532864993, "grad_norm": 0.2943664491176605, "learning_rate": 1.3898240244835502e-05, "loss": 0.5063, "step": 46300 }, { "epoch": 1.6119496417889685, "grad_norm": 0.37391772866249084, "learning_rate": 1.388085135981081e-05, "loss": 1.0345, "step": 46350 }, { "epoch": 1.6136885302914377, "grad_norm": 0.38463249802589417, "learning_rate": 1.3863462474786117e-05, "loss": 0.656, "step": 46400 }, { "epoch": 1.615427418793907, "grad_norm": 0.36581677198410034, "learning_rate": 1.3846073589761425e-05, "loss": 0.6534, "step": 46450 }, { "epoch": 1.617166307296376, "grad_norm": 14.770733833312988, "learning_rate": 1.3828684704736733e-05, "loss": 0.5174, "step": 46500 }, { "epoch": 1.6189051957988454, "grad_norm": 0.2862494885921478, "learning_rate": 1.3811295819712041e-05, "loss": 0.4709, "step": 46550 }, { "epoch": 1.6206440843013146, "grad_norm": 15.328411102294922, "learning_rate": 1.3793906934687349e-05, "loss": 0.6909, "step": 46600 }, { "epoch": 1.6223829728037837, "grad_norm": 0.38016408681869507, "learning_rate": 1.3776518049662657e-05, "loss": 1.0934, "step": 46650 }, { "epoch": 1.6241218613062531, "grad_norm": 14.021830558776855, "learning_rate": 1.3759129164637964e-05, "loss": 0.6947, "step": 46700 }, { "epoch": 1.6258607498087223, "grad_norm": 0.3775421977043152, "learning_rate": 1.3741740279613272e-05, "loss": 0.7044, "step": 46750 }, { "epoch": 1.6275996383111915, "grad_norm": 0.4014865756034851, "learning_rate": 1.372435139458858e-05, "loss": 0.8013, "step": 46800 }, { "epoch": 1.6293385268136609, "grad_norm": 0.1362016797065735, "learning_rate": 1.3706962509563888e-05, "loss": 0.7457, "step": 46850 }, { "epoch": 1.6310774153161298, "grad_norm": 0.365329384803772, "learning_rate": 1.3689573624539194e-05, "loss": 0.5206, "step": 46900 }, { "epoch": 1.6328163038185992, "grad_norm": 0.35874998569488525, "learning_rate": 1.3672184739514502e-05, "loss": 0.7003, "step": 46950 }, { "epoch": 1.6345551923210684, "grad_norm": 0.2697305977344513, "learning_rate": 1.365479585448981e-05, "loss": 0.2912, "step": 47000 }, { "epoch": 1.6362940808235376, "grad_norm": 0.2883290648460388, "learning_rate": 1.3637406969465118e-05, "loss": 0.644, "step": 47050 }, { "epoch": 1.638032969326007, "grad_norm": 0.3316119611263275, "learning_rate": 1.3620018084440426e-05, "loss": 0.851, "step": 47100 }, { "epoch": 1.639771857828476, "grad_norm": 15.394453048706055, "learning_rate": 1.3602629199415734e-05, "loss": 0.7229, "step": 47150 }, { "epoch": 1.6415107463309453, "grad_norm": 0.37067911028862, "learning_rate": 1.3585240314391042e-05, "loss": 0.5543, "step": 47200 }, { "epoch": 1.6432496348334145, "grad_norm": 0.3781518340110779, "learning_rate": 1.356785142936635e-05, "loss": 0.8152, "step": 47250 }, { "epoch": 1.6449885233358836, "grad_norm": 0.31335780024528503, "learning_rate": 1.3550462544341657e-05, "loss": 0.5754, "step": 47300 }, { "epoch": 1.646727411838353, "grad_norm": 0.29282450675964355, "learning_rate": 1.3533073659316965e-05, "loss": 0.5442, "step": 47350 }, { "epoch": 1.6484663003408222, "grad_norm": 0.10697585344314575, "learning_rate": 1.3515684774292273e-05, "loss": 0.8522, "step": 47400 }, { "epoch": 1.6502051888432914, "grad_norm": 0.09600557386875153, "learning_rate": 1.3498295889267581e-05, "loss": 0.6246, "step": 47450 }, { "epoch": 1.6519440773457605, "grad_norm": 0.09877904504537582, "learning_rate": 1.3480907004242887e-05, "loss": 0.7166, "step": 47500 }, { "epoch": 1.6536829658482297, "grad_norm": 14.221246719360352, "learning_rate": 1.3463518119218195e-05, "loss": 0.7281, "step": 47550 }, { "epoch": 1.655421854350699, "grad_norm": 0.11393465101718903, "learning_rate": 1.3446129234193503e-05, "loss": 0.6974, "step": 47600 }, { "epoch": 1.6571607428531683, "grad_norm": 14.630108833312988, "learning_rate": 1.342874034916881e-05, "loss": 0.3023, "step": 47650 }, { "epoch": 1.6588996313556374, "grad_norm": 0.25833070278167725, "learning_rate": 1.3411351464144119e-05, "loss": 0.6738, "step": 47700 }, { "epoch": 1.6606385198581068, "grad_norm": 0.2826947867870331, "learning_rate": 1.3393962579119426e-05, "loss": 0.7504, "step": 47750 }, { "epoch": 1.6623774083605758, "grad_norm": 0.2349109649658203, "learning_rate": 1.3376573694094734e-05, "loss": 0.3729, "step": 47800 }, { "epoch": 1.6641162968630452, "grad_norm": 15.46367359161377, "learning_rate": 1.3359184809070044e-05, "loss": 0.6366, "step": 47850 }, { "epoch": 1.6658551853655144, "grad_norm": 0.23977632820606232, "learning_rate": 1.3341795924045352e-05, "loss": 0.6164, "step": 47900 }, { "epoch": 1.6675940738679835, "grad_norm": 0.25916197896003723, "learning_rate": 1.332440703902066e-05, "loss": 0.7533, "step": 47950 }, { "epoch": 1.669332962370453, "grad_norm": 0.29835841059684753, "learning_rate": 1.3307018153995968e-05, "loss": 0.646, "step": 48000 }, { "epoch": 1.6710718508729219, "grad_norm": 0.09007968753576279, "learning_rate": 1.3289629268971274e-05, "loss": 0.5256, "step": 48050 }, { "epoch": 1.6728107393753913, "grad_norm": 0.08959626406431198, "learning_rate": 1.3272240383946582e-05, "loss": 0.8728, "step": 48100 }, { "epoch": 1.6745496278778604, "grad_norm": 13.537739753723145, "learning_rate": 1.325485149892189e-05, "loss": 0.8838, "step": 48150 }, { "epoch": 1.6762885163803296, "grad_norm": 0.39538753032684326, "learning_rate": 1.3237462613897197e-05, "loss": 0.6237, "step": 48200 }, { "epoch": 1.678027404882799, "grad_norm": 0.12709836661815643, "learning_rate": 1.3220073728872505e-05, "loss": 0.7376, "step": 48250 }, { "epoch": 1.6797662933852682, "grad_norm": 0.31594255566596985, "learning_rate": 1.3202684843847813e-05, "loss": 0.4514, "step": 48300 }, { "epoch": 1.6815051818877373, "grad_norm": 0.2975133955478668, "learning_rate": 1.3185295958823121e-05, "loss": 0.7206, "step": 48350 }, { "epoch": 1.6832440703902067, "grad_norm": 2.238664150238037, "learning_rate": 1.3167907073798429e-05, "loss": 0.5222, "step": 48400 }, { "epoch": 1.6849829588926757, "grad_norm": 0.25375989079475403, "learning_rate": 1.3150518188773737e-05, "loss": 0.5556, "step": 48450 }, { "epoch": 1.686721847395145, "grad_norm": 15.181434631347656, "learning_rate": 1.3133129303749045e-05, "loss": 0.5492, "step": 48500 }, { "epoch": 1.6884607358976143, "grad_norm": 0.2668880522251129, "learning_rate": 1.3115740418724352e-05, "loss": 0.5912, "step": 48550 }, { "epoch": 1.6901996244000834, "grad_norm": 14.662363052368164, "learning_rate": 1.309835153369966e-05, "loss": 0.8028, "step": 48600 }, { "epoch": 1.6919385129025528, "grad_norm": 15.070253372192383, "learning_rate": 1.3080962648674966e-05, "loss": 0.7768, "step": 48650 }, { "epoch": 1.6936774014050218, "grad_norm": 0.27694839239120483, "learning_rate": 1.3063573763650274e-05, "loss": 0.4266, "step": 48700 }, { "epoch": 1.6954162899074912, "grad_norm": 0.3004125952720642, "learning_rate": 1.3046184878625582e-05, "loss": 0.7371, "step": 48750 }, { "epoch": 1.6971551784099603, "grad_norm": 0.09771247208118439, "learning_rate": 1.302879599360089e-05, "loss": 0.6806, "step": 48800 }, { "epoch": 1.6988940669124295, "grad_norm": 0.13541404902935028, "learning_rate": 1.3011407108576198e-05, "loss": 0.9403, "step": 48850 }, { "epoch": 1.700632955414899, "grad_norm": 0.45439642667770386, "learning_rate": 1.2994018223551506e-05, "loss": 0.8082, "step": 48900 }, { "epoch": 1.702371843917368, "grad_norm": 14.780818939208984, "learning_rate": 1.2976629338526814e-05, "loss": 0.4985, "step": 48950 }, { "epoch": 1.7041107324198372, "grad_norm": 15.200565338134766, "learning_rate": 1.2959240453502122e-05, "loss": 0.6335, "step": 49000 }, { "epoch": 1.7058496209223064, "grad_norm": 0.09984289854764938, "learning_rate": 1.294185156847743e-05, "loss": 0.5863, "step": 49050 }, { "epoch": 1.7075885094247756, "grad_norm": 0.26728367805480957, "learning_rate": 1.2924462683452737e-05, "loss": 0.6883, "step": 49100 }, { "epoch": 1.709327397927245, "grad_norm": 0.30496111512184143, "learning_rate": 1.2907073798428045e-05, "loss": 0.6037, "step": 49150 }, { "epoch": 1.7110662864297141, "grad_norm": 0.27050071954727173, "learning_rate": 1.2889684913403353e-05, "loss": 0.5802, "step": 49200 }, { "epoch": 1.7128051749321833, "grad_norm": 0.2541790008544922, "learning_rate": 1.287229602837866e-05, "loss": 0.6259, "step": 49250 }, { "epoch": 1.7145440634346527, "grad_norm": 0.2521624267101288, "learning_rate": 1.2854907143353967e-05, "loss": 0.4156, "step": 49300 }, { "epoch": 1.7162829519371217, "grad_norm": 0.28337451815605164, "learning_rate": 1.2837518258329275e-05, "loss": 0.8387, "step": 49350 }, { "epoch": 1.718021840439591, "grad_norm": 0.2925991415977478, "learning_rate": 1.2820129373304583e-05, "loss": 0.5899, "step": 49400 }, { "epoch": 1.7197607289420602, "grad_norm": 14.536752700805664, "learning_rate": 1.2802740488279892e-05, "loss": 1.0543, "step": 49450 }, { "epoch": 1.7214996174445294, "grad_norm": 14.110679626464844, "learning_rate": 1.27853516032552e-05, "loss": 0.586, "step": 49500 }, { "epoch": 1.7232385059469988, "grad_norm": 14.400794982910156, "learning_rate": 1.2767962718230508e-05, "loss": 0.788, "step": 49550 }, { "epoch": 1.7249773944494677, "grad_norm": 14.433677673339844, "learning_rate": 1.2750573833205816e-05, "loss": 1.0365, "step": 49600 }, { "epoch": 1.7267162829519371, "grad_norm": 13.457062721252441, "learning_rate": 1.2733184948181124e-05, "loss": 0.7729, "step": 49650 }, { "epoch": 1.7284551714544063, "grad_norm": 0.4310290217399597, "learning_rate": 1.2715796063156432e-05, "loss": 0.6661, "step": 49700 }, { "epoch": 1.7301940599568755, "grad_norm": 15.57725715637207, "learning_rate": 1.269840717813174e-05, "loss": 0.6615, "step": 49750 }, { "epoch": 1.7319329484593449, "grad_norm": 0.405399888753891, "learning_rate": 1.2681018293107048e-05, "loss": 0.6584, "step": 49800 }, { "epoch": 1.733671836961814, "grad_norm": 0.39442333579063416, "learning_rate": 1.2663629408082354e-05, "loss": 0.555, "step": 49850 }, { "epoch": 1.7354107254642832, "grad_norm": 0.3577411472797394, "learning_rate": 1.2646240523057662e-05, "loss": 0.6013, "step": 49900 }, { "epoch": 1.7371496139667526, "grad_norm": 0.28667372465133667, "learning_rate": 1.262885163803297e-05, "loss": 0.4982, "step": 49950 }, { "epoch": 1.7388885024692216, "grad_norm": 9.07281494140625, "learning_rate": 1.2611462753008277e-05, "loss": 0.7567, "step": 50000 }, { "epoch": 1.740627390971691, "grad_norm": 0.358649343252182, "learning_rate": 1.2594073867983585e-05, "loss": 0.749, "step": 50050 }, { "epoch": 1.7423662794741601, "grad_norm": 0.33667972683906555, "learning_rate": 1.2576684982958893e-05, "loss": 0.5683, "step": 50100 }, { "epoch": 1.7441051679766293, "grad_norm": 15.21135425567627, "learning_rate": 1.2559296097934201e-05, "loss": 0.5847, "step": 50150 }, { "epoch": 1.7458440564790987, "grad_norm": 16.905506134033203, "learning_rate": 1.2541907212909509e-05, "loss": 0.6165, "step": 50200 }, { "epoch": 1.7475829449815676, "grad_norm": 0.3170476257801056, "learning_rate": 1.2524518327884817e-05, "loss": 0.7373, "step": 50250 }, { "epoch": 1.749321833484037, "grad_norm": 0.11485689133405685, "learning_rate": 1.2507129442860125e-05, "loss": 0.9135, "step": 50300 }, { "epoch": 1.7510607219865062, "grad_norm": 0.3731885850429535, "learning_rate": 1.2489740557835432e-05, "loss": 0.6279, "step": 50350 }, { "epoch": 1.7527996104889754, "grad_norm": 14.190351486206055, "learning_rate": 1.2472351672810739e-05, "loss": 0.6788, "step": 50400 }, { "epoch": 1.7545384989914448, "grad_norm": 0.35587581992149353, "learning_rate": 1.2454962787786047e-05, "loss": 0.6191, "step": 50450 }, { "epoch": 1.756277387493914, "grad_norm": 14.302005767822266, "learning_rate": 1.2437573902761354e-05, "loss": 0.6377, "step": 50500 }, { "epoch": 1.758016275996383, "grad_norm": 0.44875991344451904, "learning_rate": 1.2420185017736662e-05, "loss": 0.8924, "step": 50550 }, { "epoch": 1.7597551644988525, "grad_norm": 16.368608474731445, "learning_rate": 1.240279613271197e-05, "loss": 0.5572, "step": 50600 }, { "epoch": 1.7614940530013214, "grad_norm": 14.729104995727539, "learning_rate": 1.2385407247687278e-05, "loss": 0.5164, "step": 50650 }, { "epoch": 1.7632329415037908, "grad_norm": 0.3430924713611603, "learning_rate": 1.2368018362662586e-05, "loss": 0.5553, "step": 50700 }, { "epoch": 1.76497183000626, "grad_norm": 15.879765510559082, "learning_rate": 1.2350629477637894e-05, "loss": 0.6773, "step": 50750 }, { "epoch": 1.7667107185087292, "grad_norm": 14.800215721130371, "learning_rate": 1.2333240592613202e-05, "loss": 0.4912, "step": 50800 }, { "epoch": 1.7684496070111986, "grad_norm": 0.2639375925064087, "learning_rate": 1.231585170758851e-05, "loss": 0.6077, "step": 50850 }, { "epoch": 1.7701884955136675, "grad_norm": 0.2984665036201477, "learning_rate": 1.2298462822563817e-05, "loss": 0.6986, "step": 50900 }, { "epoch": 1.771927384016137, "grad_norm": 0.28245478868484497, "learning_rate": 1.2281073937539125e-05, "loss": 0.6718, "step": 50950 }, { "epoch": 1.773666272518606, "grad_norm": 14.415521621704102, "learning_rate": 1.2263685052514431e-05, "loss": 0.8738, "step": 51000 }, { "epoch": 1.7754051610210753, "grad_norm": 0.3285273015499115, "learning_rate": 1.2246296167489741e-05, "loss": 0.5926, "step": 51050 }, { "epoch": 1.7771440495235447, "grad_norm": 0.1067660003900528, "learning_rate": 1.2228907282465049e-05, "loss": 0.736, "step": 51100 }, { "epoch": 1.7788829380260138, "grad_norm": 0.10578847676515579, "learning_rate": 1.2211518397440357e-05, "loss": 0.7684, "step": 51150 }, { "epoch": 1.780621826528483, "grad_norm": 15.020970344543457, "learning_rate": 1.2194129512415665e-05, "loss": 0.7403, "step": 51200 }, { "epoch": 1.7823607150309522, "grad_norm": 0.35607853531837463, "learning_rate": 1.2176740627390973e-05, "loss": 0.8067, "step": 51250 }, { "epoch": 1.7840996035334213, "grad_norm": 0.4047432541847229, "learning_rate": 1.215935174236628e-05, "loss": 0.9256, "step": 51300 }, { "epoch": 1.7858384920358907, "grad_norm": 0.3448825180530548, "learning_rate": 1.2141962857341588e-05, "loss": 0.4668, "step": 51350 }, { "epoch": 1.78757738053836, "grad_norm": 0.3137463331222534, "learning_rate": 1.2124573972316896e-05, "loss": 0.6211, "step": 51400 }, { "epoch": 1.789316269040829, "grad_norm": 0.316745400428772, "learning_rate": 1.2107185087292204e-05, "loss": 0.8036, "step": 51450 }, { "epoch": 1.7910551575432985, "grad_norm": 0.3484664857387543, "learning_rate": 1.2089796202267512e-05, "loss": 0.6541, "step": 51500 }, { "epoch": 1.7927940460457674, "grad_norm": 9.286653518676758, "learning_rate": 1.207240731724282e-05, "loss": 0.4756, "step": 51550 }, { "epoch": 1.7945329345482368, "grad_norm": 0.09949067234992981, "learning_rate": 1.2055018432218126e-05, "loss": 0.7471, "step": 51600 }, { "epoch": 1.796271823050706, "grad_norm": 0.09668143838644028, "learning_rate": 1.2037629547193434e-05, "loss": 0.5647, "step": 51650 }, { "epoch": 1.7980107115531752, "grad_norm": 0.2958596348762512, "learning_rate": 1.2020240662168742e-05, "loss": 0.6887, "step": 51700 }, { "epoch": 1.7997496000556445, "grad_norm": 15.044393539428711, "learning_rate": 1.200285177714405e-05, "loss": 0.4488, "step": 51750 }, { "epoch": 1.8014884885581135, "grad_norm": 0.25843560695648193, "learning_rate": 1.1985462892119357e-05, "loss": 0.5377, "step": 51800 }, { "epoch": 1.803227377060583, "grad_norm": 0.2756962776184082, "learning_rate": 1.1968074007094665e-05, "loss": 0.7298, "step": 51850 }, { "epoch": 1.804966265563052, "grad_norm": 0.31028085947036743, "learning_rate": 1.1950685122069973e-05, "loss": 0.6914, "step": 51900 }, { "epoch": 1.8067051540655212, "grad_norm": 0.30884119868278503, "learning_rate": 1.1933296237045281e-05, "loss": 0.7114, "step": 51950 }, { "epoch": 1.8084440425679906, "grad_norm": 0.3198888301849365, "learning_rate": 1.1915907352020589e-05, "loss": 0.7335, "step": 52000 }, { "epoch": 1.8101829310704598, "grad_norm": 14.36503791809082, "learning_rate": 1.1898518466995897e-05, "loss": 0.6533, "step": 52050 }, { "epoch": 1.811921819572929, "grad_norm": 0.3217068016529083, "learning_rate": 1.1881129581971205e-05, "loss": 0.6797, "step": 52100 }, { "epoch": 1.8136607080753984, "grad_norm": 16.27518653869629, "learning_rate": 1.1863740696946513e-05, "loss": 0.6269, "step": 52150 }, { "epoch": 1.8153995965778673, "grad_norm": 0.30472978949546814, "learning_rate": 1.1846351811921819e-05, "loss": 0.5572, "step": 52200 }, { "epoch": 1.8171384850803367, "grad_norm": 9.049840927124023, "learning_rate": 1.1828962926897127e-05, "loss": 0.7, "step": 52250 }, { "epoch": 1.8188773735828059, "grad_norm": 0.2727045714855194, "learning_rate": 1.1811574041872434e-05, "loss": 0.313, "step": 52300 }, { "epoch": 1.820616262085275, "grad_norm": 17.521814346313477, "learning_rate": 1.1794185156847742e-05, "loss": 0.7142, "step": 52350 }, { "epoch": 1.8223551505877444, "grad_norm": 0.07450219988822937, "learning_rate": 1.177679627182305e-05, "loss": 0.4339, "step": 52400 }, { "epoch": 1.8240940390902134, "grad_norm": 0.29161402583122253, "learning_rate": 1.1759407386798358e-05, "loss": 0.7687, "step": 52450 }, { "epoch": 1.8258329275926828, "grad_norm": 0.3114549517631531, "learning_rate": 1.1742018501773666e-05, "loss": 0.7598, "step": 52500 }, { "epoch": 1.827571816095152, "grad_norm": 0.2939223349094391, "learning_rate": 1.1724629616748974e-05, "loss": 0.6135, "step": 52550 }, { "epoch": 1.8293107045976211, "grad_norm": 0.33294710516929626, "learning_rate": 1.1707240731724282e-05, "loss": 0.8584, "step": 52600 }, { "epoch": 1.8310495931000905, "grad_norm": 0.3102196156978607, "learning_rate": 1.1689851846699591e-05, "loss": 0.511, "step": 52650 }, { "epoch": 1.8327884816025597, "grad_norm": 15.022314071655273, "learning_rate": 1.16724629616749e-05, "loss": 0.733, "step": 52700 }, { "epoch": 1.8345273701050289, "grad_norm": 0.2877540588378906, "learning_rate": 1.1655074076650205e-05, "loss": 0.5897, "step": 52750 }, { "epoch": 1.836266258607498, "grad_norm": 0.27458998560905457, "learning_rate": 1.1637685191625513e-05, "loss": 0.5634, "step": 52800 }, { "epoch": 1.8380051471099672, "grad_norm": 0.08329129964113235, "learning_rate": 1.1620296306600821e-05, "loss": 0.5744, "step": 52850 }, { "epoch": 1.8397440356124366, "grad_norm": 0.2897718846797943, "learning_rate": 1.1602907421576129e-05, "loss": 0.6746, "step": 52900 }, { "epoch": 1.8414829241149058, "grad_norm": 0.3438716232776642, "learning_rate": 1.1585518536551437e-05, "loss": 0.8892, "step": 52950 }, { "epoch": 1.843221812617375, "grad_norm": 0.3468877375125885, "learning_rate": 1.1568129651526745e-05, "loss": 0.8251, "step": 53000 }, { "epoch": 1.8449607011198443, "grad_norm": 0.35163414478302, "learning_rate": 1.1550740766502053e-05, "loss": 0.73, "step": 53050 }, { "epoch": 1.8466995896223133, "grad_norm": 0.3049357235431671, "learning_rate": 1.153335188147736e-05, "loss": 0.3569, "step": 53100 }, { "epoch": 1.8484384781247827, "grad_norm": 8.576674461364746, "learning_rate": 1.1515962996452668e-05, "loss": 0.8373, "step": 53150 }, { "epoch": 1.8501773666272519, "grad_norm": 14.463669776916504, "learning_rate": 1.1498574111427976e-05, "loss": 0.8323, "step": 53200 }, { "epoch": 1.851916255129721, "grad_norm": 16.29819107055664, "learning_rate": 1.1481185226403284e-05, "loss": 0.6568, "step": 53250 }, { "epoch": 1.8536551436321904, "grad_norm": 13.820489883422852, "learning_rate": 1.1463796341378592e-05, "loss": 0.8057, "step": 53300 }, { "epoch": 1.8553940321346594, "grad_norm": 0.34753111004829407, "learning_rate": 1.1446407456353898e-05, "loss": 0.4767, "step": 53350 }, { "epoch": 1.8571329206371288, "grad_norm": 0.3634461462497711, "learning_rate": 1.1429018571329206e-05, "loss": 0.7852, "step": 53400 }, { "epoch": 1.858871809139598, "grad_norm": 0.09368952363729477, "learning_rate": 1.1411629686304514e-05, "loss": 0.4936, "step": 53450 }, { "epoch": 1.860610697642067, "grad_norm": 15.14303207397461, "learning_rate": 1.1394240801279822e-05, "loss": 0.6232, "step": 53500 }, { "epoch": 1.8623495861445365, "grad_norm": 0.35452160239219666, "learning_rate": 1.137685191625513e-05, "loss": 0.9311, "step": 53550 }, { "epoch": 1.8640884746470057, "grad_norm": 0.322387158870697, "learning_rate": 1.1359463031230438e-05, "loss": 0.5552, "step": 53600 }, { "epoch": 1.8658273631494748, "grad_norm": 0.32211044430732727, "learning_rate": 1.1342074146205745e-05, "loss": 0.6054, "step": 53650 }, { "epoch": 1.8675662516519442, "grad_norm": 0.28915441036224365, "learning_rate": 1.1324685261181053e-05, "loss": 0.568, "step": 53700 }, { "epoch": 1.8693051401544132, "grad_norm": 0.33411139249801636, "learning_rate": 1.1307296376156361e-05, "loss": 0.875, "step": 53750 }, { "epoch": 1.8710440286568826, "grad_norm": 0.3060671091079712, "learning_rate": 1.1289907491131669e-05, "loss": 0.4825, "step": 53800 }, { "epoch": 1.8727829171593517, "grad_norm": 15.411432266235352, "learning_rate": 1.1272518606106977e-05, "loss": 0.7928, "step": 53850 }, { "epoch": 1.874521805661821, "grad_norm": 0.31562742590904236, "learning_rate": 1.1255129721082285e-05, "loss": 0.7281, "step": 53900 }, { "epoch": 1.8762606941642903, "grad_norm": 0.3400427997112274, "learning_rate": 1.1237740836057591e-05, "loss": 0.7289, "step": 53950 }, { "epoch": 1.8779995826667593, "grad_norm": 0.3861015737056732, "learning_rate": 1.1220351951032899e-05, "loss": 0.8867, "step": 54000 }, { "epoch": 1.8797384711692287, "grad_norm": 0.13147202134132385, "learning_rate": 1.1202963066008207e-05, "loss": 0.9052, "step": 54050 }, { "epoch": 1.8814773596716978, "grad_norm": 0.44602084159851074, "learning_rate": 1.1185574180983515e-05, "loss": 0.8167, "step": 54100 }, { "epoch": 1.883216248174167, "grad_norm": 0.42475029826164246, "learning_rate": 1.1168185295958822e-05, "loss": 0.5461, "step": 54150 }, { "epoch": 1.8849551366766364, "grad_norm": 0.420391708612442, "learning_rate": 1.115079641093413e-05, "loss": 0.8461, "step": 54200 }, { "epoch": 1.8866940251791056, "grad_norm": 14.529937744140625, "learning_rate": 1.113340752590944e-05, "loss": 0.5023, "step": 54250 }, { "epoch": 1.8884329136815747, "grad_norm": 0.371905654668808, "learning_rate": 1.1116018640884748e-05, "loss": 0.6859, "step": 54300 }, { "epoch": 1.8901718021840441, "grad_norm": 0.3435218632221222, "learning_rate": 1.1098629755860056e-05, "loss": 0.5656, "step": 54350 }, { "epoch": 1.891910690686513, "grad_norm": 0.24976381659507751, "learning_rate": 1.1081240870835363e-05, "loss": 0.2653, "step": 54400 }, { "epoch": 1.8936495791889825, "grad_norm": 0.08417559415102005, "learning_rate": 1.1063851985810671e-05, "loss": 0.7219, "step": 54450 }, { "epoch": 1.8953884676914516, "grad_norm": 14.697444915771484, "learning_rate": 1.104646310078598e-05, "loss": 0.4324, "step": 54500 }, { "epoch": 1.8971273561939208, "grad_norm": 0.2652629315853119, "learning_rate": 1.1029074215761285e-05, "loss": 0.6401, "step": 54550 }, { "epoch": 1.8988662446963902, "grad_norm": 15.494746208190918, "learning_rate": 1.1011685330736593e-05, "loss": 0.4564, "step": 54600 }, { "epoch": 1.9006051331988592, "grad_norm": 0.08194435387849808, "learning_rate": 1.0994296445711901e-05, "loss": 0.6356, "step": 54650 }, { "epoch": 1.9023440217013285, "grad_norm": 14.66989517211914, "learning_rate": 1.0976907560687209e-05, "loss": 0.7322, "step": 54700 }, { "epoch": 1.9040829102037977, "grad_norm": 0.29282376170158386, "learning_rate": 1.0959518675662517e-05, "loss": 0.7101, "step": 54750 }, { "epoch": 1.905821798706267, "grad_norm": 0.08829312771558762, "learning_rate": 1.0942129790637825e-05, "loss": 0.6131, "step": 54800 }, { "epoch": 1.9075606872087363, "grad_norm": 15.343838691711426, "learning_rate": 1.0924740905613133e-05, "loss": 0.4508, "step": 54850 }, { "epoch": 1.9092995757112052, "grad_norm": 15.681466102600098, "learning_rate": 1.090735202058844e-05, "loss": 0.6769, "step": 54900 }, { "epoch": 1.9110384642136746, "grad_norm": 0.2863258421421051, "learning_rate": 1.0889963135563748e-05, "loss": 0.6185, "step": 54950 }, { "epoch": 1.9127773527161438, "grad_norm": 0.2744685411453247, "learning_rate": 1.0872574250539056e-05, "loss": 0.7312, "step": 55000 }, { "epoch": 1.914516241218613, "grad_norm": 0.2563205063343048, "learning_rate": 1.0855185365514364e-05, "loss": 0.4841, "step": 55050 }, { "epoch": 1.9162551297210824, "grad_norm": 14.824396133422852, "learning_rate": 1.0837796480489672e-05, "loss": 0.8317, "step": 55100 }, { "epoch": 1.9179940182235515, "grad_norm": 18.249614715576172, "learning_rate": 1.0820407595464978e-05, "loss": 0.7872, "step": 55150 }, { "epoch": 1.9197329067260207, "grad_norm": 0.36765536665916443, "learning_rate": 1.0803018710440286e-05, "loss": 0.6779, "step": 55200 }, { "epoch": 1.92147179522849, "grad_norm": 14.85068130493164, "learning_rate": 1.0785629825415594e-05, "loss": 0.6348, "step": 55250 }, { "epoch": 1.923210683730959, "grad_norm": 0.3647606074810028, "learning_rate": 1.0768240940390902e-05, "loss": 0.9293, "step": 55300 }, { "epoch": 1.9249495722334284, "grad_norm": 0.3844655454158783, "learning_rate": 1.075085205536621e-05, "loss": 0.6894, "step": 55350 }, { "epoch": 1.9266884607358976, "grad_norm": 0.06835456192493439, "learning_rate": 1.0733463170341518e-05, "loss": 0.5927, "step": 55400 }, { "epoch": 1.9284273492383668, "grad_norm": 0.3281351625919342, "learning_rate": 1.0716074285316825e-05, "loss": 0.5804, "step": 55450 }, { "epoch": 1.9301662377408362, "grad_norm": 0.31403055787086487, "learning_rate": 1.0698685400292133e-05, "loss": 0.6757, "step": 55500 }, { "epoch": 1.9319051262433051, "grad_norm": 0.28186556696891785, "learning_rate": 1.0681296515267441e-05, "loss": 0.4814, "step": 55550 }, { "epoch": 1.9336440147457745, "grad_norm": 0.3594147264957428, "learning_rate": 1.0663907630242749e-05, "loss": 0.8875, "step": 55600 }, { "epoch": 1.9353829032482437, "grad_norm": 0.2812623977661133, "learning_rate": 1.0646518745218057e-05, "loss": 0.2865, "step": 55650 }, { "epoch": 1.9371217917507129, "grad_norm": 0.27383142709732056, "learning_rate": 1.0629129860193363e-05, "loss": 0.8107, "step": 55700 }, { "epoch": 1.9388606802531823, "grad_norm": 0.32762858271598816, "learning_rate": 1.0611740975168671e-05, "loss": 0.9023, "step": 55750 }, { "epoch": 1.9405995687556514, "grad_norm": 0.31273242831230164, "learning_rate": 1.0594352090143979e-05, "loss": 0.546, "step": 55800 }, { "epoch": 1.9423384572581206, "grad_norm": 8.895495414733887, "learning_rate": 1.0576963205119288e-05, "loss": 0.7628, "step": 55850 }, { "epoch": 1.94407734576059, "grad_norm": 14.669936180114746, "learning_rate": 1.0559574320094596e-05, "loss": 0.8084, "step": 55900 }, { "epoch": 1.945816234263059, "grad_norm": 14.927128791809082, "learning_rate": 1.0542185435069904e-05, "loss": 0.7653, "step": 55950 }, { "epoch": 1.9475551227655283, "grad_norm": 0.3605245351791382, "learning_rate": 1.0524796550045212e-05, "loss": 0.5989, "step": 56000 }, { "epoch": 1.9492940112679975, "grad_norm": 0.32332727313041687, "learning_rate": 1.050740766502052e-05, "loss": 0.5645, "step": 56050 }, { "epoch": 1.9510328997704667, "grad_norm": 0.2952893376350403, "learning_rate": 1.0490018779995828e-05, "loss": 0.563, "step": 56100 }, { "epoch": 1.952771788272936, "grad_norm": 0.31085893511772156, "learning_rate": 1.0472629894971136e-05, "loss": 0.809, "step": 56150 }, { "epoch": 1.954510676775405, "grad_norm": 0.30592653155326843, "learning_rate": 1.0455241009946444e-05, "loss": 0.7554, "step": 56200 }, { "epoch": 1.9562495652778744, "grad_norm": 14.018999099731445, "learning_rate": 1.0437852124921751e-05, "loss": 0.961, "step": 56250 }, { "epoch": 1.9579884537803436, "grad_norm": 0.37466442584991455, "learning_rate": 1.0420463239897058e-05, "loss": 0.7251, "step": 56300 }, { "epoch": 1.9597273422828128, "grad_norm": 0.12005720287561417, "learning_rate": 1.0403074354872365e-05, "loss": 0.6845, "step": 56350 }, { "epoch": 1.9614662307852821, "grad_norm": 13.975214958190918, "learning_rate": 1.0385685469847673e-05, "loss": 0.7454, "step": 56400 }, { "epoch": 1.9632051192877513, "grad_norm": 0.36748602986335754, "learning_rate": 1.0368296584822981e-05, "loss": 0.5381, "step": 56450 }, { "epoch": 1.9649440077902205, "grad_norm": 0.09700479358434677, "learning_rate": 1.0350907699798289e-05, "loss": 0.6523, "step": 56500 }, { "epoch": 1.9666828962926897, "grad_norm": 0.31464141607284546, "learning_rate": 1.0333518814773597e-05, "loss": 0.702, "step": 56550 }, { "epoch": 1.9684217847951588, "grad_norm": 0.3040376603603363, "learning_rate": 1.0316129929748905e-05, "loss": 0.4293, "step": 56600 }, { "epoch": 1.9701606732976282, "grad_norm": 0.3730393350124359, "learning_rate": 1.0298741044724213e-05, "loss": 1.0444, "step": 56650 }, { "epoch": 1.9718995618000974, "grad_norm": 0.40293508768081665, "learning_rate": 1.028135215969952e-05, "loss": 0.84, "step": 56700 }, { "epoch": 1.9736384503025666, "grad_norm": 15.248039245605469, "learning_rate": 1.0263963274674828e-05, "loss": 0.4591, "step": 56750 }, { "epoch": 1.975377338805036, "grad_norm": 14.792391777038574, "learning_rate": 1.0246574389650136e-05, "loss": 0.5469, "step": 56800 }, { "epoch": 1.977116227307505, "grad_norm": 0.35679200291633606, "learning_rate": 1.0229185504625444e-05, "loss": 0.8213, "step": 56850 }, { "epoch": 1.9788551158099743, "grad_norm": 0.12317205220460892, "learning_rate": 1.021179661960075e-05, "loss": 0.9224, "step": 56900 }, { "epoch": 1.9805940043124435, "grad_norm": 0.4095461070537567, "learning_rate": 1.0194407734576058e-05, "loss": 0.7397, "step": 56950 }, { "epoch": 1.9823328928149127, "grad_norm": 0.3325926959514618, "learning_rate": 1.0177018849551366e-05, "loss": 0.453, "step": 57000 }, { "epoch": 1.984071781317382, "grad_norm": 0.3340759575366974, "learning_rate": 1.0159629964526674e-05, "loss": 0.6454, "step": 57050 }, { "epoch": 1.985810669819851, "grad_norm": 0.35061588883399963, "learning_rate": 1.0142241079501982e-05, "loss": 0.9416, "step": 57100 }, { "epoch": 1.9875495583223204, "grad_norm": 8.486052513122559, "learning_rate": 1.012485219447729e-05, "loss": 0.8322, "step": 57150 }, { "epoch": 1.9892884468247896, "grad_norm": 13.873724937438965, "learning_rate": 1.0107463309452598e-05, "loss": 0.5512, "step": 57200 }, { "epoch": 1.9910273353272587, "grad_norm": 0.2879263758659363, "learning_rate": 1.0090074424427906e-05, "loss": 0.3389, "step": 57250 }, { "epoch": 1.9927662238297281, "grad_norm": 0.08767715841531754, "learning_rate": 1.0072685539403213e-05, "loss": 0.5777, "step": 57300 }, { "epoch": 1.9945051123321973, "grad_norm": 14.9371919631958, "learning_rate": 1.0055296654378521e-05, "loss": 0.9042, "step": 57350 }, { "epoch": 1.9962440008346665, "grad_norm": 0.1013684794306755, "learning_rate": 1.0037907769353829e-05, "loss": 0.5035, "step": 57400 }, { "epoch": 1.9979828893371359, "grad_norm": 9.226420402526855, "learning_rate": 1.0020518884329139e-05, "loss": 0.5694, "step": 57450 }, { "epoch": 1.9997217778396048, "grad_norm": 0.09209857136011124, "learning_rate": 1.0003129999304445e-05, "loss": 0.6843, "step": 57500 }, { "epoch": 2.0, "eval_accuracy": 0.9826713734219038, "eval_confusion_matrix": [ [ 111590, 15 ], [ 1978, 1429 ] ], "eval_f1": 0.5891568748711606, "eval_loss": 0.9257137775421143, "eval_precision": 0.989612188365651, "eval_recall": 0.4194305840915762, "eval_roc_auc": 0.7481895685740283, "eval_runtime": 542.1326, "eval_samples_per_second": 212.147, "eval_steps_per_second": 6.631, "step": 57508 }, { "epoch": 2.001460666342074, "grad_norm": 0.10331760346889496, "learning_rate": 9.985741114279753e-06, "loss": 0.768, "step": 57550 }, { "epoch": 2.003199554844543, "grad_norm": 0.3387294411659241, "learning_rate": 9.96835222925506e-06, "loss": 0.6225, "step": 57600 }, { "epoch": 2.0049384433470125, "grad_norm": 14.870137214660645, "learning_rate": 9.950963344230369e-06, "loss": 0.6883, "step": 57650 }, { "epoch": 2.006677331849482, "grad_norm": 0.09480606019496918, "learning_rate": 9.933574459205676e-06, "loss": 0.6012, "step": 57700 }, { "epoch": 2.008416220351951, "grad_norm": 15.996614456176758, "learning_rate": 9.916185574180984e-06, "loss": 0.6167, "step": 57750 }, { "epoch": 2.0101551088544203, "grad_norm": 0.30981868505477905, "learning_rate": 9.898796689156292e-06, "loss": 0.6961, "step": 57800 }, { "epoch": 2.0118939973568897, "grad_norm": 0.2759547531604767, "learning_rate": 9.8814078041316e-06, "loss": 0.6597, "step": 57850 }, { "epoch": 2.0136328858593586, "grad_norm": 0.32305023074150085, "learning_rate": 9.864018919106908e-06, "loss": 0.5976, "step": 57900 }, { "epoch": 2.015371774361828, "grad_norm": 0.10485555231571198, "learning_rate": 9.846630034082216e-06, "loss": 0.7434, "step": 57950 }, { "epoch": 2.017110662864297, "grad_norm": 0.33409371972084045, "learning_rate": 9.829241149057524e-06, "loss": 0.8024, "step": 58000 }, { "epoch": 2.0188495513667664, "grad_norm": 0.3146596848964691, "learning_rate": 9.81185226403283e-06, "loss": 0.5376, "step": 58050 }, { "epoch": 2.0205884398692358, "grad_norm": 0.3164394497871399, "learning_rate": 9.794463379008138e-06, "loss": 0.5971, "step": 58100 }, { "epoch": 2.0223273283717047, "grad_norm": 15.077522277832031, "learning_rate": 9.777074493983446e-06, "loss": 0.5723, "step": 58150 }, { "epoch": 2.024066216874174, "grad_norm": 16.926288604736328, "learning_rate": 9.759685608958753e-06, "loss": 0.9406, "step": 58200 }, { "epoch": 2.025805105376643, "grad_norm": 8.230751991271973, "learning_rate": 9.742296723934061e-06, "loss": 0.8147, "step": 58250 }, { "epoch": 2.0275439938791124, "grad_norm": 0.3829768896102905, "learning_rate": 9.72490783890937e-06, "loss": 0.6539, "step": 58300 }, { "epoch": 2.029282882381582, "grad_norm": 15.082670211791992, "learning_rate": 9.707518953884677e-06, "loss": 0.4824, "step": 58350 }, { "epoch": 2.031021770884051, "grad_norm": 0.10746856778860092, "learning_rate": 9.690130068859985e-06, "loss": 0.8308, "step": 58400 }, { "epoch": 2.03276065938652, "grad_norm": 0.27786752581596375, "learning_rate": 9.672741183835293e-06, "loss": 0.4164, "step": 58450 }, { "epoch": 2.0344995478889896, "grad_norm": 0.26461032032966614, "learning_rate": 9.6553522988106e-06, "loss": 0.7309, "step": 58500 }, { "epoch": 2.0362384363914585, "grad_norm": 0.24978114664554596, "learning_rate": 9.637963413785909e-06, "loss": 0.4963, "step": 58550 }, { "epoch": 2.037977324893928, "grad_norm": 0.3430357277393341, "learning_rate": 9.620574528761216e-06, "loss": 0.9557, "step": 58600 }, { "epoch": 2.039716213396397, "grad_norm": 0.09504514187574387, "learning_rate": 9.603185643736523e-06, "loss": 0.7355, "step": 58650 }, { "epoch": 2.0414551018988663, "grad_norm": 0.334536075592041, "learning_rate": 9.58579675871183e-06, "loss": 0.6244, "step": 58700 }, { "epoch": 2.0431939904013356, "grad_norm": 15.013228416442871, "learning_rate": 9.568407873687138e-06, "loss": 0.7582, "step": 58750 }, { "epoch": 2.0449328789038046, "grad_norm": 0.3811526894569397, "learning_rate": 9.551018988662446e-06, "loss": 0.6784, "step": 58800 }, { "epoch": 2.046671767406274, "grad_norm": 0.4010978043079376, "learning_rate": 9.533630103637754e-06, "loss": 0.8076, "step": 58850 }, { "epoch": 2.048410655908743, "grad_norm": 0.42423686385154724, "learning_rate": 9.516241218613062e-06, "loss": 0.8694, "step": 58900 }, { "epoch": 2.0501495444112123, "grad_norm": 0.3780515789985657, "learning_rate": 9.49885233358837e-06, "loss": 0.6213, "step": 58950 }, { "epoch": 2.0518884329136817, "grad_norm": 0.37921106815338135, "learning_rate": 9.481463448563678e-06, "loss": 0.6874, "step": 59000 }, { "epoch": 2.0536273214161507, "grad_norm": 13.407455444335938, "learning_rate": 9.464074563538987e-06, "loss": 0.5466, "step": 59050 }, { "epoch": 2.05536620991862, "grad_norm": 8.597731590270996, "learning_rate": 9.446685678514295e-06, "loss": 0.6095, "step": 59100 }, { "epoch": 2.057105098421089, "grad_norm": 0.3010380268096924, "learning_rate": 9.429296793489603e-06, "loss": 0.451, "step": 59150 }, { "epoch": 2.0588439869235584, "grad_norm": 14.987975120544434, "learning_rate": 9.411907908464911e-06, "loss": 0.7224, "step": 59200 }, { "epoch": 2.060582875426028, "grad_norm": 0.3147706389427185, "learning_rate": 9.394519023440217e-06, "loss": 0.7161, "step": 59250 }, { "epoch": 2.0623217639284968, "grad_norm": 14.541171073913574, "learning_rate": 9.377130138415525e-06, "loss": 0.3422, "step": 59300 }, { "epoch": 2.064060652430966, "grad_norm": 0.2777429521083832, "learning_rate": 9.359741253390833e-06, "loss": 0.6064, "step": 59350 }, { "epoch": 2.0657995409334355, "grad_norm": 15.174882888793945, "learning_rate": 9.34235236836614e-06, "loss": 0.7311, "step": 59400 }, { "epoch": 2.0675384294359045, "grad_norm": 17.365177154541016, "learning_rate": 9.324963483341449e-06, "loss": 0.4271, "step": 59450 }, { "epoch": 2.069277317938374, "grad_norm": 0.24511702358722687, "learning_rate": 9.307574598316756e-06, "loss": 0.5436, "step": 59500 }, { "epoch": 2.071016206440843, "grad_norm": 0.08835063129663467, "learning_rate": 9.290185713292064e-06, "loss": 0.906, "step": 59550 }, { "epoch": 2.0727550949433122, "grad_norm": 15.076498985290527, "learning_rate": 9.272796828267372e-06, "loss": 0.6091, "step": 59600 }, { "epoch": 2.0744939834457816, "grad_norm": 0.3341560363769531, "learning_rate": 9.25540794324268e-06, "loss": 0.8096, "step": 59650 }, { "epoch": 2.0762328719482506, "grad_norm": 0.0941532701253891, "learning_rate": 9.238019058217988e-06, "loss": 0.416, "step": 59700 }, { "epoch": 2.07797176045072, "grad_norm": 0.2095929980278015, "learning_rate": 9.220630173193296e-06, "loss": 0.3144, "step": 59750 }, { "epoch": 2.079710648953189, "grad_norm": 0.3325834274291992, "learning_rate": 9.203241288168604e-06, "loss": 1.0321, "step": 59800 }, { "epoch": 2.0814495374556583, "grad_norm": 0.3200731575489044, "learning_rate": 9.18585240314391e-06, "loss": 0.7843, "step": 59850 }, { "epoch": 2.0831884259581277, "grad_norm": 0.3165806233882904, "learning_rate": 9.168463518119218e-06, "loss": 0.605, "step": 59900 }, { "epoch": 2.0849273144605966, "grad_norm": 0.2909805178642273, "learning_rate": 9.151074633094526e-06, "loss": 0.5388, "step": 59950 }, { "epoch": 2.086666202963066, "grad_norm": 11.869168281555176, "learning_rate": 9.133685748069833e-06, "loss": 0.717, "step": 60000 }, { "epoch": 2.0884050914655354, "grad_norm": 0.29258957505226135, "learning_rate": 9.116296863045141e-06, "loss": 0.4619, "step": 60050 }, { "epoch": 2.0901439799680044, "grad_norm": 0.26873666048049927, "learning_rate": 9.09890797802045e-06, "loss": 0.6476, "step": 60100 }, { "epoch": 2.0918828684704738, "grad_norm": 0.3097116947174072, "learning_rate": 9.081519092995757e-06, "loss": 0.7853, "step": 60150 }, { "epoch": 2.0936217569729427, "grad_norm": 0.40075263381004333, "learning_rate": 9.064130207971065e-06, "loss": 1.1125, "step": 60200 }, { "epoch": 2.095360645475412, "grad_norm": 0.36865100264549255, "learning_rate": 9.046741322946373e-06, "loss": 0.7263, "step": 60250 }, { "epoch": 2.0970995339778815, "grad_norm": 0.32686591148376465, "learning_rate": 9.02935243792168e-06, "loss": 0.5343, "step": 60300 }, { "epoch": 2.0988384224803505, "grad_norm": 0.29199641942977905, "learning_rate": 9.011963552896989e-06, "loss": 0.475, "step": 60350 }, { "epoch": 2.10057731098282, "grad_norm": 0.27382081747055054, "learning_rate": 8.994574667872296e-06, "loss": 0.4957, "step": 60400 }, { "epoch": 2.102316199485289, "grad_norm": 0.3226562738418579, "learning_rate": 8.977185782847603e-06, "loss": 0.739, "step": 60450 }, { "epoch": 2.104055087987758, "grad_norm": 14.834962844848633, "learning_rate": 8.95979689782291e-06, "loss": 0.6757, "step": 60500 }, { "epoch": 2.1057939764902276, "grad_norm": 0.2767277956008911, "learning_rate": 8.942408012798218e-06, "loss": 0.657, "step": 60550 }, { "epoch": 2.1075328649926965, "grad_norm": 0.35351768136024475, "learning_rate": 8.925019127773526e-06, "loss": 0.9093, "step": 60600 }, { "epoch": 2.109271753495166, "grad_norm": 0.10501737892627716, "learning_rate": 8.907630242748836e-06, "loss": 0.7402, "step": 60650 }, { "epoch": 2.111010641997635, "grad_norm": 0.3136848211288452, "learning_rate": 8.890241357724144e-06, "loss": 0.4828, "step": 60700 }, { "epoch": 2.1127495305001043, "grad_norm": 0.26690125465393066, "learning_rate": 8.872852472699452e-06, "loss": 0.4374, "step": 60750 }, { "epoch": 2.1144884190025737, "grad_norm": 0.2770821154117584, "learning_rate": 8.85546358767476e-06, "loss": 0.6762, "step": 60800 }, { "epoch": 2.1162273075050426, "grad_norm": 0.04559651389718056, "learning_rate": 8.838074702650067e-06, "loss": 0.5332, "step": 60850 }, { "epoch": 2.117966196007512, "grad_norm": 14.83686637878418, "learning_rate": 8.820685817625375e-06, "loss": 1.2039, "step": 60900 }, { "epoch": 2.1197050845099814, "grad_norm": 0.2713572382926941, "learning_rate": 8.803296932600683e-06, "loss": 0.2703, "step": 60950 }, { "epoch": 2.1214439730124504, "grad_norm": 0.2995685636997223, "learning_rate": 8.78590804757599e-06, "loss": 0.7007, "step": 61000 }, { "epoch": 2.1231828615149198, "grad_norm": 0.2755599021911621, "learning_rate": 8.768519162551297e-06, "loss": 0.486, "step": 61050 }, { "epoch": 2.1249217500173887, "grad_norm": 0.3913181722164154, "learning_rate": 8.751130277526605e-06, "loss": 0.936, "step": 61100 }, { "epoch": 2.126660638519858, "grad_norm": 13.234795570373535, "learning_rate": 8.733741392501913e-06, "loss": 1.103, "step": 61150 }, { "epoch": 2.1283995270223275, "grad_norm": 0.3684265613555908, "learning_rate": 8.71635250747722e-06, "loss": 0.3903, "step": 61200 }, { "epoch": 2.1301384155247964, "grad_norm": 0.32997769117355347, "learning_rate": 8.698963622452529e-06, "loss": 0.7149, "step": 61250 }, { "epoch": 2.131877304027266, "grad_norm": 15.263748168945312, "learning_rate": 8.681574737427837e-06, "loss": 0.4543, "step": 61300 }, { "epoch": 2.1336161925297352, "grad_norm": 0.2786884009838104, "learning_rate": 8.664185852403144e-06, "loss": 0.505, "step": 61350 }, { "epoch": 2.135355081032204, "grad_norm": 0.32927316427230835, "learning_rate": 8.646796967378452e-06, "loss": 0.9468, "step": 61400 }, { "epoch": 2.1370939695346736, "grad_norm": 0.3092537522315979, "learning_rate": 8.62940808235376e-06, "loss": 0.6283, "step": 61450 }, { "epoch": 2.1388328580371425, "grad_norm": 0.3240683078765869, "learning_rate": 8.612019197329068e-06, "loss": 0.5923, "step": 61500 }, { "epoch": 2.140571746539612, "grad_norm": 15.312796592712402, "learning_rate": 8.594630312304376e-06, "loss": 0.7086, "step": 61550 }, { "epoch": 2.1423106350420813, "grad_norm": 0.33104822039604187, "learning_rate": 8.577241427279682e-06, "loss": 0.7397, "step": 61600 }, { "epoch": 2.1440495235445503, "grad_norm": 0.32247892022132874, "learning_rate": 8.55985254225499e-06, "loss": 0.5269, "step": 61650 }, { "epoch": 2.1457884120470196, "grad_norm": 8.64822769165039, "learning_rate": 8.542463657230298e-06, "loss": 0.8193, "step": 61700 }, { "epoch": 2.1475273005494886, "grad_norm": 0.3680492043495178, "learning_rate": 8.525074772205606e-06, "loss": 0.6307, "step": 61750 }, { "epoch": 2.149266189051958, "grad_norm": 0.3100355863571167, "learning_rate": 8.507685887180914e-06, "loss": 0.5437, "step": 61800 }, { "epoch": 2.1510050775544274, "grad_norm": 0.2657436728477478, "learning_rate": 8.490297002156221e-06, "loss": 0.3838, "step": 61850 }, { "epoch": 2.1527439660568963, "grad_norm": 0.0824597030878067, "learning_rate": 8.47290811713153e-06, "loss": 0.6141, "step": 61900 }, { "epoch": 2.1544828545593657, "grad_norm": 0.28507447242736816, "learning_rate": 8.455519232106837e-06, "loss": 0.6503, "step": 61950 }, { "epoch": 2.1562217430618347, "grad_norm": 0.3597736358642578, "learning_rate": 8.438130347082145e-06, "loss": 1.0598, "step": 62000 }, { "epoch": 2.157960631564304, "grad_norm": 0.3114331066608429, "learning_rate": 8.420741462057453e-06, "loss": 0.5692, "step": 62050 }, { "epoch": 2.1596995200667735, "grad_norm": 0.31353646516799927, "learning_rate": 8.40335257703276e-06, "loss": 0.8436, "step": 62100 }, { "epoch": 2.1614384085692424, "grad_norm": 14.334765434265137, "learning_rate": 8.385963692008069e-06, "loss": 0.8223, "step": 62150 }, { "epoch": 2.163177297071712, "grad_norm": 0.3189065754413605, "learning_rate": 8.368574806983375e-06, "loss": 0.3897, "step": 62200 }, { "epoch": 2.1649161855741808, "grad_norm": 0.10443305224180222, "learning_rate": 8.351185921958684e-06, "loss": 0.6241, "step": 62250 }, { "epoch": 2.16665507407665, "grad_norm": 0.3098902106285095, "learning_rate": 8.333797036933992e-06, "loss": 0.6348, "step": 62300 }, { "epoch": 2.1683939625791195, "grad_norm": 0.321620911359787, "learning_rate": 8.3164081519093e-06, "loss": 0.5677, "step": 62350 }, { "epoch": 2.1701328510815885, "grad_norm": 16.803712844848633, "learning_rate": 8.299019266884608e-06, "loss": 0.6197, "step": 62400 }, { "epoch": 2.171871739584058, "grad_norm": 0.0925876721739769, "learning_rate": 8.281630381859916e-06, "loss": 0.7754, "step": 62450 }, { "epoch": 2.1736106280865273, "grad_norm": 11.555059432983398, "learning_rate": 8.264241496835224e-06, "loss": 0.7228, "step": 62500 }, { "epoch": 2.1753495165889962, "grad_norm": 0.3258427679538727, "learning_rate": 8.246852611810532e-06, "loss": 0.5957, "step": 62550 }, { "epoch": 2.1770884050914656, "grad_norm": 0.09231085330247879, "learning_rate": 8.22946372678584e-06, "loss": 0.4022, "step": 62600 }, { "epoch": 2.1788272935939346, "grad_norm": 0.07787708193063736, "learning_rate": 8.212074841761147e-06, "loss": 0.5366, "step": 62650 }, { "epoch": 2.180566182096404, "grad_norm": 0.25338202714920044, "learning_rate": 8.194685956736455e-06, "loss": 0.3903, "step": 62700 }, { "epoch": 2.1823050705988734, "grad_norm": 16.211557388305664, "learning_rate": 8.177297071711763e-06, "loss": 0.5818, "step": 62750 }, { "epoch": 2.1840439591013423, "grad_norm": 14.897072792053223, "learning_rate": 8.15990818668707e-06, "loss": 0.8259, "step": 62800 }, { "epoch": 2.1857828476038117, "grad_norm": 0.2579725980758667, "learning_rate": 8.142519301662377e-06, "loss": 0.6644, "step": 62850 }, { "epoch": 2.187521736106281, "grad_norm": 0.28423696756362915, "learning_rate": 8.125130416637685e-06, "loss": 0.6244, "step": 62900 }, { "epoch": 2.18926062460875, "grad_norm": 14.880553245544434, "learning_rate": 8.107741531612993e-06, "loss": 0.6262, "step": 62950 }, { "epoch": 2.1909995131112194, "grad_norm": 15.255820274353027, "learning_rate": 8.0903526465883e-06, "loss": 0.5367, "step": 63000 }, { "epoch": 2.1927384016136884, "grad_norm": 14.802027702331543, "learning_rate": 8.072963761563609e-06, "loss": 0.6041, "step": 63050 }, { "epoch": 2.1944772901161578, "grad_norm": 0.2945719063282013, "learning_rate": 8.055574876538917e-06, "loss": 0.6896, "step": 63100 }, { "epoch": 2.196216178618627, "grad_norm": 0.25124725699424744, "learning_rate": 8.038185991514224e-06, "loss": 0.4519, "step": 63150 }, { "epoch": 2.197955067121096, "grad_norm": 0.26629412174224854, "learning_rate": 8.020797106489532e-06, "loss": 0.4641, "step": 63200 }, { "epoch": 2.1996939556235655, "grad_norm": 15.161707878112793, "learning_rate": 8.00340822146484e-06, "loss": 0.6468, "step": 63250 }, { "epoch": 2.2014328441260345, "grad_norm": 14.558478355407715, "learning_rate": 7.986019336440148e-06, "loss": 0.9447, "step": 63300 }, { "epoch": 2.203171732628504, "grad_norm": 0.3294551968574524, "learning_rate": 7.968630451415454e-06, "loss": 0.6765, "step": 63350 }, { "epoch": 2.2049106211309732, "grad_norm": 0.3839125633239746, "learning_rate": 7.951241566390762e-06, "loss": 0.677, "step": 63400 }, { "epoch": 2.206649509633442, "grad_norm": 0.29087886214256287, "learning_rate": 7.93385268136607e-06, "loss": 0.4242, "step": 63450 }, { "epoch": 2.2083883981359116, "grad_norm": 0.264085054397583, "learning_rate": 7.916463796341378e-06, "loss": 0.516, "step": 63500 }, { "epoch": 2.2101272866383805, "grad_norm": 0.2767043113708496, "learning_rate": 7.899074911316686e-06, "loss": 0.7098, "step": 63550 }, { "epoch": 2.21186617514085, "grad_norm": 15.053038597106934, "learning_rate": 7.881686026291994e-06, "loss": 0.9971, "step": 63600 }, { "epoch": 2.2136050636433193, "grad_norm": 0.12508659064769745, "learning_rate": 7.864297141267302e-06, "loss": 0.4713, "step": 63650 }, { "epoch": 2.2153439521457883, "grad_norm": 0.3385521173477173, "learning_rate": 7.84690825624261e-06, "loss": 0.8394, "step": 63700 }, { "epoch": 2.2170828406482577, "grad_norm": 0.0995241329073906, "learning_rate": 7.829519371217917e-06, "loss": 0.6429, "step": 63750 }, { "epoch": 2.218821729150727, "grad_norm": 0.10783933848142624, "learning_rate": 7.812130486193225e-06, "loss": 0.6578, "step": 63800 }, { "epoch": 2.220560617653196, "grad_norm": 0.09085489809513092, "learning_rate": 7.794741601168535e-06, "loss": 0.4754, "step": 63850 }, { "epoch": 2.2222995061556654, "grad_norm": 0.09765300899744034, "learning_rate": 7.777352716143843e-06, "loss": 0.6175, "step": 63900 }, { "epoch": 2.2240383946581344, "grad_norm": 0.28414249420166016, "learning_rate": 7.759963831119149e-06, "loss": 0.5805, "step": 63950 }, { "epoch": 2.2257772831606037, "grad_norm": 0.3107820153236389, "learning_rate": 7.742574946094457e-06, "loss": 0.7651, "step": 64000 }, { "epoch": 2.227516171663073, "grad_norm": 0.11119381338357925, "learning_rate": 7.725186061069764e-06, "loss": 1.0348, "step": 64050 }, { "epoch": 2.229255060165542, "grad_norm": 0.35018813610076904, "learning_rate": 7.707797176045072e-06, "loss": 0.442, "step": 64100 }, { "epoch": 2.2309939486680115, "grad_norm": 0.35977476835250854, "learning_rate": 7.69040829102038e-06, "loss": 0.8355, "step": 64150 }, { "epoch": 2.2327328371704804, "grad_norm": 0.3603460192680359, "learning_rate": 7.673019405995688e-06, "loss": 0.602, "step": 64200 }, { "epoch": 2.23447172567295, "grad_norm": 14.967622756958008, "learning_rate": 7.655630520970996e-06, "loss": 0.6069, "step": 64250 }, { "epoch": 2.236210614175419, "grad_norm": 14.597759246826172, "learning_rate": 7.638241635946304e-06, "loss": 0.8258, "step": 64300 }, { "epoch": 2.237949502677888, "grad_norm": 0.3300585448741913, "learning_rate": 7.620852750921612e-06, "loss": 0.6311, "step": 64350 }, { "epoch": 2.2396883911803576, "grad_norm": 0.09721571207046509, "learning_rate": 7.603463865896919e-06, "loss": 0.4082, "step": 64400 }, { "epoch": 2.241427279682827, "grad_norm": 0.2788519859313965, "learning_rate": 7.586074980872227e-06, "loss": 0.58, "step": 64450 }, { "epoch": 2.243166168185296, "grad_norm": 0.27534806728363037, "learning_rate": 7.5686860958475345e-06, "loss": 0.4991, "step": 64500 }, { "epoch": 2.2449050566877653, "grad_norm": 14.981958389282227, "learning_rate": 7.551297210822842e-06, "loss": 0.5218, "step": 64550 }, { "epoch": 2.2466439451902342, "grad_norm": 0.2928787171840668, "learning_rate": 7.53390832579815e-06, "loss": 0.7459, "step": 64600 }, { "epoch": 2.2483828336927036, "grad_norm": 9.211077690124512, "learning_rate": 7.516519440773458e-06, "loss": 0.5347, "step": 64650 }, { "epoch": 2.250121722195173, "grad_norm": 0.34492364525794983, "learning_rate": 7.499130555748765e-06, "loss": 1.0467, "step": 64700 }, { "epoch": 2.251860610697642, "grad_norm": 0.33183783292770386, "learning_rate": 7.481741670724073e-06, "loss": 0.5713, "step": 64750 }, { "epoch": 2.2535994992001114, "grad_norm": 0.1083383709192276, "learning_rate": 7.464352785699381e-06, "loss": 0.7469, "step": 64800 }, { "epoch": 2.2553383877025803, "grad_norm": 0.30070334672927856, "learning_rate": 7.446963900674689e-06, "loss": 0.4107, "step": 64850 }, { "epoch": 2.2570772762050497, "grad_norm": 16.25684356689453, "learning_rate": 7.429575015649997e-06, "loss": 0.6534, "step": 64900 }, { "epoch": 2.258816164707519, "grad_norm": 9.065841674804688, "learning_rate": 7.4121861306253045e-06, "loss": 0.6368, "step": 64950 }, { "epoch": 2.260555053209988, "grad_norm": 15.47312068939209, "learning_rate": 7.3947972456006116e-06, "loss": 0.6121, "step": 65000 }, { "epoch": 2.2622939417124575, "grad_norm": 14.904685974121094, "learning_rate": 7.37740836057592e-06, "loss": 0.5877, "step": 65050 }, { "epoch": 2.2640328302149264, "grad_norm": 0.09297342598438263, "learning_rate": 7.360019475551228e-06, "loss": 0.6982, "step": 65100 }, { "epoch": 2.265771718717396, "grad_norm": 0.29734280705451965, "learning_rate": 7.342630590526536e-06, "loss": 0.645, "step": 65150 }, { "epoch": 2.267510607219865, "grad_norm": 0.0968429446220398, "learning_rate": 7.325241705501844e-06, "loss": 0.5929, "step": 65200 }, { "epoch": 2.269249495722334, "grad_norm": 14.800728797912598, "learning_rate": 7.307852820477152e-06, "loss": 0.7114, "step": 65250 }, { "epoch": 2.2709883842248035, "grad_norm": 0.0938631147146225, "learning_rate": 7.290463935452459e-06, "loss": 0.584, "step": 65300 }, { "epoch": 2.2727272727272725, "grad_norm": 0.2932316064834595, "learning_rate": 7.273075050427767e-06, "loss": 0.4982, "step": 65350 }, { "epoch": 2.274466161229742, "grad_norm": 0.3374950587749481, "learning_rate": 7.2556861654030745e-06, "loss": 0.7859, "step": 65400 }, { "epoch": 2.2762050497322113, "grad_norm": 0.299877792596817, "learning_rate": 7.238297280378382e-06, "loss": 0.6869, "step": 65450 }, { "epoch": 2.27794393823468, "grad_norm": 0.10691149532794952, "learning_rate": 7.22090839535369e-06, "loss": 0.6832, "step": 65500 }, { "epoch": 2.2796828267371496, "grad_norm": 0.2951295077800751, "learning_rate": 7.203519510328998e-06, "loss": 0.523, "step": 65550 }, { "epoch": 2.281421715239619, "grad_norm": 0.31462362408638, "learning_rate": 7.186130625304305e-06, "loss": 0.6972, "step": 65600 }, { "epoch": 2.283160603742088, "grad_norm": 0.2754490375518799, "learning_rate": 7.168741740279613e-06, "loss": 0.513, "step": 65650 }, { "epoch": 2.2848994922445574, "grad_norm": 0.29208385944366455, "learning_rate": 7.151352855254921e-06, "loss": 0.5949, "step": 65700 }, { "epoch": 2.2866383807470267, "grad_norm": 14.229211807250977, "learning_rate": 7.133963970230229e-06, "loss": 0.7471, "step": 65750 }, { "epoch": 2.2883772692494957, "grad_norm": 14.793254852294922, "learning_rate": 7.116575085205537e-06, "loss": 0.684, "step": 65800 }, { "epoch": 2.290116157751965, "grad_norm": 0.09081799536943436, "learning_rate": 7.0991862001808446e-06, "loss": 0.5253, "step": 65850 }, { "epoch": 2.291855046254434, "grad_norm": 0.30153000354766846, "learning_rate": 7.0817973151561524e-06, "loss": 0.7015, "step": 65900 }, { "epoch": 2.2935939347569034, "grad_norm": 0.08396171033382416, "learning_rate": 7.06440843013146e-06, "loss": 0.6201, "step": 65950 }, { "epoch": 2.295332823259373, "grad_norm": 0.29424381256103516, "learning_rate": 7.047019545106768e-06, "loss": 0.6469, "step": 66000 }, { "epoch": 2.2970717117618418, "grad_norm": 0.3328405022621155, "learning_rate": 7.029630660082076e-06, "loss": 0.6605, "step": 66050 }, { "epoch": 2.298810600264311, "grad_norm": 14.767300605773926, "learning_rate": 7.012241775057384e-06, "loss": 0.7813, "step": 66100 }, { "epoch": 2.30054948876678, "grad_norm": 11.763884544372559, "learning_rate": 6.994852890032691e-06, "loss": 0.663, "step": 66150 }, { "epoch": 2.3022883772692495, "grad_norm": 0.32674354314804077, "learning_rate": 6.977464005007999e-06, "loss": 0.7779, "step": 66200 }, { "epoch": 2.304027265771719, "grad_norm": 14.134980201721191, "learning_rate": 6.960075119983307e-06, "loss": 0.7902, "step": 66250 }, { "epoch": 2.305766154274188, "grad_norm": 14.696518898010254, "learning_rate": 6.942686234958615e-06, "loss": 0.605, "step": 66300 }, { "epoch": 2.3075050427766572, "grad_norm": 16.42340850830078, "learning_rate": 6.9252973499339225e-06, "loss": 0.7235, "step": 66350 }, { "epoch": 2.309243931279126, "grad_norm": 0.3458247184753418, "learning_rate": 6.90790846490923e-06, "loss": 0.7106, "step": 66400 }, { "epoch": 2.3109828197815956, "grad_norm": 8.473856925964355, "learning_rate": 6.890519579884537e-06, "loss": 0.5356, "step": 66450 }, { "epoch": 2.312721708284065, "grad_norm": 15.084478378295898, "learning_rate": 6.873130694859845e-06, "loss": 0.7863, "step": 66500 }, { "epoch": 2.314460596786534, "grad_norm": 16.126943588256836, "learning_rate": 6.855741809835153e-06, "loss": 0.7626, "step": 66550 }, { "epoch": 2.3161994852890033, "grad_norm": 0.3003986179828644, "learning_rate": 6.838352924810461e-06, "loss": 0.4777, "step": 66600 }, { "epoch": 2.3179383737914723, "grad_norm": 0.2998530864715576, "learning_rate": 6.82096403978577e-06, "loss": 0.722, "step": 66650 }, { "epoch": 2.3196772622939417, "grad_norm": 0.30530238151550293, "learning_rate": 6.8035751547610776e-06, "loss": 0.7534, "step": 66700 }, { "epoch": 2.321416150796411, "grad_norm": 0.31367501616477966, "learning_rate": 6.786186269736385e-06, "loss": 0.6487, "step": 66750 }, { "epoch": 2.32315503929888, "grad_norm": 0.09603828191757202, "learning_rate": 6.7687973847116925e-06, "loss": 0.4479, "step": 66800 }, { "epoch": 2.3248939278013494, "grad_norm": 0.29711979627609253, "learning_rate": 6.751408499687e-06, "loss": 0.7631, "step": 66850 }, { "epoch": 2.3266328163038184, "grad_norm": 0.09643172472715378, "learning_rate": 6.734019614662308e-06, "loss": 0.7256, "step": 66900 }, { "epoch": 2.3283717048062877, "grad_norm": 0.29537665843963623, "learning_rate": 6.716630729637616e-06, "loss": 0.3949, "step": 66950 }, { "epoch": 2.330110593308757, "grad_norm": 15.01783561706543, "learning_rate": 6.699241844612924e-06, "loss": 0.8684, "step": 67000 }, { "epoch": 2.331849481811226, "grad_norm": 0.2746674418449402, "learning_rate": 6.681852959588231e-06, "loss": 0.3262, "step": 67050 }, { "epoch": 2.3335883703136955, "grad_norm": 0.3175896406173706, "learning_rate": 6.664464074563539e-06, "loss": 0.9222, "step": 67100 }, { "epoch": 2.335327258816165, "grad_norm": 8.658668518066406, "learning_rate": 6.647075189538847e-06, "loss": 0.7021, "step": 67150 }, { "epoch": 2.337066147318634, "grad_norm": 0.3502490520477295, "learning_rate": 6.629686304514155e-06, "loss": 0.7869, "step": 67200 }, { "epoch": 2.338805035821103, "grad_norm": 0.32713866233825684, "learning_rate": 6.6122974194894625e-06, "loss": 0.7385, "step": 67250 }, { "epoch": 2.3405439243235726, "grad_norm": 0.36399731040000916, "learning_rate": 6.59490853446477e-06, "loss": 0.8131, "step": 67300 }, { "epoch": 2.3422828128260416, "grad_norm": 0.34137967228889465, "learning_rate": 6.577519649440077e-06, "loss": 0.7063, "step": 67350 }, { "epoch": 2.344021701328511, "grad_norm": 0.34767404198646545, "learning_rate": 6.560130764415385e-06, "loss": 0.6834, "step": 67400 }, { "epoch": 2.34576058983098, "grad_norm": 0.3168736696243286, "learning_rate": 6.542741879390694e-06, "loss": 0.4581, "step": 67450 }, { "epoch": 2.3474994783334493, "grad_norm": 14.82581901550293, "learning_rate": 6.525352994366002e-06, "loss": 1.1417, "step": 67500 }, { "epoch": 2.3492383668359187, "grad_norm": 13.935430526733398, "learning_rate": 6.50796410934131e-06, "loss": 0.6085, "step": 67550 }, { "epoch": 2.3509772553383876, "grad_norm": 0.3764286935329437, "learning_rate": 6.490575224316618e-06, "loss": 0.637, "step": 67600 }, { "epoch": 2.352716143840857, "grad_norm": 6.133382320404053, "learning_rate": 6.473186339291925e-06, "loss": 0.7437, "step": 67650 }, { "epoch": 2.354455032343326, "grad_norm": 0.3908880352973938, "learning_rate": 6.4557974542672325e-06, "loss": 0.6306, "step": 67700 }, { "epoch": 2.3561939208457954, "grad_norm": 0.3300613760948181, "learning_rate": 6.43840856924254e-06, "loss": 0.4336, "step": 67750 }, { "epoch": 2.3579328093482648, "grad_norm": 0.3410859704017639, "learning_rate": 6.421019684217848e-06, "loss": 0.8124, "step": 67800 }, { "epoch": 2.3596716978507337, "grad_norm": 0.32739803194999695, "learning_rate": 6.403630799193156e-06, "loss": 0.7246, "step": 67850 }, { "epoch": 2.361410586353203, "grad_norm": 0.33726173639297485, "learning_rate": 6.386241914168464e-06, "loss": 0.5704, "step": 67900 }, { "epoch": 2.363149474855672, "grad_norm": 0.10011206567287445, "learning_rate": 6.368853029143771e-06, "loss": 0.686, "step": 67950 }, { "epoch": 2.3648883633581415, "grad_norm": 0.32553601264953613, "learning_rate": 6.351464144119079e-06, "loss": 0.6162, "step": 68000 }, { "epoch": 2.366627251860611, "grad_norm": 0.3075249195098877, "learning_rate": 6.334075259094387e-06, "loss": 0.6383, "step": 68050 }, { "epoch": 2.36836614036308, "grad_norm": 0.26676568388938904, "learning_rate": 6.316686374069695e-06, "loss": 0.5324, "step": 68100 }, { "epoch": 2.370105028865549, "grad_norm": 15.127925872802734, "learning_rate": 6.2992974890450025e-06, "loss": 0.8516, "step": 68150 }, { "epoch": 2.371843917368018, "grad_norm": 0.12305378168821335, "learning_rate": 6.28190860402031e-06, "loss": 0.9253, "step": 68200 }, { "epoch": 2.3735828058704875, "grad_norm": 0.3410135805606842, "learning_rate": 6.264519718995618e-06, "loss": 0.5513, "step": 68250 }, { "epoch": 2.375321694372957, "grad_norm": 0.053131379187107086, "learning_rate": 6.247130833970926e-06, "loss": 0.5122, "step": 68300 }, { "epoch": 2.377060582875426, "grad_norm": 15.328137397766113, "learning_rate": 6.229741948946234e-06, "loss": 0.8669, "step": 68350 }, { "epoch": 2.3787994713778953, "grad_norm": 0.3154887557029724, "learning_rate": 6.212353063921542e-06, "loss": 0.3744, "step": 68400 }, { "epoch": 2.3805383598803647, "grad_norm": 0.27208104729652405, "learning_rate": 6.19496417889685e-06, "loss": 0.4707, "step": 68450 }, { "epoch": 2.3822772483828336, "grad_norm": 15.23327922821045, "learning_rate": 6.177575293872157e-06, "loss": 0.5824, "step": 68500 }, { "epoch": 2.384016136885303, "grad_norm": 0.2831372916698456, "learning_rate": 6.160186408847465e-06, "loss": 0.59, "step": 68550 }, { "epoch": 2.385755025387772, "grad_norm": 14.819686889648438, "learning_rate": 6.1427975238227725e-06, "loss": 0.8033, "step": 68600 }, { "epoch": 2.3874939138902413, "grad_norm": 0.27056631445884705, "learning_rate": 6.12540863879808e-06, "loss": 0.5039, "step": 68650 }, { "epoch": 2.3892328023927107, "grad_norm": 0.24557478725910187, "learning_rate": 6.108019753773388e-06, "loss": 0.6092, "step": 68700 }, { "epoch": 2.3909716908951797, "grad_norm": 8.802968978881836, "learning_rate": 6.090630868748696e-06, "loss": 0.7981, "step": 68750 }, { "epoch": 2.392710579397649, "grad_norm": 0.2575099766254425, "learning_rate": 6.073241983724003e-06, "loss": 0.4529, "step": 68800 }, { "epoch": 2.3944494679001185, "grad_norm": 15.242324829101562, "learning_rate": 6.055853098699311e-06, "loss": 0.656, "step": 68850 }, { "epoch": 2.3961883564025874, "grad_norm": 16.871849060058594, "learning_rate": 6.038464213674619e-06, "loss": 0.87, "step": 68900 }, { "epoch": 2.397927244905057, "grad_norm": 0.31800368428230286, "learning_rate": 6.021075328649927e-06, "loss": 0.7255, "step": 68950 }, { "epoch": 2.3996661334075258, "grad_norm": 0.10343417525291443, "learning_rate": 6.003686443625235e-06, "loss": 0.84, "step": 69000 }, { "epoch": 2.401405021909995, "grad_norm": 0.3419122099876404, "learning_rate": 5.986297558600543e-06, "loss": 0.8296, "step": 69050 }, { "epoch": 2.4031439104124646, "grad_norm": 0.37043097615242004, "learning_rate": 5.9689086735758504e-06, "loss": 0.83, "step": 69100 }, { "epoch": 2.4048827989149335, "grad_norm": 13.750035285949707, "learning_rate": 5.951519788551158e-06, "loss": 0.7752, "step": 69150 }, { "epoch": 2.406621687417403, "grad_norm": 0.120005302131176, "learning_rate": 5.934130903526466e-06, "loss": 0.3524, "step": 69200 }, { "epoch": 2.408360575919872, "grad_norm": 0.12846647202968597, "learning_rate": 5.916742018501774e-06, "loss": 0.9697, "step": 69250 }, { "epoch": 2.4100994644223412, "grad_norm": 16.256132125854492, "learning_rate": 5.899353133477082e-06, "loss": 0.4765, "step": 69300 }, { "epoch": 2.4118383529248106, "grad_norm": 0.3425805866718292, "learning_rate": 5.88196424845239e-06, "loss": 0.8657, "step": 69350 }, { "epoch": 2.4135772414272796, "grad_norm": 0.36371877789497375, "learning_rate": 5.864575363427697e-06, "loss": 0.6255, "step": 69400 }, { "epoch": 2.415316129929749, "grad_norm": 0.3634818196296692, "learning_rate": 5.847186478403005e-06, "loss": 0.5931, "step": 69450 }, { "epoch": 2.417055018432218, "grad_norm": 0.34747493267059326, "learning_rate": 5.8297975933783126e-06, "loss": 0.6564, "step": 69500 }, { "epoch": 2.4187939069346873, "grad_norm": 0.3458154499530792, "learning_rate": 5.8124087083536204e-06, "loss": 0.5692, "step": 69550 }, { "epoch": 2.4205327954371567, "grad_norm": 0.3281562328338623, "learning_rate": 5.795019823328928e-06, "loss": 0.733, "step": 69600 }, { "epoch": 2.4222716839396257, "grad_norm": 0.3775782287120819, "learning_rate": 5.777630938304236e-06, "loss": 0.796, "step": 69650 }, { "epoch": 2.424010572442095, "grad_norm": 0.39359569549560547, "learning_rate": 5.760242053279543e-06, "loss": 0.9069, "step": 69700 }, { "epoch": 2.425749460944564, "grad_norm": 14.008756637573242, "learning_rate": 5.742853168254851e-06, "loss": 0.8085, "step": 69750 }, { "epoch": 2.4274883494470334, "grad_norm": 0.3793421983718872, "learning_rate": 5.725464283230159e-06, "loss": 0.6309, "step": 69800 }, { "epoch": 2.429227237949503, "grad_norm": 0.3454445004463196, "learning_rate": 5.708075398205468e-06, "loss": 0.4621, "step": 69850 }, { "epoch": 2.4309661264519717, "grad_norm": 0.330728679895401, "learning_rate": 5.6906865131807756e-06, "loss": 0.774, "step": 69900 }, { "epoch": 2.432705014954441, "grad_norm": 0.3546920120716095, "learning_rate": 5.6732976281560834e-06, "loss": 0.8447, "step": 69950 }, { "epoch": 2.4344439034569105, "grad_norm": 0.1300666332244873, "learning_rate": 5.6559087431313905e-06, "loss": 0.7076, "step": 70000 }, { "epoch": 2.4361827919593795, "grad_norm": 0.3250364363193512, "learning_rate": 5.638519858106698e-06, "loss": 0.4121, "step": 70050 }, { "epoch": 2.437921680461849, "grad_norm": 8.745099067687988, "learning_rate": 5.621130973082006e-06, "loss": 0.5519, "step": 70100 }, { "epoch": 2.439660568964318, "grad_norm": 0.33490511775016785, "learning_rate": 5.603742088057314e-06, "loss": 0.6128, "step": 70150 }, { "epoch": 2.441399457466787, "grad_norm": 0.3449120819568634, "learning_rate": 5.586353203032622e-06, "loss": 0.6375, "step": 70200 }, { "epoch": 2.4431383459692566, "grad_norm": 0.2938508093357086, "learning_rate": 5.56896431800793e-06, "loss": 0.5636, "step": 70250 }, { "epoch": 2.4448772344717256, "grad_norm": 0.3131754994392395, "learning_rate": 5.551575432983237e-06, "loss": 0.5678, "step": 70300 }, { "epoch": 2.446616122974195, "grad_norm": 0.09292981028556824, "learning_rate": 5.534186547958545e-06, "loss": 0.6718, "step": 70350 }, { "epoch": 2.4483550114766643, "grad_norm": 0.3373245298862457, "learning_rate": 5.516797662933853e-06, "loss": 0.7281, "step": 70400 }, { "epoch": 2.4500938999791333, "grad_norm": 17.491125106811523, "learning_rate": 5.4994087779091605e-06, "loss": 0.6997, "step": 70450 }, { "epoch": 2.4518327884816027, "grad_norm": 18.443614959716797, "learning_rate": 5.482019892884468e-06, "loss": 0.6668, "step": 70500 }, { "epoch": 2.4535716769840716, "grad_norm": 15.008947372436523, "learning_rate": 5.464631007859776e-06, "loss": 0.7506, "step": 70550 }, { "epoch": 2.455310565486541, "grad_norm": 14.182000160217285, "learning_rate": 5.447242122835083e-06, "loss": 0.7511, "step": 70600 }, { "epoch": 2.4570494539890104, "grad_norm": 8.392163276672363, "learning_rate": 5.429853237810392e-06, "loss": 0.819, "step": 70650 }, { "epoch": 2.4587883424914794, "grad_norm": 14.505101203918457, "learning_rate": 5.4124643527857e-06, "loss": 0.6613, "step": 70700 }, { "epoch": 2.4605272309939488, "grad_norm": 0.35213959217071533, "learning_rate": 5.395075467761008e-06, "loss": 0.6183, "step": 70750 }, { "epoch": 2.4622661194964177, "grad_norm": 0.35315388441085815, "learning_rate": 5.377686582736316e-06, "loss": 0.6091, "step": 70800 }, { "epoch": 2.464005007998887, "grad_norm": 0.33576440811157227, "learning_rate": 5.360297697711623e-06, "loss": 0.5752, "step": 70850 }, { "epoch": 2.4657438965013565, "grad_norm": 16.59521484375, "learning_rate": 5.3429088126869305e-06, "loss": 0.8922, "step": 70900 }, { "epoch": 2.4674827850038255, "grad_norm": 0.31349653005599976, "learning_rate": 5.325519927662238e-06, "loss": 0.5591, "step": 70950 }, { "epoch": 2.469221673506295, "grad_norm": 0.3269708454608917, "learning_rate": 5.308131042637546e-06, "loss": 0.5189, "step": 71000 }, { "epoch": 2.470960562008764, "grad_norm": 0.35081860423088074, "learning_rate": 5.290742157612854e-06, "loss": 0.8037, "step": 71050 }, { "epoch": 2.472699450511233, "grad_norm": 14.93113899230957, "learning_rate": 5.273353272588162e-06, "loss": 0.5942, "step": 71100 }, { "epoch": 2.4744383390137026, "grad_norm": 0.3738803565502167, "learning_rate": 5.255964387563469e-06, "loss": 0.7314, "step": 71150 }, { "epoch": 2.4761772275161715, "grad_norm": 0.10138379037380219, "learning_rate": 5.238575502538777e-06, "loss": 0.575, "step": 71200 }, { "epoch": 2.477916116018641, "grad_norm": 14.590140342712402, "learning_rate": 5.221186617514085e-06, "loss": 0.993, "step": 71250 }, { "epoch": 2.47965500452111, "grad_norm": 0.39985883235931396, "learning_rate": 5.203797732489393e-06, "loss": 0.7592, "step": 71300 }, { "epoch": 2.4813938930235793, "grad_norm": 0.10468708723783493, "learning_rate": 5.1864088474647005e-06, "loss": 0.6914, "step": 71350 }, { "epoch": 2.4831327815260487, "grad_norm": 0.37774431705474854, "learning_rate": 5.169019962440008e-06, "loss": 0.5478, "step": 71400 }, { "epoch": 2.4848716700285176, "grad_norm": 0.33875125646591187, "learning_rate": 5.151631077415316e-06, "loss": 0.6079, "step": 71450 }, { "epoch": 2.486610558530987, "grad_norm": 0.3092256784439087, "learning_rate": 5.134242192390624e-06, "loss": 0.4696, "step": 71500 }, { "epoch": 2.4883494470334564, "grad_norm": 0.3067108392715454, "learning_rate": 5.116853307365932e-06, "loss": 0.8349, "step": 71550 }, { "epoch": 2.4900883355359253, "grad_norm": 0.3246482312679291, "learning_rate": 5.09946442234124e-06, "loss": 0.739, "step": 71600 }, { "epoch": 2.4918272240383947, "grad_norm": 0.32082903385162354, "learning_rate": 5.082075537316548e-06, "loss": 0.562, "step": 71650 }, { "epoch": 2.4935661125408637, "grad_norm": 0.3205757439136505, "learning_rate": 5.064686652291856e-06, "loss": 0.5433, "step": 71700 }, { "epoch": 2.495305001043333, "grad_norm": 0.32167741656303406, "learning_rate": 5.047297767267163e-06, "loss": 0.9991, "step": 71750 }, { "epoch": 2.4970438895458025, "grad_norm": 0.321094274520874, "learning_rate": 5.0299088822424705e-06, "loss": 0.6194, "step": 71800 }, { "epoch": 2.4987827780482714, "grad_norm": 0.40397679805755615, "learning_rate": 5.012519997217778e-06, "loss": 0.6723, "step": 71850 }, { "epoch": 2.500521666550741, "grad_norm": 0.11809306591749191, "learning_rate": 4.995131112193086e-06, "loss": 0.6111, "step": 71900 }, { "epoch": 2.50226055505321, "grad_norm": 0.11758511513471603, "learning_rate": 4.977742227168394e-06, "loss": 0.7034, "step": 71950 }, { "epoch": 2.503999443555679, "grad_norm": 0.340659499168396, "learning_rate": 4.960353342143702e-06, "loss": 0.6852, "step": 72000 }, { "epoch": 2.5057383320581486, "grad_norm": 0.3395274579524994, "learning_rate": 4.942964457119009e-06, "loss": 0.7189, "step": 72050 }, { "epoch": 2.5074772205606175, "grad_norm": 0.3469090461730957, "learning_rate": 4.925575572094317e-06, "loss": 0.675, "step": 72100 }, { "epoch": 2.509216109063087, "grad_norm": 0.38067564368247986, "learning_rate": 4.908186687069625e-06, "loss": 0.5669, "step": 72150 }, { "epoch": 2.5109549975655563, "grad_norm": 0.3017711341381073, "learning_rate": 4.890797802044933e-06, "loss": 0.41, "step": 72200 }, { "epoch": 2.5126938860680252, "grad_norm": 15.093563079833984, "learning_rate": 4.873408917020241e-06, "loss": 0.5672, "step": 72250 }, { "epoch": 2.5144327745704946, "grad_norm": 14.670958518981934, "learning_rate": 4.856020031995549e-06, "loss": 0.5568, "step": 72300 }, { "epoch": 2.5161716630729636, "grad_norm": 0.28553950786590576, "learning_rate": 4.838631146970856e-06, "loss": 0.5061, "step": 72350 }, { "epoch": 2.517910551575433, "grad_norm": 0.2646492123603821, "learning_rate": 4.821242261946164e-06, "loss": 0.4408, "step": 72400 }, { "epoch": 2.5196494400779024, "grad_norm": 0.28734058141708374, "learning_rate": 4.803853376921472e-06, "loss": 0.7569, "step": 72450 }, { "epoch": 2.5213883285803713, "grad_norm": 14.916251182556152, "learning_rate": 4.78646449189678e-06, "loss": 0.5074, "step": 72500 }, { "epoch": 2.5231272170828407, "grad_norm": 0.08438620716333389, "learning_rate": 4.769075606872088e-06, "loss": 0.6759, "step": 72550 }, { "epoch": 2.5248661055853097, "grad_norm": 15.534076690673828, "learning_rate": 4.751686721847396e-06, "loss": 0.5968, "step": 72600 }, { "epoch": 2.526604994087779, "grad_norm": 8.97250747680664, "learning_rate": 4.734297836822703e-06, "loss": 0.4737, "step": 72650 }, { "epoch": 2.5283438825902484, "grad_norm": 0.29305270314216614, "learning_rate": 4.7169089517980106e-06, "loss": 0.6524, "step": 72700 }, { "epoch": 2.5300827710927174, "grad_norm": 0.2665903866291046, "learning_rate": 4.6995200667733184e-06, "loss": 0.6682, "step": 72750 }, { "epoch": 2.531821659595187, "grad_norm": 0.08398403972387314, "learning_rate": 4.682131181748626e-06, "loss": 0.7808, "step": 72800 }, { "epoch": 2.5335605480976557, "grad_norm": 0.09175426512956619, "learning_rate": 4.664742296723934e-06, "loss": 0.7137, "step": 72850 }, { "epoch": 2.535299436600125, "grad_norm": 0.3011569678783417, "learning_rate": 4.647353411699242e-06, "loss": 0.7263, "step": 72900 }, { "epoch": 2.5370383251025945, "grad_norm": 0.30016323924064636, "learning_rate": 4.629964526674549e-06, "loss": 0.5427, "step": 72950 }, { "epoch": 2.538777213605064, "grad_norm": 18.811155319213867, "learning_rate": 4.612575641649857e-06, "loss": 0.8519, "step": 73000 }, { "epoch": 2.540516102107533, "grad_norm": 0.29617977142333984, "learning_rate": 4.595186756625166e-06, "loss": 0.4444, "step": 73050 }, { "epoch": 2.542254990610002, "grad_norm": 0.3079003691673279, "learning_rate": 4.5777978716004736e-06, "loss": 0.7174, "step": 73100 }, { "epoch": 2.543993879112471, "grad_norm": 16.358213424682617, "learning_rate": 4.5604089865757814e-06, "loss": 0.6741, "step": 73150 }, { "epoch": 2.5457327676149406, "grad_norm": 0.0901346281170845, "learning_rate": 4.543020101551089e-06, "loss": 0.3909, "step": 73200 }, { "epoch": 2.54747165611741, "grad_norm": 0.25800684094429016, "learning_rate": 4.525631216526396e-06, "loss": 0.6419, "step": 73250 }, { "epoch": 2.549210544619879, "grad_norm": 0.3126048445701599, "learning_rate": 4.508242331501704e-06, "loss": 0.9457, "step": 73300 }, { "epoch": 2.5509494331223483, "grad_norm": 14.949040412902832, "learning_rate": 4.490853446477012e-06, "loss": 0.5828, "step": 73350 }, { "epoch": 2.5526883216248173, "grad_norm": 14.750711441040039, "learning_rate": 4.47346456145232e-06, "loss": 0.7284, "step": 73400 }, { "epoch": 2.5544272101272867, "grad_norm": 0.3029789924621582, "learning_rate": 4.456075676427628e-06, "loss": 0.6456, "step": 73450 }, { "epoch": 2.556166098629756, "grad_norm": 0.09228137135505676, "learning_rate": 4.438686791402935e-06, "loss": 0.6527, "step": 73500 }, { "epoch": 2.557904987132225, "grad_norm": 0.1144268810749054, "learning_rate": 4.421297906378243e-06, "loss": 1.0027, "step": 73550 }, { "epoch": 2.5596438756346944, "grad_norm": 0.23792581260204315, "learning_rate": 4.403909021353551e-06, "loss": 0.4675, "step": 73600 }, { "epoch": 2.5613827641371634, "grad_norm": 0.3120270073413849, "learning_rate": 4.3865201363288585e-06, "loss": 0.6628, "step": 73650 }, { "epoch": 2.5631216526396328, "grad_norm": 0.3474210202693939, "learning_rate": 4.369131251304166e-06, "loss": 0.6381, "step": 73700 }, { "epoch": 2.564860541142102, "grad_norm": 0.323406457901001, "learning_rate": 4.351742366279474e-06, "loss": 0.5561, "step": 73750 }, { "epoch": 2.566599429644571, "grad_norm": 0.27603837847709656, "learning_rate": 4.334353481254781e-06, "loss": 0.6596, "step": 73800 }, { "epoch": 2.5683383181470405, "grad_norm": 0.2711716890335083, "learning_rate": 4.31696459623009e-06, "loss": 0.2321, "step": 73850 }, { "epoch": 2.5700772066495094, "grad_norm": 8.789525985717773, "learning_rate": 4.299575711205398e-06, "loss": 0.8268, "step": 73900 }, { "epoch": 2.571816095151979, "grad_norm": 0.2900063097476959, "learning_rate": 4.282186826180706e-06, "loss": 0.6568, "step": 73950 }, { "epoch": 2.5735549836544482, "grad_norm": 9.59249496459961, "learning_rate": 4.264797941156014e-06, "loss": 0.5326, "step": 74000 }, { "epoch": 2.575293872156917, "grad_norm": 9.332077026367188, "learning_rate": 4.2474090561313215e-06, "loss": 0.6433, "step": 74050 }, { "epoch": 2.5770327606593866, "grad_norm": 15.744625091552734, "learning_rate": 4.2300201711066285e-06, "loss": 0.4092, "step": 74100 }, { "epoch": 2.5787716491618555, "grad_norm": 0.29218411445617676, "learning_rate": 4.212631286081936e-06, "loss": 0.3724, "step": 74150 }, { "epoch": 2.580510537664325, "grad_norm": 0.08202467113733292, "learning_rate": 4.195242401057244e-06, "loss": 0.863, "step": 74200 }, { "epoch": 2.5822494261667943, "grad_norm": 0.08180984109640121, "learning_rate": 4.177853516032552e-06, "loss": 0.6864, "step": 74250 }, { "epoch": 2.5839883146692633, "grad_norm": 0.26765042543411255, "learning_rate": 4.16046463100786e-06, "loss": 0.6714, "step": 74300 }, { "epoch": 2.5857272031717327, "grad_norm": 15.20392894744873, "learning_rate": 4.143075745983168e-06, "loss": 0.8751, "step": 74350 }, { "epoch": 2.5874660916742016, "grad_norm": 0.29754915833473206, "learning_rate": 4.125686860958475e-06, "loss": 0.7102, "step": 74400 }, { "epoch": 2.589204980176671, "grad_norm": 15.20317268371582, "learning_rate": 4.108297975933783e-06, "loss": 0.6399, "step": 74450 }, { "epoch": 2.5909438686791404, "grad_norm": 0.09010639041662216, "learning_rate": 4.090909090909091e-06, "loss": 0.6626, "step": 74500 }, { "epoch": 2.59268275718161, "grad_norm": 0.32721996307373047, "learning_rate": 4.0735202058843985e-06, "loss": 0.771, "step": 74550 }, { "epoch": 2.5944216456840787, "grad_norm": 15.318130493164062, "learning_rate": 4.056131320859706e-06, "loss": 0.7186, "step": 74600 }, { "epoch": 2.5961605341865477, "grad_norm": 0.31942614912986755, "learning_rate": 4.038742435835015e-06, "loss": 0.8128, "step": 74650 }, { "epoch": 2.597899422689017, "grad_norm": 0.09265422075986862, "learning_rate": 4.021353550810322e-06, "loss": 0.5304, "step": 74700 }, { "epoch": 2.5996383111914865, "grad_norm": 0.28793925046920776, "learning_rate": 4.00396466578563e-06, "loss": 0.6681, "step": 74750 }, { "epoch": 2.601377199693956, "grad_norm": 0.2918592691421509, "learning_rate": 3.986575780760938e-06, "loss": 0.6063, "step": 74800 }, { "epoch": 2.603116088196425, "grad_norm": 0.27539315819740295, "learning_rate": 3.969186895736246e-06, "loss": 0.3285, "step": 74850 }, { "epoch": 2.604854976698894, "grad_norm": 0.3373204469680786, "learning_rate": 3.951798010711554e-06, "loss": 0.7623, "step": 74900 }, { "epoch": 2.606593865201363, "grad_norm": 15.69711971282959, "learning_rate": 3.9344091256868615e-06, "loss": 0.8953, "step": 74950 }, { "epoch": 2.6083327537038326, "grad_norm": 15.241955757141113, "learning_rate": 3.9170202406621685e-06, "loss": 0.5963, "step": 75000 }, { "epoch": 2.610071642206302, "grad_norm": 0.2597866952419281, "learning_rate": 3.899631355637476e-06, "loss": 0.4808, "step": 75050 }, { "epoch": 2.611810530708771, "grad_norm": 9.360833168029785, "learning_rate": 3.882242470612784e-06, "loss": 0.538, "step": 75100 }, { "epoch": 2.6135494192112403, "grad_norm": 0.24698738753795624, "learning_rate": 3.864853585588092e-06, "loss": 0.4778, "step": 75150 }, { "epoch": 2.6152883077137092, "grad_norm": 0.6705573201179504, "learning_rate": 3.8474647005634e-06, "loss": 0.8385, "step": 75200 }, { "epoch": 2.6170271962161786, "grad_norm": 0.24652983248233795, "learning_rate": 3.830075815538708e-06, "loss": 0.7767, "step": 75250 }, { "epoch": 2.618766084718648, "grad_norm": 0.11802521347999573, "learning_rate": 3.8126869305140153e-06, "loss": 0.8578, "step": 75300 }, { "epoch": 2.620504973221117, "grad_norm": 0.08421850949525833, "learning_rate": 3.7952980454893228e-06, "loss": 0.3825, "step": 75350 }, { "epoch": 2.6222438617235864, "grad_norm": 0.27566710114479065, "learning_rate": 3.7779091604646307e-06, "loss": 0.6515, "step": 75400 }, { "epoch": 2.6239827502260553, "grad_norm": 0.08899378776550293, "learning_rate": 3.7605202754399394e-06, "loss": 0.5732, "step": 75450 }, { "epoch": 2.6257216387285247, "grad_norm": 0.24540136754512787, "learning_rate": 3.7431313904152464e-06, "loss": 0.4326, "step": 75500 }, { "epoch": 2.627460527230994, "grad_norm": 0.25683218240737915, "learning_rate": 3.7257425053905543e-06, "loss": 0.6564, "step": 75550 }, { "epoch": 2.629199415733463, "grad_norm": 0.29963716864585876, "learning_rate": 3.708353620365862e-06, "loss": 0.4832, "step": 75600 }, { "epoch": 2.6309383042359324, "grad_norm": 0.26299530267715454, "learning_rate": 3.69096473534117e-06, "loss": 0.7498, "step": 75650 }, { "epoch": 2.6326771927384014, "grad_norm": 0.28807127475738525, "learning_rate": 3.673575850316478e-06, "loss": 0.9872, "step": 75700 }, { "epoch": 2.634416081240871, "grad_norm": 0.08260340243577957, "learning_rate": 3.6561869652917858e-06, "loss": 0.3055, "step": 75750 }, { "epoch": 2.63615496974334, "grad_norm": 0.27327561378479004, "learning_rate": 3.6387980802670932e-06, "loss": 0.8157, "step": 75800 }, { "epoch": 2.637893858245809, "grad_norm": 0.25998470187187195, "learning_rate": 3.621409195242401e-06, "loss": 0.5541, "step": 75850 }, { "epoch": 2.6396327467482785, "grad_norm": 0.08429420739412308, "learning_rate": 3.604020310217709e-06, "loss": 0.6188, "step": 75900 }, { "epoch": 2.6413716352507475, "grad_norm": 0.2669333815574646, "learning_rate": 3.5866314251930164e-06, "loss": 0.482, "step": 75950 }, { "epoch": 2.643110523753217, "grad_norm": 0.2916324734687805, "learning_rate": 3.5692425401683243e-06, "loss": 0.4482, "step": 76000 }, { "epoch": 2.6448494122556863, "grad_norm": 0.081846222281456, "learning_rate": 3.5518536551436326e-06, "loss": 0.7146, "step": 76050 }, { "epoch": 2.6465883007581557, "grad_norm": 0.2721257209777832, "learning_rate": 3.53446477011894e-06, "loss": 0.5706, "step": 76100 }, { "epoch": 2.6483271892606246, "grad_norm": 14.889713287353516, "learning_rate": 3.517075885094248e-06, "loss": 0.7197, "step": 76150 }, { "epoch": 2.650066077763094, "grad_norm": 9.229114532470703, "learning_rate": 3.499687000069556e-06, "loss": 0.9364, "step": 76200 }, { "epoch": 2.651804966265563, "grad_norm": 0.0938228890299797, "learning_rate": 3.4822981150448632e-06, "loss": 0.688, "step": 76250 }, { "epoch": 2.6535438547680323, "grad_norm": 0.09290089458227158, "learning_rate": 3.464909230020171e-06, "loss": 0.5825, "step": 76300 }, { "epoch": 2.6552827432705017, "grad_norm": 0.3204679489135742, "learning_rate": 3.447520344995479e-06, "loss": 0.9376, "step": 76350 }, { "epoch": 2.6570216317729707, "grad_norm": 0.09839994460344315, "learning_rate": 3.4301314599707864e-06, "loss": 0.7386, "step": 76400 }, { "epoch": 2.65876052027544, "grad_norm": 0.30225107073783875, "learning_rate": 3.4127425749460947e-06, "loss": 0.6957, "step": 76450 }, { "epoch": 2.660499408777909, "grad_norm": 0.3641868233680725, "learning_rate": 3.3953536899214026e-06, "loss": 0.5277, "step": 76500 }, { "epoch": 2.6622382972803784, "grad_norm": 0.09461122751235962, "learning_rate": 3.37796480489671e-06, "loss": 0.6836, "step": 76550 }, { "epoch": 2.663977185782848, "grad_norm": 0.28631946444511414, "learning_rate": 3.360575919872018e-06, "loss": 0.5016, "step": 76600 }, { "epoch": 2.6657160742853168, "grad_norm": 0.30579492449760437, "learning_rate": 3.343187034847326e-06, "loss": 0.548, "step": 76650 }, { "epoch": 2.667454962787786, "grad_norm": 0.2784505784511566, "learning_rate": 3.3257981498226333e-06, "loss": 0.5627, "step": 76700 }, { "epoch": 2.669193851290255, "grad_norm": 0.29382219910621643, "learning_rate": 3.308409264797941e-06, "loss": 0.8273, "step": 76750 }, { "epoch": 2.6709327397927245, "grad_norm": 0.288221150636673, "learning_rate": 3.291020379773249e-06, "loss": 0.712, "step": 76800 }, { "epoch": 2.672671628295194, "grad_norm": 9.291687965393066, "learning_rate": 3.273631494748557e-06, "loss": 0.6635, "step": 76850 }, { "epoch": 2.674410516797663, "grad_norm": 0.27473074197769165, "learning_rate": 3.2562426097238648e-06, "loss": 0.6103, "step": 76900 }, { "epoch": 2.6761494053001322, "grad_norm": 0.28478240966796875, "learning_rate": 3.2388537246991726e-06, "loss": 0.7039, "step": 76950 }, { "epoch": 2.677888293802601, "grad_norm": 0.2681798040866852, "learning_rate": 3.22146483967448e-06, "loss": 0.4478, "step": 77000 }, { "epoch": 2.6796271823050706, "grad_norm": 0.10612065345048904, "learning_rate": 3.204075954649788e-06, "loss": 0.8836, "step": 77050 }, { "epoch": 2.68136607080754, "grad_norm": 15.768795013427734, "learning_rate": 3.186687069625096e-06, "loss": 0.6726, "step": 77100 }, { "epoch": 2.683104959310009, "grad_norm": 0.09094037860631943, "learning_rate": 3.1692981846004033e-06, "loss": 0.4292, "step": 77150 }, { "epoch": 2.6848438478124783, "grad_norm": 15.408159255981445, "learning_rate": 3.151909299575711e-06, "loss": 0.7528, "step": 77200 }, { "epoch": 2.6865827363149473, "grad_norm": 0.3140193521976471, "learning_rate": 3.134520414551019e-06, "loss": 0.9284, "step": 77250 }, { "epoch": 2.6883216248174167, "grad_norm": 0.5164923071861267, "learning_rate": 3.117131529526327e-06, "loss": 0.6848, "step": 77300 }, { "epoch": 2.690060513319886, "grad_norm": 0.31768378615379333, "learning_rate": 3.0997426445016348e-06, "loss": 0.6281, "step": 77350 }, { "epoch": 2.691799401822355, "grad_norm": 0.3062048554420471, "learning_rate": 3.0823537594769422e-06, "loss": 0.7537, "step": 77400 }, { "epoch": 2.6935382903248244, "grad_norm": 15.22857666015625, "learning_rate": 3.06496487445225e-06, "loss": 0.8275, "step": 77450 }, { "epoch": 2.6952771788272933, "grad_norm": 0.3275367319583893, "learning_rate": 3.047575989427558e-06, "loss": 0.7225, "step": 77500 }, { "epoch": 2.6970160673297627, "grad_norm": 0.35854870080947876, "learning_rate": 3.0301871044028654e-06, "loss": 0.907, "step": 77550 }, { "epoch": 2.698754955832232, "grad_norm": 0.3688882291316986, "learning_rate": 3.0127982193781733e-06, "loss": 0.7482, "step": 77600 }, { "epoch": 2.7004938443347015, "grad_norm": 0.3668949007987976, "learning_rate": 2.9954093343534816e-06, "loss": 0.6011, "step": 77650 }, { "epoch": 2.7022327328371705, "grad_norm": 0.34101101756095886, "learning_rate": 2.978020449328789e-06, "loss": 0.7216, "step": 77700 }, { "epoch": 2.70397162133964, "grad_norm": 0.3247714340686798, "learning_rate": 2.960631564304097e-06, "loss": 0.4996, "step": 77750 }, { "epoch": 2.705710509842109, "grad_norm": 0.35116156935691833, "learning_rate": 2.943242679279405e-06, "loss": 0.6353, "step": 77800 }, { "epoch": 2.707449398344578, "grad_norm": 0.3041549623012543, "learning_rate": 2.9258537942547122e-06, "loss": 0.76, "step": 77850 }, { "epoch": 2.7091882868470476, "grad_norm": 0.6779861450195312, "learning_rate": 2.90846490923002e-06, "loss": 0.7011, "step": 77900 }, { "epoch": 2.7109271753495165, "grad_norm": 0.32573091983795166, "learning_rate": 2.891076024205328e-06, "loss": 0.6317, "step": 77950 }, { "epoch": 2.712666063851986, "grad_norm": 0.10453498363494873, "learning_rate": 2.8736871391806354e-06, "loss": 0.6021, "step": 78000 }, { "epoch": 2.714404952354455, "grad_norm": 0.34832414984703064, "learning_rate": 2.8562982541559437e-06, "loss": 0.9633, "step": 78050 }, { "epoch": 2.7161438408569243, "grad_norm": 0.42301079630851746, "learning_rate": 2.8389093691312516e-06, "loss": 0.6546, "step": 78100 }, { "epoch": 2.7178827293593937, "grad_norm": 0.2958068251609802, "learning_rate": 2.821520484106559e-06, "loss": 0.4695, "step": 78150 }, { "epoch": 2.7196216178618626, "grad_norm": 0.3142865002155304, "learning_rate": 2.804131599081867e-06, "loss": 0.6452, "step": 78200 }, { "epoch": 2.721360506364332, "grad_norm": 15.081034660339355, "learning_rate": 2.786742714057175e-06, "loss": 0.6671, "step": 78250 }, { "epoch": 2.723099394866801, "grad_norm": 0.28963834047317505, "learning_rate": 2.7693538290324823e-06, "loss": 0.64, "step": 78300 }, { "epoch": 2.7248382833692704, "grad_norm": 0.29057201743125916, "learning_rate": 2.75196494400779e-06, "loss": 0.5089, "step": 78350 }, { "epoch": 2.7265771718717398, "grad_norm": 14.182466506958008, "learning_rate": 2.734576058983098e-06, "loss": 0.6382, "step": 78400 }, { "epoch": 2.7283160603742087, "grad_norm": 0.28946182131767273, "learning_rate": 2.717187173958406e-06, "loss": 0.7946, "step": 78450 }, { "epoch": 2.730054948876678, "grad_norm": 0.08726541697978973, "learning_rate": 2.6997982889337138e-06, "loss": 0.6496, "step": 78500 }, { "epoch": 2.731793837379147, "grad_norm": 0.41820836067199707, "learning_rate": 2.6824094039090216e-06, "loss": 0.4938, "step": 78550 }, { "epoch": 2.7335327258816164, "grad_norm": 0.32797956466674805, "learning_rate": 2.665020518884329e-06, "loss": 0.5059, "step": 78600 }, { "epoch": 2.735271614384086, "grad_norm": 0.28389525413513184, "learning_rate": 2.647631633859637e-06, "loss": 0.5356, "step": 78650 }, { "epoch": 2.737010502886555, "grad_norm": 0.29060474038124084, "learning_rate": 2.630242748834945e-06, "loss": 0.6794, "step": 78700 }, { "epoch": 2.738749391389024, "grad_norm": 0.2735118567943573, "learning_rate": 2.6128538638102523e-06, "loss": 0.4377, "step": 78750 }, { "epoch": 2.740488279891493, "grad_norm": 0.2525642216205597, "learning_rate": 2.59546497878556e-06, "loss": 0.4599, "step": 78800 }, { "epoch": 2.7422271683939625, "grad_norm": 0.2641368508338928, "learning_rate": 2.5780760937608684e-06, "loss": 0.5273, "step": 78850 }, { "epoch": 2.743966056896432, "grad_norm": 0.2526324987411499, "learning_rate": 2.560687208736176e-06, "loss": 0.2359, "step": 78900 }, { "epoch": 2.745704945398901, "grad_norm": 0.24125351011753082, "learning_rate": 2.5432983237114838e-06, "loss": 0.4757, "step": 78950 }, { "epoch": 2.7474438339013703, "grad_norm": 0.07665158808231354, "learning_rate": 2.5259094386867916e-06, "loss": 0.6589, "step": 79000 }, { "epoch": 2.749182722403839, "grad_norm": 0.2379508763551712, "learning_rate": 2.508520553662099e-06, "loss": 1.0183, "step": 79050 }, { "epoch": 2.7509216109063086, "grad_norm": 15.810200691223145, "learning_rate": 2.491131668637407e-06, "loss": 0.7826, "step": 79100 }, { "epoch": 2.752660499408778, "grad_norm": 0.2631561756134033, "learning_rate": 2.473742783612715e-06, "loss": 0.574, "step": 79150 }, { "epoch": 2.7543993879112474, "grad_norm": 0.41178780794143677, "learning_rate": 2.4563538985880223e-06, "loss": 0.9823, "step": 79200 }, { "epoch": 2.7561382764137163, "grad_norm": 15.30538272857666, "learning_rate": 2.4389650135633306e-06, "loss": 0.8008, "step": 79250 }, { "epoch": 2.7578771649161857, "grad_norm": 0.3031037449836731, "learning_rate": 2.4215761285386385e-06, "loss": 0.6899, "step": 79300 }, { "epoch": 2.7596160534186547, "grad_norm": 0.3008303940296173, "learning_rate": 2.404187243513946e-06, "loss": 0.6016, "step": 79350 }, { "epoch": 2.761354941921124, "grad_norm": 8.836114883422852, "learning_rate": 2.3867983584892538e-06, "loss": 0.6778, "step": 79400 }, { "epoch": 2.7630938304235935, "grad_norm": 0.9777456521987915, "learning_rate": 2.3694094734645617e-06, "loss": 0.5768, "step": 79450 }, { "epoch": 2.7648327189260624, "grad_norm": 0.08954401314258575, "learning_rate": 2.352020588439869e-06, "loss": 0.6218, "step": 79500 }, { "epoch": 2.766571607428532, "grad_norm": 0.08298700302839279, "learning_rate": 2.334631703415177e-06, "loss": 0.7415, "step": 79550 }, { "epoch": 2.7683104959310008, "grad_norm": 0.2958933115005493, "learning_rate": 2.317242818390485e-06, "loss": 0.5379, "step": 79600 }, { "epoch": 2.77004938443347, "grad_norm": 0.30135008692741394, "learning_rate": 2.2998539333657927e-06, "loss": 0.8193, "step": 79650 }, { "epoch": 2.7717882729359395, "grad_norm": 0.10203606635332108, "learning_rate": 2.2824650483411006e-06, "loss": 0.6667, "step": 79700 }, { "epoch": 2.7735271614384085, "grad_norm": 0.33943524956703186, "learning_rate": 2.2650761633164085e-06, "loss": 0.4074, "step": 79750 }, { "epoch": 2.775266049940878, "grad_norm": 0.2825494110584259, "learning_rate": 2.247687278291716e-06, "loss": 0.5593, "step": 79800 }, { "epoch": 2.777004938443347, "grad_norm": 0.2705647647380829, "learning_rate": 2.230298393267024e-06, "loss": 0.793, "step": 79850 }, { "epoch": 2.7787438269458162, "grad_norm": 0.3898164927959442, "learning_rate": 2.2129095082423313e-06, "loss": 0.9251, "step": 79900 }, { "epoch": 2.7804827154482856, "grad_norm": 0.27698221802711487, "learning_rate": 2.195520623217639e-06, "loss": 0.5464, "step": 79950 }, { "epoch": 2.7822216039507546, "grad_norm": 15.395230293273926, "learning_rate": 2.178131738192947e-06, "loss": 0.5357, "step": 80000 }, { "epoch": 2.783960492453224, "grad_norm": 0.27641183137893677, "learning_rate": 2.160742853168255e-06, "loss": 0.5388, "step": 80050 }, { "epoch": 2.785699380955693, "grad_norm": 0.2669236660003662, "learning_rate": 2.1433539681435627e-06, "loss": 0.7756, "step": 80100 }, { "epoch": 2.7874382694581623, "grad_norm": 61.669036865234375, "learning_rate": 2.1259650831188706e-06, "loss": 0.6443, "step": 80150 }, { "epoch": 2.7891771579606317, "grad_norm": 0.2955467104911804, "learning_rate": 2.108576198094178e-06, "loss": 0.77, "step": 80200 }, { "epoch": 2.7909160464631007, "grad_norm": 0.827914834022522, "learning_rate": 2.091187313069486e-06, "loss": 0.6718, "step": 80250 }, { "epoch": 2.79265493496557, "grad_norm": 15.171199798583984, "learning_rate": 2.073798428044794e-06, "loss": 0.4577, "step": 80300 }, { "epoch": 2.794393823468039, "grad_norm": 0.27211466431617737, "learning_rate": 2.0564095430201013e-06, "loss": 0.8098, "step": 80350 }, { "epoch": 2.7961327119705084, "grad_norm": 0.29508858919143677, "learning_rate": 2.039020657995409e-06, "loss": 0.6116, "step": 80400 }, { "epoch": 2.797871600472978, "grad_norm": 0.27506178617477417, "learning_rate": 2.0216317729707174e-06, "loss": 0.764, "step": 80450 }, { "epoch": 2.7996104889754467, "grad_norm": 0.29620999097824097, "learning_rate": 2.004242887946025e-06, "loss": 0.6793, "step": 80500 }, { "epoch": 2.801349377477916, "grad_norm": 0.280780166387558, "learning_rate": 1.9868540029213328e-06, "loss": 0.5017, "step": 80550 }, { "epoch": 2.803088265980385, "grad_norm": 0.28513792157173157, "learning_rate": 1.9694651178966406e-06, "loss": 0.7572, "step": 80600 }, { "epoch": 2.8048271544828545, "grad_norm": 15.295577049255371, "learning_rate": 1.952076232871948e-06, "loss": 0.6129, "step": 80650 }, { "epoch": 2.806566042985324, "grad_norm": 0.22789685428142548, "learning_rate": 1.934687347847256e-06, "loss": 0.5836, "step": 80700 }, { "epoch": 2.8083049314877933, "grad_norm": 28.63546371459961, "learning_rate": 1.917298462822564e-06, "loss": 0.7289, "step": 80750 }, { "epoch": 2.810043819990262, "grad_norm": 8.925670623779297, "learning_rate": 1.8999095777978715e-06, "loss": 0.6952, "step": 80800 }, { "epoch": 2.8117827084927316, "grad_norm": 0.2745881974697113, "learning_rate": 1.8825206927731796e-06, "loss": 0.721, "step": 80850 }, { "epoch": 2.8135215969952005, "grad_norm": 0.08955248445272446, "learning_rate": 1.865131807748487e-06, "loss": 0.5576, "step": 80900 }, { "epoch": 2.81526048549767, "grad_norm": 0.3001914322376251, "learning_rate": 1.8477429227237951e-06, "loss": 0.5841, "step": 80950 }, { "epoch": 2.8169993740001393, "grad_norm": 28.611026763916016, "learning_rate": 1.8303540376991028e-06, "loss": 0.6905, "step": 81000 }, { "epoch": 2.8187382625026083, "grad_norm": 14.74248218536377, "learning_rate": 1.8129651526744104e-06, "loss": 0.6871, "step": 81050 }, { "epoch": 2.8204771510050777, "grad_norm": 0.08451410382986069, "learning_rate": 1.7955762676497183e-06, "loss": 0.8262, "step": 81100 }, { "epoch": 2.8222160395075466, "grad_norm": 0.2813693881034851, "learning_rate": 1.7781873826250262e-06, "loss": 0.5188, "step": 81150 }, { "epoch": 2.823954928010016, "grad_norm": 14.974181175231934, "learning_rate": 1.7607984976003339e-06, "loss": 0.643, "step": 81200 }, { "epoch": 2.8256938165124854, "grad_norm": 0.2953049838542938, "learning_rate": 1.7434096125756417e-06, "loss": 0.5521, "step": 81250 }, { "epoch": 2.8274327050149544, "grad_norm": 15.224420547485352, "learning_rate": 1.7260207275509494e-06, "loss": 0.7818, "step": 81300 }, { "epoch": 2.8291715935174238, "grad_norm": 0.27730950713157654, "learning_rate": 1.7086318425262573e-06, "loss": 0.6226, "step": 81350 }, { "epoch": 2.8309104820198927, "grad_norm": 15.675491333007812, "learning_rate": 1.6912429575015651e-06, "loss": 0.8122, "step": 81400 }, { "epoch": 2.832649370522362, "grad_norm": 15.347591400146484, "learning_rate": 1.6738540724768728e-06, "loss": 0.8009, "step": 81450 }, { "epoch": 2.8343882590248315, "grad_norm": 0.3060605823993683, "learning_rate": 1.6564651874521805e-06, "loss": 0.7996, "step": 81500 }, { "epoch": 2.8361271475273004, "grad_norm": 0.2839072346687317, "learning_rate": 1.6390763024274885e-06, "loss": 0.5875, "step": 81550 }, { "epoch": 2.83786603602977, "grad_norm": 0.3060269057750702, "learning_rate": 1.6216874174027962e-06, "loss": 0.8399, "step": 81600 }, { "epoch": 2.839604924532239, "grad_norm": 0.09366797655820847, "learning_rate": 1.6042985323781039e-06, "loss": 0.6703, "step": 81650 }, { "epoch": 2.841343813034708, "grad_norm": 0.3006170094013214, "learning_rate": 1.5869096473534117e-06, "loss": 0.7485, "step": 81700 }, { "epoch": 2.8430827015371776, "grad_norm": 0.3037119209766388, "learning_rate": 1.5695207623287196e-06, "loss": 0.7198, "step": 81750 }, { "epoch": 2.8448215900396465, "grad_norm": 27.92637825012207, "learning_rate": 1.5521318773040273e-06, "loss": 0.7627, "step": 81800 }, { "epoch": 2.846560478542116, "grad_norm": 0.293649286031723, "learning_rate": 1.534742992279335e-06, "loss": 0.6027, "step": 81850 }, { "epoch": 2.848299367044585, "grad_norm": 0.3007853925228119, "learning_rate": 1.5173541072546428e-06, "loss": 0.4784, "step": 81900 }, { "epoch": 2.8500382555470543, "grad_norm": 0.29856160283088684, "learning_rate": 1.4999652222299507e-06, "loss": 0.7395, "step": 81950 }, { "epoch": 2.8517771440495236, "grad_norm": 17.031660079956055, "learning_rate": 1.4825763372052583e-06, "loss": 0.7464, "step": 82000 }, { "epoch": 2.853516032551993, "grad_norm": 0.3227154016494751, "learning_rate": 1.4651874521805662e-06, "loss": 0.4588, "step": 82050 }, { "epoch": 2.855254921054462, "grad_norm": 0.3364619016647339, "learning_rate": 1.4477985671558739e-06, "loss": 0.6607, "step": 82100 }, { "epoch": 2.856993809556931, "grad_norm": 16.756914138793945, "learning_rate": 1.4304096821311818e-06, "loss": 0.613, "step": 82150 }, { "epoch": 2.8587326980594003, "grad_norm": 0.3933362066745758, "learning_rate": 1.4130207971064896e-06, "loss": 0.6602, "step": 82200 }, { "epoch": 2.8604715865618697, "grad_norm": 0.28859493136405945, "learning_rate": 1.3956319120817973e-06, "loss": 0.8083, "step": 82250 }, { "epoch": 2.862210475064339, "grad_norm": 14.997337341308594, "learning_rate": 1.378243027057105e-06, "loss": 0.8804, "step": 82300 }, { "epoch": 2.863949363566808, "grad_norm": 0.29211071133613586, "learning_rate": 1.360854142032413e-06, "loss": 0.5269, "step": 82350 }, { "epoch": 2.8656882520692775, "grad_norm": 0.5354862213134766, "learning_rate": 1.3434652570077207e-06, "loss": 0.3705, "step": 82400 }, { "epoch": 2.8674271405717464, "grad_norm": 9.170504570007324, "learning_rate": 1.3260763719830284e-06, "loss": 0.3391, "step": 82450 }, { "epoch": 2.869166029074216, "grad_norm": 0.09345777332782745, "learning_rate": 1.3086874869583362e-06, "loss": 0.6235, "step": 82500 }, { "epoch": 2.870904917576685, "grad_norm": 0.27642184495925903, "learning_rate": 1.2912986019336441e-06, "loss": 0.6699, "step": 82550 }, { "epoch": 2.872643806079154, "grad_norm": 0.28986379504203796, "learning_rate": 1.2739097169089518e-06, "loss": 0.5606, "step": 82600 }, { "epoch": 2.8743826945816235, "grad_norm": 0.30565524101257324, "learning_rate": 1.2565208318842596e-06, "loss": 0.422, "step": 82650 }, { "epoch": 2.8761215830840925, "grad_norm": 0.08958898484706879, "learning_rate": 1.2391319468595673e-06, "loss": 0.765, "step": 82700 }, { "epoch": 2.877860471586562, "grad_norm": 0.2698003649711609, "learning_rate": 1.2217430618348752e-06, "loss": 0.5036, "step": 82750 }, { "epoch": 2.8795993600890313, "grad_norm": 0.31371304392814636, "learning_rate": 1.204354176810183e-06, "loss": 0.5316, "step": 82800 }, { "epoch": 2.8813382485915002, "grad_norm": 0.26707175374031067, "learning_rate": 1.1869652917854907e-06, "loss": 0.6486, "step": 82850 }, { "epoch": 2.8830771370939696, "grad_norm": 15.359684944152832, "learning_rate": 1.1695764067607984e-06, "loss": 0.6176, "step": 82900 }, { "epoch": 2.8848160255964386, "grad_norm": 0.26980575919151306, "learning_rate": 1.1521875217361065e-06, "loss": 0.4481, "step": 82950 }, { "epoch": 2.886554914098908, "grad_norm": 0.26169586181640625, "learning_rate": 1.1347986367114141e-06, "loss": 0.9884, "step": 83000 }, { "epoch": 2.8882938026013774, "grad_norm": 0.29289427399635315, "learning_rate": 1.1174097516867218e-06, "loss": 0.837, "step": 83050 }, { "epoch": 2.8900326911038463, "grad_norm": 0.28367534279823303, "learning_rate": 1.1000208666620295e-06, "loss": 0.5978, "step": 83100 }, { "epoch": 2.8917715796063157, "grad_norm": 0.05387123301625252, "learning_rate": 1.0826319816373375e-06, "loss": 0.5586, "step": 83150 }, { "epoch": 2.8935104681087847, "grad_norm": 0.3222648799419403, "learning_rate": 1.0652430966126452e-06, "loss": 0.846, "step": 83200 }, { "epoch": 2.895249356611254, "grad_norm": 0.3018621504306793, "learning_rate": 1.0478542115879529e-06, "loss": 0.8141, "step": 83250 }, { "epoch": 2.8969882451137234, "grad_norm": 0.276151567697525, "learning_rate": 1.0304653265632607e-06, "loss": 0.7742, "step": 83300 }, { "epoch": 2.8987271336161924, "grad_norm": 0.29011988639831543, "learning_rate": 1.0130764415385686e-06, "loss": 0.7414, "step": 83350 }, { "epoch": 2.900466022118662, "grad_norm": 0.30144640803337097, "learning_rate": 9.956875565138763e-07, "loss": 0.8263, "step": 83400 }, { "epoch": 2.9022049106211307, "grad_norm": 0.30378133058547974, "learning_rate": 9.782986714891841e-07, "loss": 0.6267, "step": 83450 }, { "epoch": 2.9039437991236, "grad_norm": 0.2974216341972351, "learning_rate": 9.609097864644918e-07, "loss": 0.4684, "step": 83500 }, { "epoch": 2.9056826876260695, "grad_norm": 0.5001803636550903, "learning_rate": 9.435209014397998e-07, "loss": 0.56, "step": 83550 }, { "epoch": 2.907421576128539, "grad_norm": 25.032337188720703, "learning_rate": 9.261320164151075e-07, "loss": 0.8272, "step": 83600 }, { "epoch": 2.909160464631008, "grad_norm": 0.15811966359615326, "learning_rate": 9.087431313904152e-07, "loss": 1.0817, "step": 83650 }, { "epoch": 2.910899353133477, "grad_norm": 0.3133315443992615, "learning_rate": 8.913542463657231e-07, "loss": 0.4073, "step": 83700 }, { "epoch": 2.912638241635946, "grad_norm": 16.796998977661133, "learning_rate": 8.739653613410308e-07, "loss": 0.7175, "step": 83750 }, { "epoch": 2.9143771301384156, "grad_norm": 0.35782063007354736, "learning_rate": 8.565764763163386e-07, "loss": 0.5313, "step": 83800 }, { "epoch": 2.916116018640885, "grad_norm": 15.169659614562988, "learning_rate": 8.391875912916464e-07, "loss": 0.5481, "step": 83850 }, { "epoch": 2.917854907143354, "grad_norm": 0.05046667903661728, "learning_rate": 8.217987062669542e-07, "loss": 0.7807, "step": 83900 }, { "epoch": 2.9195937956458233, "grad_norm": 0.2853194773197174, "learning_rate": 8.044098212422619e-07, "loss": 0.5301, "step": 83950 }, { "epoch": 2.9213326841482923, "grad_norm": 0.05534028261899948, "learning_rate": 7.870209362175697e-07, "loss": 0.7594, "step": 84000 }, { "epoch": 2.9230715726507617, "grad_norm": 0.09064784646034241, "learning_rate": 7.696320511928775e-07, "loss": 0.606, "step": 84050 }, { "epoch": 2.924810461153231, "grad_norm": 29.788469314575195, "learning_rate": 7.522431661681853e-07, "loss": 0.384, "step": 84100 }, { "epoch": 2.9265493496557, "grad_norm": 0.2745174169540405, "learning_rate": 7.34854281143493e-07, "loss": 0.462, "step": 84150 }, { "epoch": 2.9282882381581694, "grad_norm": 0.3964908719062805, "learning_rate": 7.174653961188009e-07, "loss": 0.7395, "step": 84200 }, { "epoch": 2.9300271266606384, "grad_norm": 0.28486576676368713, "learning_rate": 7.000765110941086e-07, "loss": 0.7751, "step": 84250 }, { "epoch": 2.9317660151631078, "grad_norm": 14.388537406921387, "learning_rate": 6.826876260694164e-07, "loss": 0.8883, "step": 84300 }, { "epoch": 2.933504903665577, "grad_norm": 0.2984370291233063, "learning_rate": 6.652987410447242e-07, "loss": 0.4117, "step": 84350 }, { "epoch": 2.935243792168046, "grad_norm": 14.60980224609375, "learning_rate": 6.479098560200321e-07, "loss": 0.868, "step": 84400 }, { "epoch": 2.9369826806705155, "grad_norm": 16.63714599609375, "learning_rate": 6.305209709953397e-07, "loss": 0.4364, "step": 84450 }, { "epoch": 2.9387215691729844, "grad_norm": 0.2931533455848694, "learning_rate": 6.131320859706476e-07, "loss": 0.2988, "step": 84500 }, { "epoch": 2.940460457675454, "grad_norm": 0.30575355887413025, "learning_rate": 5.957432009459554e-07, "loss": 0.6474, "step": 84550 }, { "epoch": 2.9421993461779232, "grad_norm": 0.2841874361038208, "learning_rate": 5.783543159212631e-07, "loss": 0.741, "step": 84600 }, { "epoch": 2.943938234680392, "grad_norm": 9.059798240661621, "learning_rate": 5.609654308965709e-07, "loss": 0.5336, "step": 84650 }, { "epoch": 2.9456771231828616, "grad_norm": 15.143796920776367, "learning_rate": 5.435765458718787e-07, "loss": 0.9629, "step": 84700 }, { "epoch": 2.9474160116853305, "grad_norm": 0.28480568528175354, "learning_rate": 5.261876608471864e-07, "loss": 0.9279, "step": 84750 }, { "epoch": 2.9491549001878, "grad_norm": 0.29083627462387085, "learning_rate": 5.087987758224943e-07, "loss": 0.4638, "step": 84800 }, { "epoch": 2.9508937886902693, "grad_norm": 0.3540242314338684, "learning_rate": 4.91409890797802e-07, "loss": 0.6633, "step": 84850 }, { "epoch": 2.9526326771927383, "grad_norm": 14.859944343566895, "learning_rate": 4.7402100577310984e-07, "loss": 0.687, "step": 84900 }, { "epoch": 2.9543715656952076, "grad_norm": 0.2987927794456482, "learning_rate": 4.566321207484176e-07, "loss": 0.6983, "step": 84950 }, { "epoch": 2.9561104541976766, "grad_norm": 0.29898738861083984, "learning_rate": 4.3924323572372543e-07, "loss": 0.6221, "step": 85000 }, { "epoch": 2.957849342700146, "grad_norm": 0.09583161771297455, "learning_rate": 4.218543506990332e-07, "loss": 0.4931, "step": 85050 }, { "epoch": 2.9595882312026154, "grad_norm": 0.2841198146343231, "learning_rate": 4.0446546567434096e-07, "loss": 0.5915, "step": 85100 }, { "epoch": 2.9613271197050848, "grad_norm": 14.370145797729492, "learning_rate": 3.8707658064964873e-07, "loss": 0.7148, "step": 85150 }, { "epoch": 2.9630660082075537, "grad_norm": 15.013972282409668, "learning_rate": 3.6968769562495655e-07, "loss": 0.5081, "step": 85200 }, { "epoch": 2.964804896710023, "grad_norm": 0.08561894297599792, "learning_rate": 3.522988106002643e-07, "loss": 0.6722, "step": 85250 }, { "epoch": 2.966543785212492, "grad_norm": 0.2991926074028015, "learning_rate": 3.349099255755721e-07, "loss": 0.4933, "step": 85300 }, { "epoch": 2.9682826737149615, "grad_norm": 0.2901758849620819, "learning_rate": 3.175210405508799e-07, "loss": 0.7137, "step": 85350 }, { "epoch": 2.970021562217431, "grad_norm": 0.34760478138923645, "learning_rate": 3.001321555261877e-07, "loss": 0.6429, "step": 85400 }, { "epoch": 2.9717604507199, "grad_norm": 15.532635688781738, "learning_rate": 2.8274327050149544e-07, "loss": 0.7747, "step": 85450 }, { "epoch": 2.973499339222369, "grad_norm": 0.3590717315673828, "learning_rate": 2.653543854768032e-07, "loss": 0.4597, "step": 85500 }, { "epoch": 2.975238227724838, "grad_norm": 8.7227783203125, "learning_rate": 2.4796550045211103e-07, "loss": 0.6802, "step": 85550 }, { "epoch": 2.9769771162273075, "grad_norm": 0.29600927233695984, "learning_rate": 2.305766154274188e-07, "loss": 0.4686, "step": 85600 }, { "epoch": 2.978716004729777, "grad_norm": 0.4198228120803833, "learning_rate": 2.1318773040272657e-07, "loss": 0.7993, "step": 85650 }, { "epoch": 2.980454893232246, "grad_norm": 0.38335585594177246, "learning_rate": 1.9579884537803436e-07, "loss": 0.6567, "step": 85700 }, { "epoch": 2.9821937817347153, "grad_norm": 14.858195304870605, "learning_rate": 1.7840996035334216e-07, "loss": 0.5013, "step": 85750 }, { "epoch": 2.9839326702371842, "grad_norm": 0.35712581872940063, "learning_rate": 1.6102107532864993e-07, "loss": 0.5827, "step": 85800 }, { "epoch": 2.9856715587396536, "grad_norm": 0.05570732802152634, "learning_rate": 1.4363219030395772e-07, "loss": 0.7598, "step": 85850 }, { "epoch": 2.987410447242123, "grad_norm": 0.2906959652900696, "learning_rate": 1.262433052792655e-07, "loss": 0.4244, "step": 85900 }, { "epoch": 2.989149335744592, "grad_norm": 0.09244731813669205, "learning_rate": 1.0885442025457328e-07, "loss": 0.726, "step": 85950 }, { "epoch": 2.9908882242470614, "grad_norm": 11.994555473327637, "learning_rate": 9.146553522988106e-08, "loss": 0.6727, "step": 86000 }, { "epoch": 2.9926271127495303, "grad_norm": 0.307987242937088, "learning_rate": 7.407665020518884e-08, "loss": 0.657, "step": 86050 }, { "epoch": 2.9943660012519997, "grad_norm": 18.233705520629883, "learning_rate": 5.6687765180496625e-08, "loss": 0.9067, "step": 86100 }, { "epoch": 2.996104889754469, "grad_norm": 0.2682857811450958, "learning_rate": 3.929888015580441e-08, "loss": 0.5154, "step": 86150 }, { "epoch": 2.997843778256938, "grad_norm": 0.31615200638771057, "learning_rate": 2.1909995131112194e-08, "loss": 0.6619, "step": 86200 }, { "epoch": 2.9995826667594074, "grad_norm": 0.29539021849632263, "learning_rate": 4.521110106419976e-09, "loss": 0.6408, "step": 86250 }, { "epoch": 3.0, "eval_accuracy": 0.9828452683198275, "eval_confusion_matrix": [ [ 111587, 18 ], [ 1955, 1452 ] ], "eval_f1": 0.5954480213245847, "eval_loss": 0.9132923483848572, "eval_precision": 0.9877551020408163, "eval_recall": 0.426181391253302, "eval_roc_auc": 0.754024472315363, "eval_runtime": 545.229, "eval_samples_per_second": 210.943, "eval_steps_per_second": 6.594, "step": 86262 } ], "logging_steps": 50, "max_steps": 86262, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.141224142644122e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }